#!/usr/bin/env bash
# Example SLURM array script for flat dense retrieval (GTE 7B) with top-k=20.
#
# Usage:
#   1. Choose the account and partition. sbatch does NOT shell-expand
#      #SBATCH lines, so "${VAR:-default}" cannot be used there. Either edit
#      the literal placeholders below, or override them at submit time:
#          sbatch -A <account> -p <partition> scripts/slurm/example_dense_retrieval.slurm
#      or via the SBATCH_ACCOUNT / SBATCH_PARTITION environment variables
#      (command line > environment > #SBATCH directives).
#   2. Optionally export PROJECT_DIR, MODEL_NAME, TOP_K, or SHARD_ROOT before
#      submitting; --export=ALL forwards them to the job. Defaults are below.
#   3. Make sure shards exist under $SHARD_ROOT/dataset/ and
#      $SHARD_ROOT/ret_cache/ (see scripts/make_v5_shards.py and
#      scripts/build_retrieval_cache.py).
#   4. Create the log directory first (SLURM does not create it):
#          mkdir -p logs
#   5. sbatch scripts/slurm/example_dense_retrieval.slurm
#
#SBATCH -J dense_gte_topk20
#SBATCH -A your-account
#SBATCH -p cpu
#SBATCH --nodes=1
#SBATCH --time=04:00:00
#SBATCH --array=0-7
#SBATCH --output=logs/dense_gte_topk20_%A_%a.log
#SBATCH --export=ALL,NV_API_KEY

set -euo pipefail

# Print an error to stderr and abort the array task with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Run from the project root so the relative paths below resolve.
PROJECT_DIR="${PROJECT_DIR:-$(pwd)}"
cd "$PROJECT_DIR" || die "cannot cd to PROJECT_DIR: $PROJECT_DIR"

MODEL_NAME="${MODEL_NAME:-gpt-5.5}" # any key from model_zoo.py
TOP_K="${TOP_K:-20}"
# Dots in the model name are replaced with underscores in the directory name.
SHARD_ROOT="${SHARD_ROOT:-output/shards/v5_${MODEL_NAME//./_}_nchunks10}"

# Fail with a clear message (instead of a bare "unbound variable" from
# `set -u`) when the script is run outside a SLURM array job.
: "${SLURM_ARRAY_TASK_ID:?must be set; submit this script with sbatch as an array job}"

# Shard files are numbered with two digits: shard_00 ... shard_07.
shard_id=$(printf '%02d' "$SLURM_ARRAY_TASK_ID")

in_file="$SHARD_ROOT/dataset/shard_${shard_id}.json"
out_dir="$SHARD_ROOT/dense_gte_topk${TOP_K}"
out_file="$out_dir/part_${shard_id}.jsonl"

# Exported under this exact (lowercase) name; presumably read from the
# environment by main.py -- do not rename without checking the consumer.
export ret_cache="$SHARD_ROOT/ret_cache/shard_${shard_id}.jsonl"

# Fail fast on a missing input shard rather than deep inside the Python run.
[[ -f "$in_file" ]] || die "dataset shard not found: $in_file"

# Only warn here: whether main.py can proceed without a prebuilt cache is not
# visible from this script.
if [[ ! -f "$ret_cache" ]]; then
  printf 'warning: retrieval cache not found: %s\n' "$ret_cache" >&2
fi

# Ensure the output directory exists before the run starts writing to it.
mkdir -p -- "$out_dir"

python main.py \
  --in_file "$in_file" \
  --out_file "$out_file" \
  --model_name "$MODEL_NAME" \
  --top_k "$TOP_K" \
  --n_chunks 10 \
  --nvidia \
  --all_sessions_file dataset/all_sessions.json \
  --no_semantic \
  --mode embed