#!/bin/bash
# Example SLURM array script for flat dense retrieval (GTE 7B) with top-k=20.
#
# Each array task processes one shard: task N reads
# $SHARD_ROOT/dataset/shard_NN.json and writes
# $SHARD_ROOT/dense_gte_topk$TOP_K/part_NN.jsonl.
#
# Usage:
#   1. Edit the account (-A) and partition (-p) in the #SBATCH lines below, or
#      override them at submit time:
#          sbatch -A <account> -p <partition> scripts/slurm/example_dense_retrieval.slurm
#      (the SBATCH_ACCOUNT / SBATCH_PARTITION environment variables also work).
#      sbatch does NOT expand shell variables inside #SBATCH directives, so the
#      values there must be literals.
#   2. Optionally export PROJECT_DIR, MODEL_NAME, TOP_K or SHARD_ROOT before
#      submitting to override the defaults set further down.
#   3. Make sure shards exist under $SHARD_ROOT/dataset/ and $SHARD_ROOT/ret_cache/
#      (see scripts/make_v5_shards.py and scripts/build_retrieval_cache.py).
#   4. Create the logs/ directory in the directory you submit from; the
#      --output path below points into it and Slurm will not create it.
#   5. sbatch scripts/slurm/example_dense_retrieval.slurm
#
#SBATCH -J dense_gte_topk20
#SBATCH -A your-account
#SBATCH -p cpu
#SBATCH --nodes=1
#SBATCH --time=04:00:00
#SBATCH --array=0-7
#SBATCH --output=logs/dense_gte_topk20_%A_%a.log
#SBATCH --export=ALL,NV_API_KEY

set -euo pipefail

# Fail early with a readable message when run outside of an array job
# (otherwise `set -u` aborts with a bare "unbound variable" error).
: "${SLURM_ARRAY_TASK_ID:?must be set; submit this script with sbatch as an array job}"

# Run from the project root so the relative paths below resolve.
PROJECT_DIR="${PROJECT_DIR:-$(pwd)}"
cd "$PROJECT_DIR" || {
  printf 'error: cannot cd to PROJECT_DIR=%s\n' "$PROJECT_DIR" >&2
  exit 1
}

MODEL_NAME="${MODEL_NAME:-gpt-5.5}"   # any key from model_zoo.py
TOP_K="${TOP_K:-20}"
# Dots in the model name are replaced by underscores in the default shard path.
SHARD_ROOT="${SHARD_ROOT:-output/shards/v5_${MODEL_NAME//./_}_nchunks10}"

# Shard files are numbered with two digits (00, 01, ...).
shard_id=$(printf '%02d' "$SLURM_ARRAY_TASK_ID")

in_file="$SHARD_ROOT/dataset/shard_${shard_id}.json"
out_dir="$SHARD_ROOT/dense_gte_topk${TOP_K}"
out_file="$out_dir/part_${shard_id}.jsonl"

# Exported so the python process inherits it.
# NOTE(review): presumably main.py reads `ret_cache` from the environment -- confirm.
export ret_cache="$SHARD_ROOT/ret_cache/shard_${shard_id}.jsonl"

if [[ ! -f "$in_file" ]]; then
  printf 'error: input shard not found: %s\n' "$in_file" >&2
  exit 1
fi

# Safe when several array tasks race to create the same directory.
mkdir -p -- "$out_dir"

python main.py \
  --in_file "$in_file" \
  --out_file "$out_file" \
  --model_name "$MODEL_NAME" \
  --top_k "$TOP_K" \
  --n_chunks 10 \
  --nvidia \
  --all_sessions_file dataset/all_sessions.json \
  --no_semantic \
  --mode embed