#!/bin/bash
#SBATCH --job-name=bench_embed
#SBATCH --partition=nova
#SBATCH --gres=gpu:1
#SBATCH --cpus-per-task=8
#SBATCH --mem=32G
#SBATCH --time=01:00:00
#SBATCH --output=bert_v6_contrastive/scripts/bench_embed_%j.out
#SBATCH --error=bert_v6_contrastive/scripts/bench_embed_%j.err
#
# Benchmark embedding extraction + visualization job.
#
# Runs embed_benchmark_tasks.py for the "v5" model (val+test splits, then all
# data), and repeats the same two runs for the "v6" model only when one of the
# known v6 checkpoint files is present. Finishes by listing the output dirs.
#
# NOTE(review): every #SBATCH directive must sit on its own line ahead of the
# first executable command, otherwise sbatch does not see it. The --output and
# --error paths are relative — presumably resolved against the submission
# directory; confirm the job is submitted from the project root.

echo "========================================"
echo "Benchmark Embedding Extraction + Visualization"
echo "Job ID: ${SLURM_JOB_ID:-n/a}"
echo "Node: $(hostname)"
echo "Started: $(date)"
echo "========================================"

# All paths below are relative to the project root, so a failed cd must abort
# the job instead of running against whatever directory we happen to be in.
PROJECT_ROOT=/work/ratul1/supantha/glycan-SD-VS/bert_training_v3/v3.1_cluster_training
cd "$PROJECT_ROOT" || {
  echo "ERROR: cannot cd to $PROJECT_ROOT" >&2
  exit 1
}

# Environment activation stays best-effort (as before), but a failure is now
# reported on stderr instead of being silently discarded.
CONDA_SH=/work/ratul1/supantha/miniconda3/etc/profile.d/conda.sh
if [[ -f "$CONDA_SH" ]]; then
  # shellcheck disable=SC1090 — cluster-local file, not available to the linter
  source "$CONDA_SH"
else
  echo "WARNING: $CONDA_SH not found; conda may be unavailable" >&2
fi
conda activate glycan_bert 2>/dev/null \
  || source activate glycan_bert 2>/dev/null \
  || echo "WARNING: could not activate conda env 'glycan_bert'" >&2

# Best-effort install of the packages installed by the original script; pip's
# own stderr stays suppressed, but a failure is surfaced as a single warning.
pip install -q scikit-learn umap-learn 2>/dev/null \
  || echo "WARNING: pip install of scikit-learn/umap-learn failed" >&2

SCRIPT=bert_v6_contrastive/scripts/embed_benchmark_tasks.py
OUTDIR=bert_v6_contrastive/analysis/benchmark_embeddings

echo ""
echo "========== V5 Embeddings (val+test) =========="
python3 "$SCRIPT" --model v5 --splits val test --method both --output_dir "$OUTDIR"

echo ""
echo "========== V5 ALL data (incl train) =========="
python3 "$SCRIPT" --model v5 --embed_all --method both --output_dir "${OUTDIR}_all"

# V6 only if checkpoint exists: any one of these files is enough.
V6_CHECKPOINTS=(
  "bert_v6_contrastive/checkpoints/phase_3_hard_checkpoint.pt"
  "bert_v6_contrastive/checkpoints/best_model.pt"
  "bert_v6_contrastive/checkpoints/checkpoint_latest.pt"
)
v6_available=0
for ckpt in "${V6_CHECKPOINTS[@]}"; do
  if [[ -f "$ckpt" ]]; then
    v6_available=1
    break
  fi
done

if (( v6_available )); then
  echo ""
  echo "========== V6 Embeddings (val+test) =========="
  python3 "$SCRIPT" --model v6 --splits val test --method both --output_dir "$OUTDIR"

  echo ""
  echo "========== V6 ALL data (incl train) =========="
  python3 "$SCRIPT" --model v6 --embed_all --method both --output_dir "${OUTDIR}_all"
else
  echo ""
  echo "SKIPPING V6: No checkpoint found (training incomplete)"
fi

echo ""
echo "Generated files:"
# Output directories may not exist if the runs above failed; stay quiet then.
ls -la "$OUTDIR/" 2>/dev/null
ls -la "${OUTDIR}_all/" 2>/dev/null

echo ""
echo "Completed: $(date)"