supanthadey1
/

bertose-affinose-training-code

reproducibility

Model card Files Files and versions

bertose-affinose-training-code / code /probes /cluster_scripts /run_benchmark_embeddings.sh

supanthadey1's picture

Add BERTose and AFFINose training code release

1d6f391 verified about 1 month ago

History Blame Contribute Delete

1.96 kB

	#!/bin/bash
	# Slurm batch script: extracts benchmark embeddings with the V5 model (and the
	# V6 model when a checkpoint is present) and lists the generated files.
	#
	# Resource request, as read by sbatch:
	#   job-name       name shown in the queue
	#   partition      target partition ("nova")
	#   gres           one GPU
	#   cpus-per-task  8 CPU cores for the single task
	#   mem            32G of memory
	#   time           wall-clock limit of 1 hour
	#   output/error   stdout/stderr log files; %j is replaced by the job ID
	# NOTE(review): the log paths are relative — presumably resolved from the
	# directory the job is submitted from, which must then contain
	# bert_v6_contrastive/scripts/; confirm against how the job is submitted.
	#SBATCH --job-name=bench_embed
	#SBATCH --partition=nova
	#SBATCH --gres=gpu:1
	#SBATCH --cpus-per-task=8
	#SBATCH --mem=32G
	#SBATCH --time=01:00:00
	#SBATCH --output=bert_v6_contrastive/scripts/bench_embed_%j.out
	#SBATCH --error=bert_v6_contrastive/scripts/bench_embed_%j.err

	# Start-of-job banner: job ID (from Slurm's environment), host and start time,
	# framed by two rule lines. One printf emits all six lines.
	banner_rule="========================================"
	printf '%s\n' \
	  "${banner_rule}" \
	  "Benchmark Embedding Extraction + Visualization" \
	  "Job ID: ${SLURM_JOB_ID}" \
	  "Node: $(hostname)" \
	  "Started: $(date)" \
	  "${banner_rule}"

	# Environment setup.
	# Every later path (scripts, checkpoints, output directories) is relative to
	# the project root, so abort instead of running from the wrong directory.
	cd /work/ratul1/supantha/glycan-SD-VS/bert_training_v3/v3.1_cluster_training || {
	  echo "ERROR: cannot cd to project root; aborting" >&2
	  exit 1
	}
	source /work/ratul1/supantha/miniconda3/etc/profile.d/conda.sh
	# Activation is best-effort: try 'conda activate', fall back to the legacy
	# 'source activate', and otherwise continue with whatever Python is on PATH,
	# but say so instead of failing silently.
	conda activate glycan_bert 2>/dev/null ||
	  source activate glycan_bert 2>/dev/null ||
	  echo "WARNING: could not activate conda env 'glycan_bert'; using the current environment" >&2
	# Best-effort install of the extra packages; pip's own stderr stays quiet,
	# but a failure is reported so a later import error is easier to trace.
	pip install -q scikit-learn umap-learn 2>/dev/null ||
	  echo "WARNING: pip install of scikit-learn/umap-learn failed" >&2

	# Embedding script and base output directory (relative to the working
	# directory); both are reused by the later V6 and listing steps.
	SCRIPT="bert_v6_contrastive/scripts/embed_benchmark_tasks.py"
	OUTDIR="bert_v6_contrastive/analysis/benchmark_embeddings"

	# V5 run 1: val and test splits only, written to the base output directory.
	printf '\n%s\n' "========== V5 Embeddings (val+test) =========="
	python3 "${SCRIPT}" --model v5 --splits val test --method both --output_dir "${OUTDIR}"

	# V5 run 2: --embed_all, written to the sibling "<OUTDIR>_all" directory.
	printf '\n%s\n' "========== V5 ALL data (incl train) =========="
	python3 "${SCRIPT}" --model v5 --embed_all --method both --output_dir "${OUTDIR}_all"

	# V6 embeddings are produced only when a V6 checkpoint is on disk; any one of
	# the three candidate files is enough. The tests are joined with real '||'
	# operators — an escaped '\|\|' is handed to '[' as plain arguments, which
	# makes the test error out and the V6 runs get skipped unconditionally.
	v6_ckpt_dir="bert_v6_contrastive/checkpoints"
	if [[ -f "${v6_ckpt_dir}/phase_3_hard_checkpoint.pt" ]] ||
	  [[ -f "${v6_ckpt_dir}/best_model.pt" ]] ||
	  [[ -f "${v6_ckpt_dir}/checkpoint_latest.pt" ]]; then
	  # Same two runs as for V5: val+test into OUTDIR, then everything into
	  # "<OUTDIR>_all".
	  echo ""
	  echo "========== V6 Embeddings (val+test) =========="
	  python3 "$SCRIPT" --model v6 --splits val test --method both --output_dir "$OUTDIR"
	  echo ""
	  echo "========== V6 ALL data (incl train) =========="
	  python3 "$SCRIPT" --model v6 --embed_all --method both --output_dir "${OUTDIR}_all"
	else
	  echo ""
	  echo "SKIPPING V6: No checkpoint found (training incomplete)"
	fi

	# Final report: list both output directories, then print the finish time.
	# A directory that does not exist is skipped silently (ls errors are discarded).
	printf '\n%s\n' "Generated files:"
	for listing_dir in "${OUTDIR}/" "${OUTDIR}_all/"; do
	  ls -la "${listing_dir}" 2>/dev/null
	done
	printf '\n%s\n' "Completed: $(date)"