#!/usr/bin/env bash
# Run on the RunPod pod.
#
# Usage: <this script> MODEL RUN_NAME
#   MODEL     model letter, forwarded to scripts/train.py as --model
#   RUN_NAME  run name, forwarded as --run_name; also names the log file
#             /workspace/runs/RUN_NAME.log
#
# Steps: locate the repo checkout under /workspace, install the Python
# extras, stage /workspace/.env into the repo, prepare the MIMIC data if
# the index file is missing, write the shard-roots JSON, then train.
set -euo pipefail

MODEL="${1:?model letter required}"
RUN_NAME="${2:?run name required}"

echo "[bootstrap] model=$MODEL run=$RUN_NAME"

cd /workspace

# The checkout may be named with either capitalisation; take the first match.
REPO_DIR=""
for d in PhysioJEPA physiojepa; do
  if [[ -d "$d" ]]; then
    REPO_DIR="$d"
    break
  fi
done
if [[ -z "$REPO_DIR" ]]; then
  # Diagnostics belong on stderr so they are not mixed into captured stdout.
  echo "no repo dir found at /workspace/{PhysioJEPA,physiojepa}" >&2
  exit 1
fi
cd "$REPO_DIR"

# Use the image's system Python (already has torch 2.4.1+cu124 wired up).
# Install only the extras we need into the system site-packages.
PY=/usr/bin/python3
PIP_EXTRAS=(
  'datasets>=4.8.4' 'einops>=0.8.2' 'matplotlib>=3.10.0'
  'neurokit2>=0.2.13' 'python-dotenv>=1.0' 'pyyaml>=6.0'
  'scikit-learn>=1.5' 'scipy>=1.13' 'tqdm>=4.66'
  'wandb>=0.18' 'wfdb>=4.3.1' 'huggingface_hub>=0.25' 'requests'
)
"$PY" -m pip install --quiet --upgrade pip
"$PY" -m pip install --quiet "${PIP_EXTRAS[@]}"
RUN_PY="$PY"

# Stage env keys (the launcher will have written /workspace/.env into the pod via send)
if [[ -f /workspace/.env ]]; then
  cp /workspace/.env .env
fi

# Both directories are written to below (shard_roots.json and the tee'd log).
# tee opens its log file as soon as the pipeline starts, so /workspace/runs
# must already exist at that point; otherwise the log is lost and, with
# pipefail, the script exits non-zero after training has finished.
mkdir -p /workspace/cache /workspace/runs

# Step 1: prepare data (idempotent: skipped once the index file exists)
if [[ ! -f /workspace/cache/mimic_index.json ]]; then
  echo "[bootstrap] downloading MIMIC shards + building index"
  PYTHONPATH=src "$RUN_PY" scripts/prepare_data.py \
    --root /workspace/cache/mimic \
    --index /workspace/cache/mimic_index.json
fi

# write shard_roots json for trainer: every shard_* directory under the MIMIC
# cache that contains a dataset_info.json, sorted by path.
PYTHONPATH=src "$RUN_PY" -c "
import json, pathlib
roots = sorted([str(p) for p in pathlib.Path('/workspace/cache/mimic').glob('shard_*')
                if (p / 'dataset_info.json').exists()])
pathlib.Path('/workspace/cache/shard_roots.json').write_text(json.dumps(roots))
print('shards:', len(roots))
"

# Step 2: train. stderr is merged into stdout so the log captures both.
echo "[bootstrap] launching training: model=$MODEL"
PYTHONPATH=src PYTHONUNBUFFERED=1 "$RUN_PY" -u scripts/train.py \
  --config configs/base.yaml \
  --model "$MODEL" \
  --run_name "$RUN_NAME" \
  --epochs 25 \
  --shard_roots_json /workspace/cache/shard_roots.json \
  --index_path /workspace/cache/mimic_index.json \
  --output_dir /workspace/runs \
  --num_workers 8 \
  --subset_frac 0.10 \
  --log_every 25 \
  2>&1 | tee "/workspace/runs/${RUN_NAME}.log"

echo "[bootstrap] done"