#!/usr/bin/env bash
#
# Bootstrap a training run inside /workspace.
#
# Usage: <this-script> MODEL RUN_NAME
#   MODEL     model letter, forwarded to scripts/train.py as --model
#   RUN_NAME  run name, forwarded as --run_name and used for the log file name
#
# Steps:
#   1. locate the repo checkout under /workspace
#   2. install the Python dependencies with the system interpreter
#   3. copy /workspace/.env into the repo if it exists
#   4. build the MIMIC index via scripts/prepare_data.py if it is missing
#   5. write the list of shard directories to shard_roots.json
#   6. launch scripts/train.py, mirroring its output into a per-run log file

set -euo pipefail

MODEL="${1:?model letter required}"
RUN_NAME="${2:?run name required}"

readonly WORKSPACE=/workspace
readonly CACHE_DIR="${WORKSPACE}/cache"
readonly MIMIC_ROOT="${CACHE_DIR}/mimic"
readonly INDEX_PATH="${CACHE_DIR}/mimic_index.json"
readonly SHARD_ROOTS_JSON="${CACHE_DIR}/shard_roots.json"
readonly RUNS_DIR="${WORKSPACE}/runs"

echo "[bootstrap] model=$MODEL run=$RUN_NAME"
cd "$WORKSPACE"

# The checkout may exist under either capitalisation; the first match wins.
REPO_DIR=""
for d in PhysioJEPA physiojepa; do
  if [[ -d "$d" ]]; then
    REPO_DIR="$d"
    break
  fi
done
if [[ -z "$REPO_DIR" ]]; then
  # Diagnostics belong on stderr so they are not mistaken for normal output.
  echo "no repo dir found at /workspace/{PhysioJEPA,physiojepa}" >&2
  exit 1
fi
cd "$REPO_DIR"

# Install dependencies into the system Python and use that interpreter
# for every later step.
PY=/usr/bin/python3
"$PY" -m pip install --quiet --upgrade pip
"$PY" -m pip install --quiet \
  'datasets>=4.8.4' 'einops>=0.8.2' 'matplotlib>=3.10.0' \
  'neurokit2>=0.2.13' 'python-dotenv>=1.0' 'pyyaml>=6.0' \
  'scikit-learn>=1.5' 'scipy>=1.13' 'tqdm>=4.66' \
  'wandb>=0.18' 'wfdb>=4.3.1' 'huggingface_hub>=0.25' 'requests'
RUN_PY="$PY"

# Bring the workspace-level .env (if any) into the repo directory.
if [[ -f "${WORKSPACE}/.env" ]]; then
  cp "${WORKSPACE}/.env" .env
fi

# Create the directories this script itself writes into. Without this the
# shard list write and the `tee` log below fail on a fresh workspace.
mkdir -p "$CACHE_DIR" "$RUNS_DIR"

# Only run data preparation when the index file is absent.
if [[ ! -f "$INDEX_PATH" ]]; then
  echo "[bootstrap] downloading MIMIC shards + building index"
  PYTHONPATH=src "$RUN_PY" scripts/prepare_data.py \
    --root "$MIMIC_ROOT" \
    --index "$INDEX_PATH"
fi

# Record every shard_* directory that contains a dataset_info.json.
# The here-doc delimiter is quoted so the shell never expands the Python text.
PYTHONPATH=src "$RUN_PY" - <<'PYCODE'
import json, pathlib
roots = sorted([str(p) for p in pathlib.Path('/workspace/cache/mimic').glob('shard_*')
                if (p / 'dataset_info.json').exists()])
pathlib.Path('/workspace/cache/shard_roots.json').write_text(json.dumps(roots))
print('shards:', len(roots))
PYCODE

# Launch training. stdout and stderr are merged and mirrored into the run log;
# with `pipefail` a training failure still fails this script.
echo "[bootstrap] launching training: model=$MODEL"
PYTHONPATH=src PYTHONUNBUFFERED=1 "$RUN_PY" -u scripts/train.py \
  --config configs/base.yaml \
  --model "$MODEL" \
  --run_name "$RUN_NAME" \
  --epochs 25 \
  --shard_roots_json "$SHARD_ROOTS_JSON" \
  --index_path "$INDEX_PATH" \
  --output_dir "$RUNS_DIR" \
  --num_workers 8 \
  --subset_frac 0.10 \
  --log_every 25 \
  2>&1 | tee "${RUNS_DIR}/${RUN_NAME}.log"

echo "[bootstrap] done"
|