#!/usr/bin/env bash
# -----------------------------------------------------------------------------
# Sahel-Voice-Core — RunPod bootstrap
# -----------------------------------------------------------------------------
# Run this once inside a fresh RunPod pod (PyTorch 2.2 + CUDA 12.1 template).
# It clones the repo into /workspace, installs training-only dependencies
# (the HF Space requirements.txt is runtime-only), and prepares secrets.
#
# Usage inside the pod's Jupyter terminal:
#
#   curl -fsSL https://huggingface.co/spaces/ous-sow/sahel-agri-voice/resolve/main/scripts/runpod_setup.sh -o setup.sh
#   bash setup.sh
#
# Or, if you've already cloned the repo:
#
#   bash /workspace/sahel-voice/scripts/runpod_setup.sh
#
# After setup, export HF_TOKEN and open notebooks/kaggle_master_trainer.ipynb.
# Cell 3 auto-detects the RunPod environment; no path edits needed.
# -----------------------------------------------------------------------------
set -euo pipefail

# All three are overridable via the environment (useful for mirrors / testing).
REPO_URL="${REPO_URL:-https://huggingface.co/spaces/ous-sow/sahel-agri-voice}"
WORKSPACE="${WORKSPACE:-/workspace}"
REPO_DIR="${REPO_DIR:-${WORKSPACE}/sahel-voice}"

echo "=============================================="
echo " Sahel-Voice-Core — RunPod setup"
echo "=============================================="
echo " Workspace : ${WORKSPACE}"
echo " Repo : ${REPO_DIR}"
echo "=============================================="

# 1. Clone (idempotent)
if [[ ! -d "${REPO_DIR}/.git" ]]; then
  echo ">> Cloning repo..."
  git clone "${REPO_URL}" "${REPO_DIR}"
else
  echo ">> Repo already present — pulling latest."
  # Best-effort update: a diverged or dirty checkout must not abort setup
  # (set -e is active), but the failure should be visible, not swallowed.
  git -C "${REPO_DIR}" pull --ff-only \
    || echo ">> WARNING: git pull failed (local changes or diverged history); continuing with existing checkout." >&2
fi

cd "${REPO_DIR}"

# 2. Training dependencies (not in requirements.txt which is runtime-only)
echo ">> Installing training dependencies..."
pip install -q --upgrade pip

# Pinned training stack, kept apart from the Space's runtime requirements.txt.
# NOTE(review): exact pins (e.g. transformers==5.5.0) are taken as given here —
# verify they exist on PyPI and are mutually compatible before changing them.
TRAIN_PKGS=(
  "transformers==5.5.0"
  "datasets==4.8.4"
  "accelerate==1.13.0"
  "huggingface-hub==1.9.0"
  "peft>=0.13.0"
  "evaluate>=0.4.1"
  "jiwer==3.0.4"
  "librosa==0.10.2"
  "soundfile==0.12.1"
  "tensorboard>=2.14"
  "pypdf>=4.0.0"
  "python-docx>=1.1.0"
)
pip install -q "${TRAIN_PKGS[@]}"

# torchcodec — required by datasets>=4.0 for audio decoding.
# Version must match the installed torch; let pip resolve, fallback to pinned.
echo ">> Installing torchcodec (audio backend for datasets 4.x)..."
if ! pip install -q torchcodec; then
  TORCH_VER=$(python -c "import torch; print(torch.__version__.split('+')[0])" 2>/dev/null || echo "unknown")
  echo " pip resolve failed; torch=${TORCH_VER}. Trying pinned versions..."
  case "${TORCH_VER}" in
    2.4.*) pip install -q "torchcodec==0.1.*" ;;
    2.5.*) pip install -q "torchcodec==0.2.*" ;;
    2.6.*) pip install -q "torchcodec==0.3.*" ;;
    2.7.*|2.8.*) pip install -q "torchcodec==0.4.*" ;;
    *) echo " ⚠️ Unknown torch version — install torchcodec manually." ;;
  esac
fi

# 3. HF token prompt (one-time)
ENV_FILE="${REPO_DIR}/.env"
if [[ -z "${HF_TOKEN:-}" && ! -f "${ENV_FILE}" ]]; then
  echo ""
  echo "=============================================="
  echo " HF_TOKEN not set."
  echo " Get a write-scoped token from"
  echo " https://huggingface.co/settings/tokens"
  echo " Then either:"
  echo " export HF_TOKEN=hf_xxxxxxxx"
  echo " or add it to ${ENV_FILE}:"
  echo " echo 'HF_TOKEN=hf_xxxxxxxx' > ${ENV_FILE}"
  echo "=============================================="
elif [[ -f "${ENV_FILE}" ]]; then
  # Source it so this shell has HF_TOKEN available for downstream commands.
  # set -a auto-exports everything assigned while sourcing.
  set -a
  # shellcheck disable=SC1090
  source "${ENV_FILE}"
  set +a
  echo ">> Loaded env vars from ${ENV_FILE}"
fi

# 4. Persistent output dir for checkpoints (survives pod stop via Volume disk)
for subdir in adapter_bam adapter_ful data audio_feedback; do
  mkdir -p "${WORKSPACE}/${subdir}"
done
# 5. GPU sanity check — runs a short torch probe in the freshly set-up env.
# The heredoc delimiter is quoted ('PY') so the shell expands nothing inside.
python - <<'PY'
import torch
print("=" * 46)
print(f" PyTorch : {torch.__version__}")
print(f" CUDA available : {torch.cuda.is_available()}")
if torch.cuda.is_available():
    p = torch.cuda.get_device_properties(0)
    print(f" GPU : {p.name}")
    print(f" VRAM : {p.total_memory/1e9:.1f} GB")
    print(f" Compute cap : {p.major}.{p.minor}")
print("=" * 46)
PY

# Final summary. Unquoted delimiter so ${REPO_DIR}/${WORKSPACE} expand;
# \$TRAIN_LANG stays literal — the user substitutes it at training time.
cat <<EOF

✅ Setup complete.

Next steps:
 1. Open Jupyter Lab (port 8888 on the pod)
 2. Navigate to: ${REPO_DIR}/notebooks/kaggle_master_trainer.ipynb
 3. Set TRAIN_LANG in Cell 3 (or export TRAIN_LANG=ful before launching)
 4. Run All Cells — Cell 3 auto-detects /workspace and uses RunPod defaults

Checkpoints will be saved to: ${WORKSPACE}/adapter_\$TRAIN_LANG
This path is on the Volume disk — survives pod stop/restart.
EOF