ground-zero / scripts /runpod_setup.sh
jefffffff9
Add torchcodec install for datasets 4.x audio decoding
bb78cbf
#!/usr/bin/env bash
# -----------------------------------------------------------------------------
# Sahel-Voice-Core — RunPod bootstrap
# -----------------------------------------------------------------------------
# Run this once inside a fresh RunPod pod (PyTorch 2.2 + CUDA 12.1 template).
# It clones the repo into /workspace, installs training-only dependencies
# (the HF Space requirements.txt is runtime-only), and prepares secrets.
#
# Usage inside the pod's Jupyter terminal:
#
# curl -fsSL https://huggingface.co/spaces/ous-sow/sahel-agri-voice/resolve/main/scripts/runpod_setup.sh -o setup.sh
# bash setup.sh
#
# Or, if you've already cloned the repo:
#
# bash /workspace/sahel-voice/scripts/runpod_setup.sh
#
# After setup, export HF_TOKEN and open notebooks/kaggle_master_trainer.ipynb.
# Cell 3 auto-detects the RunPod environment; no path edits needed.
# -----------------------------------------------------------------------------
# Strict mode: abort on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Locations. A value already present in the environment wins; the default is
# only assigned when the variable is unset or empty.
: "${REPO_URL:=https://huggingface.co/spaces/ous-sow/sahel-agri-voice}"
: "${WORKSPACE:=/workspace}"
: "${REPO_DIR:=${WORKSPACE}/sahel-voice}"

# Start-up banner showing the resolved paths.
banner_rule="=============================================="
printf '%s\n' \
  "${banner_rule}" \
  " Sahel-Voice-Core — RunPod setup" \
  "${banner_rule}" \
  " Workspace : ${WORKSPACE}" \
  " Repo : ${REPO_DIR}" \
  "${banner_rule}"
# 1. Clone (idempotent)
# No checkout yet  -> clone REPO_URL into REPO_DIR.
# Checkout present -> try a fast-forward-only pull. The pull stays best-effort
# (a failure must not abort setup), but the failure is reported on stderr
# instead of being silently discarded, so a stale checkout is visible.
if [[ ! -d "${REPO_DIR}/.git" ]]; then
  echo ">> Cloning repo..."
  git clone "${REPO_URL}" "${REPO_DIR}"
else
  echo ">> Repo already present — pulling latest."
  if ! git -C "${REPO_DIR}" pull --ff-only; then
    echo ">> WARNING: 'git pull --ff-only' failed — continuing with the existing checkout." >&2
  fi
fi
# Continue from inside the checkout.
cd "${REPO_DIR}"
# 2. Training dependencies (not in requirements.txt which is runtime-only)
echo ">> Installing training dependencies..."
pip install -q --upgrade pip

# Requirement specifiers for the training stack, one per array element, passed
# to a single pip invocation.
training_requirements=(
  "transformers==5.5.0"
  "datasets==4.8.4"
  "accelerate==1.13.0"
  "huggingface-hub==1.9.0"
  "peft>=0.13.0"
  "evaluate>=0.4.1"
  "jiwer==3.0.4"
  "librosa==0.10.2"
  "soundfile==0.12.1"
  "tensorboard>=2.14"
  "pypdf>=4.0.0"
  "python-docx>=1.1.0"
)
pip install -q "${training_requirements[@]}"
# torchcodec — required by datasets>=4.0 for audio decoding.
# First attempt is an unpinned install. Only when that pip command fails is the
# installed torch version looked up and a pinned torchcodec series tried.
# NOTE(review): torch 2.2 (the template named in this file's header) has no arm
# below and ends in the "install manually" branch. Also confirm the
# torch -> torchcodec pairs against torchcodec's published compatibility table.
echo ">> Installing torchcodec (audio backend for datasets 4.x)..."
if ! pip install -q torchcodec; then
  # Keep only the part of torch.__version__ before any '+'; fall back to
  # "unknown" when the python command fails.
  TORCH_VER=$(python -c "import torch; print(torch.__version__.split('+')[0])" 2>/dev/null || echo "unknown")
  echo " pip resolve failed; torch=${TORCH_VER}. Trying pinned versions..."

  # Pick the pinned requirement for this torch series (empty = no known match).
  torchcodec_pin=""
  case "${TORCH_VER}" in
    2.4.*)
      torchcodec_pin="torchcodec==0.1.*"
      ;;
    2.5.*)
      torchcodec_pin="torchcodec==0.2.*"
      ;;
    2.6.*)
      torchcodec_pin="torchcodec==0.3.*"
      ;;
    2.7.* | 2.8.*)
      torchcodec_pin="torchcodec==0.4.*"
      ;;
  esac

  if [[ -n "${torchcodec_pin}" ]]; then
    pip install -q "${torchcodec_pin}"
  else
    echo " ⚠️ Unknown torch version — install torchcodec manually."
  fi
fi
# 3. HF token prompt (one-time)
# Cases:
#   - ${ENV_FILE} exists       -> source it with auto-export (set -a) so its
#                                 variables are exported from this shell; warn
#                                 if HF_TOKEN is still empty afterwards
#   - no file, HF_TOKEN empty  -> print instructions for providing a token
#   - no file, HF_TOKEN set    -> nothing to do
ENV_FILE="${REPO_DIR}/.env"
if [[ -f "${ENV_FILE}" ]]; then
  # Source it so this shell has HF_TOKEN available for downstream commands
  set -a
  # shellcheck disable=SC1090
  source "${ENV_FILE}"
  set +a
  echo ">> Loaded env vars from ${ENV_FILE}"
  # The file may exist without defining the token; say so instead of
  # reporting success and leaving the user to find out later.
  if [[ -z "${HF_TOKEN:-}" ]]; then
    echo ">> WARNING: ${ENV_FILE} was loaded but HF_TOKEN is still empty — add HF_TOKEN=hf_xxxxxxxx to it or export it." >&2
  fi
elif [[ -z "${HF_TOKEN:-}" ]]; then
  echo ""
  echo "=============================================="
  echo " HF_TOKEN not set."
  echo " Get a write-scoped token from"
  echo " https://huggingface.co/settings/tokens"
  echo " Then either:"
  echo " export HF_TOKEN=hf_xxxxxxxx"
  echo " or add it to ${ENV_FILE}:"
  echo " echo 'HF_TOKEN=hf_xxxxxxxx' > ${ENV_FILE}"
  echo "=============================================="
fi
# 4. Persistent output dir for checkpoints (survives pod stop via Volume disk)
# All directories go to one `mkdir -p` call, so ones that already exist are
# not an error and missing parents are created.
persistent_dirs=(
  "${WORKSPACE}/adapter_bam"
  "${WORKSPACE}/adapter_ful"
  "${WORKSPACE}/data"
  "${WORKSPACE}/audio_feedback"
)
mkdir -p "${persistent_dirs[@]}"
# 5. GPU sanity check
# Prints the PyTorch version and whether CUDA is available; when it is, also
# prints the name, total memory and compute capability of device 0.
# The heredoc delimiter is quoted, so the shell passes the Python text through
# without expanding anything in it.
python - <<'PY'
import torch

print("=" * 46)
print(f" PyTorch : {torch.__version__}")
print(f" CUDA available : {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Everything that reads `p` must stay inside this branch.
    p = torch.cuda.get_device_properties(0)
    print(f" GPU : {p.name}")
    print(f" VRAM : {p.total_memory/1e9:.1f} GB")
    print(f" Compute cap : {p.major}.{p.minor}")
print("=" * 46)
PY
# Closing summary: success marker, the manual next steps, and where the
# checkpoints will be written.
# \$TRAIN_LANG is escaped on purpose so the literal text "$TRAIN_LANG" is shown.
printf '%s\n' \
  "" \
  "✅ Setup complete." \
  "" \
  "Next steps:" \
  " 1. Open Jupyter Lab (port 8888 on the pod)" \
  " 2. Navigate to: ${REPO_DIR}/notebooks/kaggle_master_trainer.ipynb" \
  " 3. Set TRAIN_LANG in Cell 3 (or export TRAIN_LANG=ful before launching)" \
  " 4. Run All Cells — Cell 3 auto-detects /workspace and uses RunPod defaults" \
  "" \
  "Checkpoints will be saved to: ${WORKSPACE}/adapter_\$TRAIN_LANG" \
  "This path is on the Volume disk — survives pod stop/restart."