Spaces:

MataStrategy
/

ground-zero

Running

jefffffff9

Add torchcodec install for datasets 4.x audio decoding

bb78cbf 13 days ago

4.84 kB

	#!/usr/bin/env bash
	# -----------------------------------------------------------------------------
	# Sahel-Voice-Core — RunPod bootstrap
	# -----------------------------------------------------------------------------
	# Run this once inside a fresh RunPod pod (PyTorch 2.2 + CUDA 12.1 template).
	# It clones the repo into /workspace, installs training-only dependencies
	# (the HF Space requirements.txt is runtime-only), and prepares secrets.
	#
	# Usage inside the pod's Jupyter terminal:
	#
	# curl -fsSL https://huggingface.co/spaces/ous-sow/sahel-agri-voice/resolve/main/scripts/runpod_setup.sh -o setup.sh
	# bash setup.sh
	#
	# Or, if you've already cloned the repo:
	#
	# bash /workspace/sahel-voice/scripts/runpod_setup.sh
	#
	# After setup, export HF_TOKEN and open notebooks/kaggle_master_trainer.ipynb.
	# Cell 3 auto-detects the RunPod environment; no path edits needed.
	# -----------------------------------------------------------------------------
	# Strict mode: stop on any failed command, on use of an unset variable, and
	# when any stage of a pipeline fails.
	set -o errexit -o nounset -o pipefail

	# Locations. Each keeps a non-empty value already present in the environment
	# and otherwise falls back to the default written here.
	: "${REPO_URL:=https://huggingface.co/spaces/ous-sow/sahel-agri-voice}"
	: "${WORKSPACE:=/workspace}"
	: "${REPO_DIR:=${WORKSPACE}/sahel-voice}"

	# Startup banner showing the resolved workspace and checkout paths.
	printf '%s\n' \
	  "==============================================" \
	  " Sahel-Voice-Core — RunPod setup" \
	  "==============================================" \
	  " Workspace : ${WORKSPACE}" \
	  " Repo : ${REPO_DIR}" \
	  "=============================================="

	# 1. Clone (idempotent)
	# No .git directory under REPO_DIR means there is no checkout yet, so clone;
	# otherwise try to fast-forward the existing checkout.
	if [[ ! -d "${REPO_DIR}/.git" ]]; then
	  echo ">> Cloning repo..."
	  git clone "${REPO_URL}" "${REPO_DIR}"
	else
	  echo ">> Repo already present — pulling latest."
	  # Best-effort: a failed fast-forward must not abort setup under errexit,
	  # but it is reported on stderr instead of being silently discarded.
	  git -C "${REPO_DIR}" pull --ff-only \
	    || echo "   warning: 'git pull --ff-only' failed — continuing with the existing checkout." >&2
	fi

	# Continue from inside the checkout.
	cd "${REPO_DIR}"

	# 2. Training dependencies (not in requirements.txt which is runtime-only)
	echo ">> Installing training dependencies..."
	pip install -q --upgrade pip

	# Requirement specifiers, handed to a single pip invocation in this order.
	training_requirements=(
	  "transformers==5.5.0"
	  "datasets==4.8.4"
	  "accelerate==1.13.0"
	  "huggingface-hub==1.9.0"
	  "peft>=0.13.0"
	  "evaluate>=0.4.1"
	  "jiwer==3.0.4"
	  "librosa==0.10.2"
	  "soundfile==0.12.1"
	  "tensorboard>=2.14"
	  "pypdf>=4.0.0"
	  "python-docx>=1.1.0"
	)
	pip install -q "${training_requirements[@]}"

	# torchcodec — required by datasets>=4.0 for audio decoding.
	# Try an unpinned install first; only if that fails, pick a pinned release
	# line based on the installed torch version.
	# NOTE(review): the torch -> torchcodec mapping below is kept as written;
	# confirm it against the compatibility table in the torchcodec README.
	echo ">> Installing torchcodec (audio backend for datasets 4.x)..."
	if ! pip install -q torchcodec; then
	  # Drop any local build suffix (the part after '+'); fall back to "unknown"
	  # when torch cannot be imported, so the case below reaches its default arm.
	  TORCH_VER=$(python -c "import torch; print(torch.__version__.split('+')[0])" 2>/dev/null || echo "unknown")
	  echo " pip resolve failed; torch=${TORCH_VER}. Trying pinned versions..."
	  # Patterns are globs on major.minor: "2.4.*" matches 2.4.0, 2.4.1, ...
	  case "${TORCH_VER}" in
	    2.4.*) pip install -q "torchcodec==0.1.*" ;;
	    2.5.*) pip install -q "torchcodec==0.2.*" ;;
	    2.6.*) pip install -q "torchcodec==0.3.*" ;;
	    2.7.* | 2.8.*) pip install -q "torchcodec==0.4.*" ;;
	    # Not fatal: setup continues and the user is told to install it by hand.
	    *) echo " ⚠️ Unknown torch version — install torchcodec manually." >&2 ;;
	  esac
	fi

	# 3. HF token prompt (one-time)
	ENV_FILE="${REPO_DIR}/.env"
	if [[ -f "${ENV_FILE}" ]]; then
	  # An env file exists: turn on auto-export while reading it so every
	  # variable it assigns is exported from this shell.
	  set -o allexport
	  # shellcheck disable=SC1090
	  . "${ENV_FILE}"
	  set +o allexport
	  echo ">> Loaded env vars from ${ENV_FILE}"
	elif [[ -z "${HF_TOKEN:-}" ]]; then
	  # Neither an env file nor a token in the environment: explain how to add one.
	  printf '%s\n' \
	    "" \
	    "==============================================" \
	    " HF_TOKEN not set." \
	    " Get a write-scoped token from" \
	    " https://huggingface.co/settings/tokens" \
	    " Then either:" \
	    " export HF_TOKEN=hf_xxxxxxxx" \
	    " or add it to ${ENV_FILE}:" \
	    " echo 'HF_TOKEN=hf_xxxxxxxx' > ${ENV_FILE}" \
	    "=============================================="
	fi

	# 4. Persistent output dir for checkpoints (survives pod stop via Volume disk)
	# Brace expansion produces the four directory paths for one mkdir call.
	mkdir -p "${WORKSPACE}"/{adapter_bam,adapter_ful,data,audio_feedback}

	# 5. GPU sanity check
	# Prints the torch version and whether CUDA is available; when it is, also
	# prints the name, total memory and compute capability of device 0.
	# `<<-` strips leading tabs from the here-document, so the tab-indented
	# closing PY is recognised as the terminator. The quoted delimiter stops the
	# shell from expanding anything inside the Python source. The `if` body is
	# indented with spaces, which `<<-` leaves intact.
	python - <<-'PY'
	import torch
	print("=" * 46)
	print(f" PyTorch : {torch.__version__}")
	print(f" CUDA available : {torch.cuda.is_available()}")
	if torch.cuda.is_available():
	    p = torch.cuda.get_device_properties(0)
	    print(f" GPU : {p.name}")
	    print(f" VRAM : {p.total_memory/1e9:.1f} GB")
	    print(f" Compute cap : {p.major}.{p.minor}")
	print("=" * 46)
	PY

	# Closing summary: next steps and the checkpoint location.
	# The escaped "$" keeps TRAIN_LANG as a literal placeholder in the output.
	printf '%s\n' \
	  "" \
	  "✅ Setup complete." \
	  "" \
	  "Next steps:" \
	  " 1. Open Jupyter Lab (port 8888 on the pod)" \
	  " 2. Navigate to: ${REPO_DIR}/notebooks/kaggle_master_trainer.ipynb" \
	  " 3. Set TRAIN_LANG in Cell 3 (or export TRAIN_LANG=ful before launching)" \
	  " 4. Run All Cells — Cell 3 auto-detects /workspace and uses RunPod defaults" \
	  "" \
	  "Checkpoints will be saved to: ${WORKSPACE}/adapter_\$TRAIN_LANG" \
	  "This path is on the Volume disk — survives pod stop/restart."