kmchiti committed on
Commit
ef56d7d
·
verified ·
1 Parent(s): 180db9a

Upload artifacts/training/scripts/run_pretrain_id2-10_0.25easy_0.25medium_0.5hard.sh

Browse files
artifacts/training/scripts/run_pretrain_id2-10_0.25easy_0.25medium_0.5hard.sh ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# Launch scripts/meta_run.sh (passing --skip-rl) for the
# id2-10_0.25easy_0.25medium_0.5hard pretraining config.
#
# Required environment:
#   CUDA_VISIBLE_DEVICES       must be set and non-empty.
#
# Optional environment (default in parentheses):
#   PREPROCESSING_NUM_WORKERS  (SLURM_CPUS_PER_TASK, else `nproc`, else 1)
#   DATALOADER_NUM_WORKERS     (SLURM_CPUS_PER_TASK, else `nproc`, else 1)
#   DATASET_DIR_ROOT           (data) relative to the repo root, or absolute
#   EVAL_DATA_ROOT             (${DATASET_DIR_ROOT}/composition/test)
#   LLAMA_BIN                  (${REPO_ROOT}/.venv/bin/llamafactory-cli)
#   LLAMA_EXTRA_ARGS           appended after the worker-count defaults
#   WANDB_PROJECT              (Interplay-LM-Reasoning)
#   WANDB_ENTITY               (kmchiti)
#
# All command-line arguments are forwarded to scripts/meta_run.sh.
set -euo pipefail

# Resolve the repository root relative to this script's own location, then run
# everything from there so the relative paths below are stable.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../../.." && pwd)"
cd "${REPO_ROOT}"

# llamafactory-cli launches distributed training via `torchrun`, so the venv
# bin dir must be on PATH even when the CLI itself is invoked by absolute path.
export PATH="${REPO_ROOT}/.venv/bin:${PATH}"

# Worker-count default: SLURM allocation first, then the local CPU count,
# then a single worker as the last resort.
if [[ -n "${SLURM_CPUS_PER_TASK:-}" ]]; then
  CPU_WORKERS_DEFAULT="${SLURM_CPUS_PER_TASK}"
elif command -v nproc >/dev/null 2>&1; then
  CPU_WORKERS_DEFAULT="$(nproc)"
else
  CPU_WORKERS_DEFAULT=1
fi

# Explicit overrides from the environment win over the detected CPU count.
DEFAULT_PREPROCESSING_WORKERS="${PREPROCESSING_NUM_WORKERS:-${CPU_WORKERS_DEFAULT}}"
DEFAULT_DATALOADER_WORKERS="${DATALOADER_NUM_WORKERS:-${CPU_WORKERS_DEFAULT}}"

if [[ -z "${CUDA_VISIBLE_DEVICES:-}" ]]; then
  echo "CUDA_VISIBLE_DEVICES must be set before running this script" >&2
  exit 1
fi

LLAMA_BIN_DEFAULT="${REPO_ROOT}/.venv/bin/llamafactory-cli"
DATASET_DIR_ROOT="${DATASET_DIR_ROOT:-data}"
if [[ ! -x "${LLAMA_BIN_DEFAULT}" ]]; then
  echo "Missing ${LLAMA_BIN_DEFAULT}. Run scripts/setup/install_local_llamafactory.sh first." >&2
  exit 1
fi

# An absolute DATASET_DIR_ROOT is used as-is; a relative one is anchored at the
# repo root. Blindly prefixing REPO_ROOT would reject absolute overrides.
case "${DATASET_DIR_ROOT}" in
  /*) DATASET_DIR_ABS="${DATASET_DIR_ROOT}" ;;
  *) DATASET_DIR_ABS="${REPO_ROOT}/${DATASET_DIR_ROOT}" ;;
esac

# Both composition splits must exist before launching.
for split in train test; do
  if [[ ! -d "${DATASET_DIR_ABS}/composition/${split}" ]]; then
    echo "Missing ${DATASET_DIR_ROOT}/composition/${split}. Run scripts/composition/prepare_hf_composition_data.sh first." >&2
    exit 1
  fi
done

export WANDB_PROJECT="${WANDB_PROJECT:-Interplay-LM-Reasoning}"
export WANDB_ENTITY="${WANDB_ENTITY:-kmchiti}"

DEFAULT_LLAMA_ARGS=(
  "preprocessing_num_workers=${DEFAULT_PREPROCESSING_WORKERS}"
  "dataloader_num_workers=${DEFAULT_DATALOADER_WORKERS}"
)

# LLAMA_EXTRA_ARGS is exported as a single space-joined string, hence [*]
# rather than [@]. Caller-supplied args come after the defaults.
if [[ -n "${LLAMA_EXTRA_ARGS:-}" ]]; then
  export LLAMA_EXTRA_ARGS="${DEFAULT_LLAMA_ARGS[*]} ${LLAMA_EXTRA_ARGS}"
else
  export LLAMA_EXTRA_ARGS="${DEFAULT_LLAMA_ARGS[*]}"
fi

# The three variables below are set only in the environment of meta_run.sh.
EVAL_DATA_ROOT="${EVAL_DATA_ROOT:-${DATASET_DIR_ROOT}/composition/test}" \
LLAMA_BIN="${LLAMA_BIN:-${LLAMA_BIN_DEFAULT}}" \
LLAMA_CONFIG="scripts/composition/op-difficulty-10B/pt-diff2_10-tok10B-lr1e-4-bs512k-schedcos-minlr3e-5/id2-10_0.25easy_0.25medium_0.5hard.yaml" \
  ./scripts/meta_run.sh --skip-rl "$@"