# OminiControlRotation / nl_tasks/scripts/train_cms_reasoning.sh
# Uploaded by nvan15 -- commit message: "Batch upload part 2"
# Commit 6bb0065 (verified)
# --- Environment setup -------------------------------------------------------
# Config file passed to the training entry point via --config_path.
export OMINI_CONFIG=./config/commonsense.yaml
# echo "$OMINI_CONFIG"
export TOKENIZERS_PARALLELISM=true

# Directory expected to contain the CUDA headers (cuda.h).
CUDA_INCLUDE_PATH="/home/work/miniconda3/envs/allm/include"

# Append the CUDA include directory to the C / C++ compiler search paths.
# ${VAR:+${VAR}:} only emits the separator when VAR is already non-empty, so an
# unset variable no longer produces a leading ':' (GCC treats an empty path
# element as "search the current working directory").
export CPATH="${CPATH:+${CPATH}:}${CUDA_INCLUDE_PATH}"
export CPLUS_INCLUDE_PATH="${CPLUS_INCLUDE_PATH:+${CPLUS_INCLUDE_PATH}:}${CUDA_INCLUDE_PATH}"
# echo "CPATH is set to: $CPATH"
# echo "CPLUS_INCLUDE_PATH is set to: $CPLUS_INCLUDE_PATH"

export WANDB_PROJECT="Llama2_7B_FT_Math40k_2"

# Restrict OpenMP / MKL / OpenBLAS / NumExpr to a single thread each.
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
export OPENBLAS_NUM_THREADS=1
export NUMEXPR_NUM_THREADS=1

# Print the start timestamp (YYYY-MM-DD HH:MM:SS).
date +"%F %T"
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex01" --trainer_args.learning_rate=5e-5 \
# --rotation_adapter_config.num_rotations 2 --rotation_adapter_config.r 8
# sleep 5
# echo "1st exp finishes"
# date +"%F %T"
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex02" --trainer_args.learning_rate=5e-4 \
# --rotation_adapter_config.num_rotations 2 --rotation_adapter_config.r 8
# sleep 5
# echo "2nd exp finishes"
# date +"%F %T"
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex03" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 2 --rotation_adapter_config.r 8
# sleep 5
# echo "3rd exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex04" --trainer_args.learning_rate=2e-3 \
# --rotation_adapter_config.num_rotations 4 --rotation_adapter_config.r 4
# sleep 5
# echo "4th exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex05" --trainer_args.learning_rate=2e-3 \
# --rotation_adapter_config.num_rotations 2 --rotation_adapter_config.r 8
# sleep 5
# echo "5th exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex06" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 4 --rotation_adapter_config.r 4
# sleep 5
# echo "6th exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_exps7" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16
# sleep 5
# echo "7th exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex08" --trainer_args.learning_rate=2e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16
# sleep 5
# echo "8th exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex09" --trainer_args.learning_rate=2e-3 \
# --rotation_adapter_config.num_rotations 16 --rotation_adapter_config.r 1
# sleep 5
# echo "9th exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex10" --trainer_args.learning_rate=2e-3 \
# --rotation_adapter_config.num_rotations 8 --rotation_adapter_config.r 2
# sleep 5
# echo "10 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex11" --trainer_args.learning_rate=1e-2 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16
# sleep 5
# echo "11 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex12" --trainer_args.learning_rate=1e-2 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'u=v,def'
# sleep 5
# echo "12 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
### continue with 40k
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex13_3ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def' \
# --trainer_args.num_train_epochs 3.0 --trainer_args.eval_steps 200
# sleep 5
# echo "13 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex14_3ep" --trainer_args.learning_rate=2e-4 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def' \
# --trainer_args.num_train_epochs 3.0 --trainer_args.eval_steps 200
# sleep 5
# echo "14 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex15_3ep" --trainer_args.learning_rate=5e-4 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def' \
# --trainer_args.num_train_epochs 3.0 --trainer_args.eval_steps 200
# sleep 5
# echo "15 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex16_3ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.05' \
# --trainer_args.num_train_epochs 3.0 --trainer_args.eval_steps 200
# sleep 5
# echo "16 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex17_3ep" --trainer_args.learning_rate=2e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.05' \
# --trainer_args.num_train_epochs 3.0 --trainer_args.eval_steps 200
# sleep 5
# echo "17 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex18_2ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 200
# sleep 5
# echo "18 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# accelerate launch --main_process_port 41353 -m src.ft_mathR \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex19_2ep" --trainer_args.learning_rate=5e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 200
# sleep 5
# echo "19 exp finishes"
# date +"%F %T"
# wandb sync wandb/latest-run
# 140126
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex20_2ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 11
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex21_2ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 10
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex24_3ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 3.0 --trainer_args.eval_steps 100 --seed 10
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex25_3ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 3.0 --trainer_args.eval_steps 100 --seed 12
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex22_2ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 12
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex23_3ep" --trainer_args.learning_rate=1e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 3.0 --trainer_args.eval_steps 100 --seed 11
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex26_2ep" --trainer_args.learning_rate=8e-4 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 11
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex27_2ep" --trainer_args.learning_rate=8e-4 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 10
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex28_2ep" --trainer_args.learning_rate=2e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 11
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exps/run_ex29_2ep" --trainer_args.learning_rate=2e-3 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 10
# Active run: ex30 -- learning rate 8e-4, 1 rotation with r=16, 2 epochs,
# evaluation every 100 steps, seed 20. Output goes to ./exprep/run_ex30.
# OMINI_CONFIG is quoted so a config path containing spaces or glob characters
# is passed to the launcher as a single argument.
accelerate launch --main_process_port 41353 -m src.ft_mathQ \
  --config_path "$OMINI_CONFIG" \
  --trainer_args.output_dir "./exprep/run_ex30" \
  --trainer_args.learning_rate=8e-4 \
  --rotation_adapter_config.num_rotations 1 \
  --rotation_adapter_config.r 16 \
  --run_text 'init=def|dr0.10' \
  --trainer_args.num_train_epochs 2.0 \
  --trainer_args.eval_steps 100 \
  --seed 20
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exprep/run_ex31" --trainer_args.learning_rate=8e-4 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 21
# accelerate launch --main_process_port 41353 -m src.ft_mathQ \
# --config_path $OMINI_CONFIG --trainer_args.output_dir "./exprep/run_ex32" --trainer_args.learning_rate=8e-4 \
# --rotation_adapter_config.num_rotations 1 --rotation_adapter_config.r 16 --run_text 'init=def|dr0.10' \
# --trainer_args.num_train_epochs 2.0 --trainer_args.eval_steps 100 --seed 22