{
  "base_model": "Qwen/Qwen2.5-1.5B-Instruct",
  "train_file": "data/train.jsonl",
  "valid_file": "data/valid.jsonl",
  "output_dir": "outputs/GravityLLM-Qwen2.5-1.5B-S9",
  "max_length": 2048,
  "num_train_epochs": 3,
  "learning_rate": 0.0002,
  "train_batch_size": 1,
  "eval_batch_size": 1,
  "gradient_accumulation_steps": 16,
  "warmup_ratio": 0.03,
  "weight_decay": 0.0,
  "logging_steps": 10,
  "save_steps": 100,
  "eval_steps": 100,
  "seed": 42,
  "qlora": true,
  "bf16": true,
  "lora_r": 16,
  "lora_alpha": 32,
  "lora_dropout": 0.05
}