Upload train_qwen_sovereign.py with huggingface_hub
Browse files
- train_qwen_sovereign.py +7 -10
train_qwen_sovereign.py
CHANGED
|
@@ -21,12 +21,12 @@ def get_config(run_name, hub_model_id):
|
|
| 21 |
logging_steps=1,
|
| 22 |
save_strategy="steps",
|
| 23 |
save_steps=100,
|
| 24 |
-
report_to="none",
|
| 25 |
run_name=run_name,
|
| 26 |
bf16=True,
|
| 27 |
max_seq_length=1024,
|
| 28 |
gradient_checkpointing=True,
|
| 29 |
-
optim="paged_adamw_8bit",
|
| 30 |
max_grad_norm=0.3,
|
| 31 |
warmup_ratio=0.03,
|
| 32 |
lr_scheduler_type="constant",
|
|
@@ -47,14 +47,11 @@ def train_model(model_id, run_name, hub_model_id):
|
|
| 47 |
"""Executes the training job."""
|
| 48 |
print(f"📦 Starting training for {model_id}...")
|
| 49 |
|
| 50 |
-
#
|
| 51 |
try:
|
| 52 |
-
trackio.init(
|
| 53 |
-
except
|
| 54 |
-
|
| 55 |
-
trackio.init(experiment="sovereign-qwen-finetuning")
|
| 56 |
-
except TypeError:
|
| 57 |
-
trackio.init()
|
| 58 |
|
| 59 |
# Configure 4-bit loading for all models to ensure stability
|
| 60 |
bnb_config = BitsAndBytesConfig(
|
|
@@ -74,7 +71,7 @@ def train_model(model_id, run_name, hub_model_id):
|
|
| 74 |
device_map="auto",
|
| 75 |
torch_dtype=torch.bfloat16,
|
| 76 |
trust_remote_code=True,
|
| 77 |
-
attn_implementation="sdpa",
|
| 78 |
)
|
| 79 |
|
| 80 |
print("📥 Loading dataset and applying chat template...")
|
|
|
|
| 21 |
logging_steps=1,
|
| 22 |
save_strategy="steps",
|
| 23 |
save_steps=100,
|
| 24 |
+
report_to="none",
|
| 25 |
run_name=run_name,
|
| 26 |
bf16=True,
|
| 27 |
max_seq_length=1024,
|
| 28 |
gradient_checkpointing=True,
|
| 29 |
+
optim="paged_adamw_8bit",
|
| 30 |
max_grad_norm=0.3,
|
| 31 |
warmup_ratio=0.03,
|
| 32 |
lr_scheduler_type="constant",
|
|
|
|
| 47 |
"""Executes the training job."""
|
| 48 |
print(f"📦 Starting training for {model_id}...")
|
| 49 |
|
| 50 |
+
# Simple initialization for trackio 0.20.2
|
| 51 |
try:
|
| 52 |
+
trackio.init("sovereign-qwen-finetuning")
|
| 53 |
+
except Exception as e:
|
| 54 |
+
print(f"⚠️ Trackio init failed: {e}")
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
# Configure 4-bit loading for all models to ensure stability
|
| 57 |
bnb_config = BitsAndBytesConfig(
|
|
|
|
| 71 |
device_map="auto",
|
| 72 |
torch_dtype=torch.bfloat16,
|
| 73 |
trust_remote_code=True,
|
| 74 |
+
attn_implementation="sdpa",
|
| 75 |
)
|
| 76 |
|
| 77 |
print("📥 Loading dataset and applying chat template...")
|