mxguru1 committed on
Commit
cd6db2d
·
verified ·
1 Parent(s): 9441121

Upload train_qwen_sovereign.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_qwen_sovereign.py +7 -10
train_qwen_sovereign.py CHANGED
@@ -21,12 +21,12 @@ def get_config(run_name, hub_model_id):
21
  logging_steps=1,
22
  save_strategy="steps",
23
  save_steps=100,
24
- report_to="none", # Trackio handled manually
25
  run_name=run_name,
26
  bf16=True,
27
  max_seq_length=1024,
28
  gradient_checkpointing=True,
29
- optim="paged_adamw_8bit", # Critical for memory efficiency
30
  max_grad_norm=0.3,
31
  warmup_ratio=0.03,
32
  lr_scheduler_type="constant",
@@ -47,14 +47,11 @@ def train_model(model_id, run_name, hub_model_id):
47
  """Executes the training job."""
48
  print(f"📦 Starting training for {model_id}...")
49
 
50
- # Initialize trackio based on detected support for keywords
51
  try:
52
- trackio.init(project="sovereign-qwen-finetuning", run_name=run_name)
53
- except TypeError:
54
- try:
55
- trackio.init(experiment="sovereign-qwen-finetuning")
56
- except TypeError:
57
- trackio.init()
58
 
59
  # Configure 4-bit loading for all models to ensure stability
60
  bnb_config = BitsAndBytesConfig(
@@ -74,7 +71,7 @@ def train_model(model_id, run_name, hub_model_id):
74
  device_map="auto",
75
  torch_dtype=torch.bfloat16,
76
  trust_remote_code=True,
77
- attn_implementation="sdpa", # Use optimized attention
78
  )
79
 
80
  print("📥 Loading dataset and applying chat template...")
 
21
  logging_steps=1,
22
  save_strategy="steps",
23
  save_steps=100,
24
+ report_to="none",
25
  run_name=run_name,
26
  bf16=True,
27
  max_seq_length=1024,
28
  gradient_checkpointing=True,
29
+ optim="paged_adamw_8bit",
30
  max_grad_norm=0.3,
31
  warmup_ratio=0.03,
32
  lr_scheduler_type="constant",
 
47
  """Executes the training job."""
48
  print(f"📦 Starting training for {model_id}...")
49
 
50
+ # Simple initialization for trackio 0.20.2
51
  try:
52
+ trackio.init("sovereign-qwen-finetuning")
53
+ except Exception as e:
54
+ print(f"⚠️ Trackio init failed: {e}")
 
 
 
55
 
56
  # Configure 4-bit loading for all models to ensure stability
57
  bnb_config = BitsAndBytesConfig(
 
71
  device_map="auto",
72
  torch_dtype=torch.bfloat16,
73
  trust_remote_code=True,
74
+ attn_implementation="sdpa",
75
  )
76
 
77
  print("📥 Loading dataset and applying chat template...")