| | """ |
| | tool_trainer_simple.py - Fine-tune SmolLM2-1.7B for dynamic function calling using LoRA |
| | |
| | This script uses supervised fine-tuning (SFT) instead of DPO, which is simpler and more |
| | compatible with current library versions while still teaching JSON-only responses. |
| | |
| | Key hyperparameters: |
| | - LoRA rank: 8 (small adapter for efficiency) |
| | - Epochs: 3 (enough to learn pattern without overfitting) |
| | - Learning rate: 5e-5 (conservative for stability) |
| | """ |

import json
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset


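# Expected input: each line of tool_pairs.jsonl is a JSON object with at least
# "prompt" and "chosen" fields (any other fields, e.g. a DPO-style "rejected"
# response, are ignored by this SFT script). Illustrative example only:
# {"prompt": "<|im_start|>system...", "chosen": "{\"name\": \"get_stock_price\", \"arguments\": {\"ticker\": \"MSFT\"}}"}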
def load_preference_pairs(file_path="tool_pairs.jsonl"):
    """Load and parse the JSONL preference pairs."""
    pairs = []
    with open(file_path, 'r') as f:
        for line in f:
            if line.strip():  # tolerate blank lines
                pairs.append(json.loads(line))
    return pairs


def format_for_sft(pairs, tokenizer):
    """Convert pairs to SFT format - use only the 'chosen' responses."""
    formatted = []
    for pair in pairs:
        # Concatenate prompt and preferred completion, ending with EOS so the
        # model learns where a finished function call stops.
        full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token
        formatted.append({"text": full_text})
    return formatted


def tokenize_function(examples, tokenizer, max_length=512):
    """Tokenize the training examples."""
    # Padding is deferred to the data collator; overly long examples are truncated.
    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        padding=False,
        max_length=max_length,
        return_tensors=None
    )

    # For causal LM training the labels are the input ids themselves; the model
    # shifts them internally when computing the loss.
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized


def main():
    print("🚀 Starting Dynamic Function-Calling Agent Training (SFT)")
    print("=" * 60)

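    # Load the base model and tokenizer (fp16 on GPU, fp32 on CPU)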
| | print("π₯ Loading SmolLM2-1.7B model and tokenizer...") |
| | model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct" |
| | |
| | tokenizer = AutoTokenizer.from_pretrained(model_name) |
| | if tokenizer.pad_token is None: |
| | tokenizer.pad_token = tokenizer.eos_token |
| | |
| | model = AutoModelForCausalLM.from_pretrained( |
| | model_name, |
| | torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, |
| | device_map="auto" if torch.cuda.is_available() else None, |
| | trust_remote_code=True |
| | ) |
| | |
| | print(f"β
Loaded model: {model_name}") |
| | print(f"π§ Model dtype: {model.dtype}") |
| | print(f"πΎ Model size: ~{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters") |
| | |
| | |
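    # Attach a rank-8 LoRA adapter; only the adapter weights are trained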
| | print("\nπ© Setting up LoRA adapter (rank 8)...") |
| | lora_config = LoraConfig( |
| | r=8, |
| | lora_alpha=16, |
| | target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], |
| | lora_dropout=0.1, |
| | bias="none", |
| | task_type=TaskType.CAUSAL_LM |
| | ) |
| | |
| | model = get_peft_model(model, lora_config) |
| | trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) |
| | total_params = sum(p.numel() for p in model.parameters()) |
| | |
| | print(f"β
LoRA adapter attached") |
| | print(f"π― Trainable parameters: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)") |
| | |
| | |
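    # Load the preference pairs and keep only the "chosen" responses for SFT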
| | print("\nπ Loading preference pairs...") |
| | pairs = load_preference_pairs() |
| | formatted_pairs = format_for_sft(pairs, tokenizer) |
| | |
| | print(f"β
Loaded {len(pairs)} preference pairs") |
| | print("π Sample training text:") |
| | print(formatted_pairs[0]["text"][:200] + "...") |
| | |
| | |
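    # Build a Hugging Face Dataset and tokenize it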
    train_dataset = Dataset.from_list(formatted_pairs)
    tokenized_dataset = train_dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=train_dataset.column_names
    )

    print(f"📊 Tokenized dataset size: {len(tokenized_dataset)} examples")

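    # Training configuration; effective batch size = 1 x 4 accumulation steps = 4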
| | print("\nβοΈ Configuring training (3 epochs)...") |
| | training_args = TrainingArguments( |
| | output_dir="./smollm_tool_adapter", |
| | num_train_epochs=3, |
| | per_device_train_batch_size=1, |
| | gradient_accumulation_steps=4, |
| | learning_rate=5e-5, |
| | warmup_steps=10, |
| | logging_steps=1, |
| | save_steps=50, |
| | save_total_limit=2, |
| | remove_unused_columns=False, |
| | fp16=torch.cuda.is_available(), |
| | dataloader_pin_memory=False, |
| | report_to=None, |
| | logging_dir="./logs" |
| | ) |
| | |
| | |
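    # mlm=False selects plain causal-LM (next-token prediction) batching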
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    )

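    # No eval_dataset is passed, so progress is judged by training loss alone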
| | print("ποΈ Initializing trainer...") |
| | trainer = Trainer( |
| | model=model, |
| | args=training_args, |
| | train_dataset=tokenized_dataset, |
| | data_collator=data_collator, |
| | ) |
| | |
| | print("β
Trainer ready") |
| | |
| | |
| | print("\nπ― Starting training...") |
| | print("β±οΈ This should take ~8-15 minutes on M4 Max, longer on CPU") |
| | |
| | |
| | print("π Beginning training...") |
| | train_result = trainer.train() |
| | |
| | print("\nπ Training completed!") |
| | print(f"π Final training loss: {train_result.training_loss:.4f}") |
| | print(f"β±οΈ Training time: {train_result.metrics.get('train_runtime', 0):.1f} seconds") |
| | |
| | |
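    # Persist the LoRA adapter weights and tokenizer (the frozen base model is not re-saved)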
| | print("\nπΎ Saving model adapter...") |
| | model.save_pretrained("./smollm_tool_adapter") |
| | tokenizer.save_pretrained("./smollm_tool_adapter") |
| | |
| | print("β
Model saved to './smollm_tool_adapter'") |
| | print("π Training complete! Ready for testing.") |
| | |
| | |
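    # Smoke test: one schema plus a user question should yield a JSON-only function call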
| | print("\nπ§ͺ Quick functionality test...") |
| | test_prompt = """<|im_start|>system |
| | You are a helpful assistant that calls functions by responding with valid JSON when given a schema. Always respond with JSON function calls only, never prose.<|im_end|> |
| | |
| | <schema> |
| | { |
| | "name": "get_stock_price", |
| | "description": "Return the latest price for a given ticker symbol.", |
| | "parameters": { |
| | "type": "object", |
| | "properties": { |
| | "ticker": {"type": "string"} |
| | }, |
| | "required": ["ticker"] |
| | } |
| | } |
| | </schema> |
| | |
| | <|im_start|>user |
| | What's Microsoft trading at?<|im_end|> |
| | <|im_start|>assistant |
| | """ |
| | |
| | inputs = tokenizer(test_prompt, return_tensors="pt") |
| | with torch.no_grad(): |
| | outputs = model.generate( |
| | **inputs, |
| | max_new_tokens=50, |
| | temperature=0.1, |
| | do_sample=True, |
| | pad_token_id=tokenizer.eos_token_id |
| | ) |
| | |
| | response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True) |
| | print(f"π€ Model response: {response.strip()}") |
| | |
| | return model, tokenizer |


if __name__ == "__main__":
    model, tokenizer = main()