"""
Monitor Training and Auto-Deploy
=================================

This script monitors the training process and automatically executes
the remaining deployment steps when training completes.

Usage:
    python monitor_and_deploy.py
"""
| |
|
| | import os |
| | import time |
| | import subprocess |
| | import psutil |
| | from pathlib import Path |
| |
|
def is_training_running():
    """Report whether a process running the trainer script exists.

    Scans every process visible to psutil and looks for
    ``tool_trainer_simple_robust.py`` inside any command-line argument.

    Returns:
        ``(True, pid)`` for the first matching process, otherwise
        ``(False, None)``.
    """
    trainer_script = 'tool_trainer_simple_robust.py'
    for candidate in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            argv = candidate.info['cmdline']
            if not argv:
                # No readable command line for this process; nothing to match.
                continue
            for argument in argv:
                if trainer_script in argument:
                    return True, candidate.info['pid']
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process vanished or is off-limits; move on to the next one.
            continue
    return False, None
| |
|
def check_model_files(lora_dir="./smollm3_robust"):
    """Check whether training has produced the required adapter files.

    Args:
        lora_dir: Directory expected to contain the LoRA adapter. Defaults to
            ``./smollm3_robust`` (relative to the current working directory).

    Returns:
        A ``(all_present, existing_files)`` tuple: ``all_present`` is True only
        when every required file is present, and ``existing_files`` lists the
        required file names that were found, in declaration order.
    """
    adapter_dir = Path(lora_dir)
    required_files = (
        "adapter_config.json",
        "adapter_model.safetensors",
    )

    # is_file() rather than exists(): a directory that merely shares a
    # required name must not count as a produced artifact.
    existing_files = [
        name for name in required_files if (adapter_dir / name).is_file()
    ]

    return len(existing_files) == len(required_files), existing_files
| |
|
def run_command(cmd, description, timeout=None):
    """Run a command, report the outcome on stdout, and return success status.

    Args:
        cmd: Command to execute. A string is handed to the shell; a sequence
            of arguments is executed directly, without a shell.
        description: Human-readable label used in the progress messages.
        timeout: Optional limit in seconds. When it elapses the command is
            killed and the call is reported as a failure. ``None`` (the
            default) waits indefinitely.

    Returns:
        True when the command exits with status 0; False on a non-zero exit,
        a timeout, or a failure to launch the command.
    """
    print(f"🔄 {description}...")
    try:
        result = subprocess.run(
            cmd,
            shell=isinstance(cmd, str),
            capture_output=True,
            text=True,
            # Undecodable bytes in the child's output must not turn an
            # otherwise successful command into a reported failure.
            errors="replace",
            timeout=timeout,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Covers a missing executable, invalid arguments, and TimeoutExpired.
        print(f"❌ {description} failed with exception: {e}")
        return False

    stdout = result.stdout.strip()
    stderr = result.stderr.strip()

    if result.returncode == 0:
        print(f"✅ {description} completed!")
        if stdout:
            print(f" Output: {stdout}")
        return True

    print(f"❌ {description} failed!")
    print(f" Error: {stderr}")
    if stdout:
        # Many tools write their diagnostics to stdout rather than stderr.
        print(f" Output: {stdout}")
    return False
| |
|
def _wait_for_training(poll_seconds=10):
    """Block until the trainer process exits or the adapter files appear."""
    training_running, pid = is_training_running()
    if not training_running:
        print("ℹ️ No training process running. Checking for existing model files...")
        return

    print(f"⏳ Training is running (PID: {pid}). Waiting for completion...")

    while training_running:
        time.sleep(poll_seconds)
        training_running, _ = is_training_running()

        # The adapter files are treated as a completion signal even while the
        # trainer process is still alive.
        # NOTE(review): files left over from an earlier run, or a file that is
        # still being written, also satisfy this check -- confirm the output
        # directory is clean before training starts.
        files_ready, existing = check_model_files()
        if existing:
            print(f"📁 Found files: {existing}")

        if files_ready:
            print("🎉 Model files detected! Training appears complete.")
            break

    print("✅ Training process completed!")


def _deploy():
    """Upload the adapter, test it, and push to Spaces; return (uploaded, deployed)."""
    if not run_command("python upload_lora_to_hub.py", "Upload LoRA to Hugging Face Hub"):
        print("❌ Hub upload failed. Please run upload_lora_to_hub.py manually")
        return False, False

    if run_command("python test_constrained_model.py", "Test model locally"):
        deployed = run_command("git push space deploy-lite:main", "Deploy to HF Spaces")
        if deployed:
            print("\n🎉 COMPLETE SUCCESS!")
            print("🔗 Check your Hugging Face Spaces: https://huggingface.co/spaces/jlov7/Dynamic-Function-Calling-Agent")
            print("🔗 LoRA Model Hub: https://huggingface.co/jlov7/SmolLM3-Function-Calling-LoRA")
        else:
            print("⚠️ HF Spaces deployment failed, but model is uploaded to Hub")
    else:
        # A failed local test is only a warning: deployment is still attempted.
        print("⚠️ Local testing had issues, but proceeding with deployment")
        deployed = run_command("git push space deploy-lite:main", "Deploy to HF Spaces anyway")

    return True, deployed


def _status_line(ok, label):
    """Format one final-status line with a pass/fail marker."""
    return f"{'✅' if ok else '❌'} {label}"


def main():
    """Wait for training to finish, then upload, test, and deploy the model.

    Steps:
        1. Wait while a trainer process is running (or until the adapter
           files show up).
        2. Verify that every required adapter file exists; stop otherwise.
        3. Upload to the Hub, run the local test, and push to HF Spaces.
        4. Print a final status that reflects what actually succeeded.

    Returns:
        0 when both the Hub upload and the Spaces deployment succeeded,
        1 otherwise (including when the model files are missing).
    """
    print("🔍 Monitoring training and preparing auto-deployment...")
    print("=" * 60)

    _wait_for_training()

    files_ready, existing = check_model_files()
    if not files_ready:
        print(f"❌ Required model files not found. Found: {existing}")
        print("💡 Please ensure training completed successfully.")
        return 1

    print("✅ All required model files found!")

    print("\n🚀 Executing automated deployment...")
    uploaded, deployed = _deploy()

    print("\n📊 Final Status:")
    print("✅ PEFT dependency added")
    print("✅ Hub loading enabled")
    print("✅ Training completed")
    # Report the real outcome instead of unconditionally claiming success.
    print(_status_line(uploaded, "Model uploaded to Hub"))
    print(_status_line(deployed, "Deployed to HF Spaces"))

    if uploaded and deployed:
        print("\n🎉 Your fine-tuned model should now work everywhere!")
        return 0
    return 1
| |
|
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status so
    # callers (shell scripts, CI) can detect failure; None maps to status 0.
    raise SystemExit(main())