| | |
| | """ |
| | Script to pre-download the T5-base model and tokenizer with extended timeout settings |
| | """ |
| |
|
| | import os |
| | import time |
| | from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
| |
|
def download_t5_model():
    """Download and cache the T5-base tokenizer and model with a 300 s timeout.

    The timeout is exported through environment variables before the
    downloads start. ``HF_HUB_DOWNLOAD_TIMEOUT`` and ``HF_HUB_ETAG_TIMEOUT``
    are the variable names documented by ``huggingface_hub``;
    ``HF_HUB_TIMEOUT`` and ``REQUESTS_TIMEOUT`` are still set as well so that
    anything relying on the names this script exported previously keeps
    seeing them.

    Returns:
        bool: ``True`` when both ``from_pretrained`` calls complete,
        ``False`` when either one raises (the error and a list of recovery
        hints are printed instead of the exception propagating).
    """
    import sys  # local import: only needed for the sys.modules lookup below

    timeout_seconds = 300
    timeout_variables = (
        'HF_HUB_TIMEOUT',           # legacy name kept for backward compatibility
        'REQUESTS_TIMEOUT',         # legacy name kept for backward compatibility
        'HF_HUB_DOWNLOAD_TIMEOUT',  # documented: timeout while fetching a file
        'HF_HUB_ETAG_TIMEOUT',      # documented: timeout while fetching metadata
    )
    for variable in timeout_variables:
        os.environ[variable] = str(timeout_seconds)

    # The module-level ``transformers`` import has normally loaded
    # huggingface_hub before this function runs, so the environment variables
    # above arrive too late for values captured at import time. Mirror the
    # timeout onto the already-loaded constants module when it is present.
    # NOTE(review): assumes the installed huggingface_hub reads these values
    # through its ``constants`` module at call time - confirm for the pinned
    # version. Only attributes that already exist are overwritten.
    hub_constants = sys.modules.get('huggingface_hub.constants')
    if hub_constants is not None:
        for attribute in ('HF_HUB_DOWNLOAD_TIMEOUT', 'HF_HUB_ETAG_TIMEOUT'):
            if hasattr(hub_constants, attribute):
                setattr(hub_constants, attribute, timeout_seconds)

    print("Downloading T5-base model and tokenizer...")
    print("This may take several minutes depending on your connection...")

    try:
        print("Step 1/2: Downloading tokenizer...")
        # Return values are discarded on purpose: the calls are made only to
        # trigger the download, as the messages below describe.
        AutoTokenizer.from_pretrained('t5-base')
        print("✅ Tokenizer downloaded successfully")

        print("Step 2/2: Downloading model...")
        AutoModelForSeq2SeqLM.from_pretrained('t5-base')
        print("✅ Model downloaded successfully")

        print("🎉 All models downloaded and cached!")
        print("You can now run the training scripts offline.")

        return True

    except Exception as e:
        # Top-level boundary of a helper script: report the problem and let
        # the caller decide what to do with the False result.
        print(f"❌ Download failed: {e}")
        print("\n💡 Alternative solutions:")
        print("1. Try again with better internet connection")
        print("2. Use a VPN if there are regional restrictions")
        print("3. Download manually from: https://huggingface.co/t5-base")
        return False
| |
|
# Script entry point: run the download and report the outcome to the user
# and, through the exit status, to whatever launched the script.
if __name__ == "__main__":
    success = download_t5_model()
    if success:
        print("\n✅ Ready for training! You can now run:")
        print(" powershell -ExecutionPolicy Bypass -File scripts/test_small_training.ps1")
    else:
        print("\n⚠️ Please fix connectivity and try again")
        # Exit non-zero so a calling shell or pipeline can detect that the
        # models were not cached instead of carrying on as if they were.
        raise SystemExit(1)