| | """ |
| | Comprehensive script to publish model and codebase to Hugging Face Hub |
| | """ |
| | import argparse |
| | import os |
| | import sys |
| | from pathlib import Path |
| | from huggingface_hub import HfApi, create_repo, upload_folder, upload_file |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| |
|
| | |
# Put the parent of this script's directory at the front of sys.path so
# that modules located there take precedence when imported.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
| |
|
| |
|
def _upload_model_and_tokenizer(model_path: str, repo_id: str) -> bool:
    """Push the model and tokenizer stored at model_path; return True on success."""
    if not os.path.exists(model_path):
        print(f"✗ Model path not found: {model_path}")
        print("  Skipping model upload. You can upload it later.")
        return False

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)

        model.push_to_hub(repo_id)
        tokenizer.push_to_hub(repo_id)
    except Exception as e:
        # Best-effort: report the problem and let the caller continue with
        # the remaining steps instead of aborting the whole run.
        print(f"✗ Error uploading model: {e}")
        print("  You can upload the model separately later.")
        return False

    print("✓ Model and tokenizer uploaded")
    return True


def _upload_code_files(repo_id: str) -> bool:
    """Upload the known code files/directories; return True if none failed."""
    # resolve() first: with a relative __file__ (e.g. "publish_to_hub.py"),
    # Path(...).parent.parent collapses to "." instead of the project root.
    repo_root = Path(__file__).resolve().parent.parent

    # Individual files, looked up relative to the project root.
    code_files = [
        "train.py",
        "inference.py",
        "config.yaml",
        "requirements.txt",
        "setup.py",
        "README.md",
        "MODEL_CARD.md",
        "LICENSE",
        ".gitignore",
    ]

    # Directories uploaded recursively.
    code_dirs = [
        "src",
        "scripts",
    ]

    uploaded_count = 0
    failed_count = 0

    for file_name in code_files:
        file_path = repo_root / file_name
        if not file_path.exists():
            # Missing files are optional and are not counted as failures.
            continue
        try:
            upload_file(
                path_or_fileobj=str(file_path),
                path_in_repo=file_name,
                repo_id=repo_id,
                repo_type="model",
            )
        except Exception as e:
            print(f"  ⚠ Could not upload {file_name}: {e}")
            failed_count += 1
        else:
            print(f"  ✓ Uploaded {file_name}")
            uploaded_count += 1

    for dir_name in code_dirs:
        dir_path = repo_root / dir_name
        if not dir_path.is_dir():
            continue
        try:
            upload_folder(
                folder_path=str(dir_path),
                path_in_repo=dir_name,
                repo_id=repo_id,
                repo_type="model",
                ignore_patterns=["__pycache__", "*.pyc", ".DS_Store"],
            )
        except Exception as e:
            print(f"  ⚠ Could not upload {dir_name}/: {e}")
            failed_count += 1
        else:
            print(f"  ✓ Uploaded {dir_name}/")
            uploaded_count += 1

    print(f"\n✓ Uploaded {uploaded_count} code files/directories")
    if failed_count:
        print(f"⚠ {failed_count} code files/directories could not be uploaded")

    return failed_count == 0


def publish_to_hub(
    model_path: str,
    repo_id: str,
    private: bool = False,
    upload_code: bool = True,
    upload_model: bool = True
) -> bool:
    """
    Publish model and codebase to Hugging Face Hub.

    Every requested step is attempted even when an earlier upload fails, so
    a model problem does not block the code upload. The final summary and
    the return value reflect what actually happened.

    Args:
        model_path: Path to the trained model
        repo_id: Full repository ID (e.g., "username/repo-name")
        private: Whether to make the repository private
        upload_code: Whether to upload code files
        upload_model: Whether to upload the model

    Returns:
        True if the repository was created/verified and every requested
        upload succeeded; False if the repository could not be created or
        any requested upload failed.
    """
    print("=" * 70)
    print("Publishing to Hugging Face Hub")
    print("=" * 70)
    print(f"\nRepository: {repo_id}")
    print(f"Private: {private}")
    print(f"Upload Model: {upload_model}")
    print(f"Upload Code: {upload_code}")

    # Step 1: nothing else can work without the repository, so bail out.
    print("\n[1/4] Creating/verifying repository...")
    try:
        create_repo(
            repo_id=repo_id,
            repo_type="model",
            exist_ok=True,
            private=private
        )
        print(f"✓ Repository ready: {repo_id}")
    except Exception as e:
        print(f"✗ Error creating repository: {e}")
        print("\nMake sure you're logged in:")
        print("  huggingface-cli login")
        return False

    # Step 2: model + tokenizer.
    model_uploaded = False
    if upload_model:
        print("\n[2/4] Uploading model and tokenizer...")
        model_uploaded = _upload_model_and_tokenizer(model_path, repo_id)
    else:
        print("\n[2/4] Skipping model upload (--no-model flag)")

    # Step 3: code files.
    code_ok = True
    if upload_code:
        print("\n[3/4] Uploading code files...")
        try:
            code_ok = _upload_code_files(repo_id)
        except Exception as e:
            print(f"✗ Error uploading code: {e}")
            code_ok = False
    else:
        print("\n[3/4] Skipping code upload (--no-code flag)")

    # Step 4: only claim success when every requested step really succeeded.
    model_ok = model_uploaded or not upload_model
    all_ok = model_ok and code_ok

    if all_ok:
        print("\n[4/4] Publishing complete!")
        print("\n" + "=" * 70)
        print("Success! 🎉")
        print("=" * 70)
    else:
        print("\n[4/4] Publishing finished with errors")
        print("\n" + "=" * 70)
        print("Some steps failed - see the messages above.")
        print("=" * 70)

    if model_uploaded:
        print("\nYour model is now available at:")
    else:
        print("\nYour repository is available at:")
    print(f"https://huggingface.co/{repo_id}")

    # Usage instructions only make sense if the model is actually on the Hub.
    if model_uploaded:
        print("\nTo use your model:")
        print(
            "\n"
            "from transformers import pipeline\n"
            "\n"
            f'classifier = pipeline("text-classification", model="{repo_id}")\n'
            "\n"
            "# Classify a comment\n"
            'result = classifier("This function uses dynamic programming for O(n) time complexity")\n'
            "print(result)\n"
        )

    return all_ok
| |
|
| |
|
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the publishing script."""
    parser = argparse.ArgumentParser(
        description="Publish model and codebase to Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Publish everything (model + code)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier

# Publish only code (no model)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-model

# Publish only model (no code)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-code

# Private repository
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --private
"""
    )
    parser.add_argument(
        "--model-path",
        type=str,
        default="./results/final_model",
        help="Path to the trained model"
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        default="Snaseem2026/code-comment-classifier",
        help="Full repository ID (e.g., 'username/repo-name')"
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Make the repository private"
    )
    parser.add_argument(
        "--no-code",
        action="store_true",
        help="Skip uploading code files"
    )
    parser.add_argument(
        "--no-model",
        action="store_true",
        help="Skip uploading model files"
    )
    parser.add_argument(
        "--yes",
        action="store_true",
        help="Skip confirmation prompt"
    )
    return parser


def main(argv=None, input_fn=input) -> int:
    """
    Run the publishing command-line interface.

    Args:
        argv: Argument list to parse; None means use sys.argv[1:].
        input_fn: Callable used to read the confirmation answer. It receives
            the prompt string and returns the user's reply (defaults to the
            builtin input).

    Returns:
        Process exit code: 0 when publishing succeeded or the user declined,
        1 when publishing failed or no confirmation could be read.
    """
    args = _build_arg_parser().parse_args(argv)

    model_path_mark = '✓' if os.path.exists(args.model_path) else '✗'

    print("\n" + "=" * 70)
    print("Hugging Face Hub Publishing")
    print("=" * 70)
    print("\nBefore publishing, make sure you:")
    print("1. Have a Hugging Face account")
    print("2. Are logged in: huggingface-cli login")
    print("3. Have reviewed MODEL_CARD.md and README.md")
    print(f"4. Model path exists: {args.model_path} ({model_path_mark})")

    if not args.yes:
        print("\n" + "=" * 70)
        try:
            response = input_fn(f"\nProceed with publishing to {args.repo_id}? (yes/no): ")
        except EOFError:
            # stdin is closed or empty (e.g. CI, piped input): fail cleanly
            # instead of crashing with a traceback.
            print("\nNo confirmation received. Re-run with --yes to skip the prompt.")
            return 1
        # strip() so stray whitespace around the answer does not cancel the run.
        if response.strip().lower() not in ('yes', 'y'):
            print("Publishing cancelled.")
            return 0

    success = publish_to_hub(
        model_path=args.model_path,
        repo_id=args.repo_id,
        private=args.private,
        upload_code=not args.no_code,
        upload_model=not args.no_model
    )

    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())
| |
|