| |
| """ |
| Stack 2.9 HuggingFace Upload Script |
| Pushes the trained model to HuggingFace Hub with proper model card. |
| """ |
|
|
| import argparse |
| import os |
| import sys |
| from pathlib import Path |
|
|
| |
# Make the repository root importable when this script is run directly
# (the script lives one level below the project root).
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
def parse_args(argv=None):
    """Parse command-line arguments for the upload script.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            (standard argparse behavior); exposed as a parameter so the
            parser can be exercised in tests.

    Returns:
        argparse.Namespace with the parsed options.
    """
    parser = argparse.ArgumentParser(description="Upload Stack 2.9 to HuggingFace")
    parser.add_argument(
        "--model-path",
        type=str,
        default="./output/stack-2.9-quantized",
        help="Path to quantized model"
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        required=True,
        help="HuggingFace repo ID (e.g., 'username/stack-2.9')"
    )
    parser.add_argument(
        "--token",
        type=str,
        default=None,
        help="HuggingFace token (or set HF_TOKEN env var)"
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Create private repo"
    )
    # BUG FIX: these two used action="store_true" with default=True, so the
    # flags were always True and could never be turned off from the command
    # line. BooleanOptionalAction (Python 3.9+) keeps `--create-model-card`
    # / `--push-to-hub` working and adds the matching `--no-...` negations.
    parser.add_argument(
        "--create-model-card",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Create model card automatically"
    )
    parser.add_argument(
        "--push-to-hub",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Actually push to Hub (else just prepare locally)"
    )
    parser.add_argument(
        "--add-spaces",
        action="store_true",
        help="Create Gradio Spaces demo"
    )
    return parser.parse_args(argv)
|
|
|
|
def get_token():
    """Return a HuggingFace token from the environment, or None.

    Checks ``HF_TOKEN`` first, then ``HUGGING_FACE_HUB_TOKEN`` (the
    variable huggingface_hub itself honors), then the legacy
    ``HUGGING_FACE_TOKEN`` spelling. Note: the CLI ``--token`` argument
    is handled by the caller, not here.
    """
    return (
        os.environ.get("HF_TOKEN")
        or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        or os.environ.get("HUGGING_FACE_TOKEN")
    )
|
|
|
|
def create_model_card(args, base_model: str = "Qwen/Qwen2.5-Coder-32B") -> str:
    """Generate model card (README.md) content for the Hub repo.

    Args:
        args: Parsed CLI namespace; only ``args.repo_id`` is read.
        base_model: Base model identifier recorded in the card's YAML
            front matter.

    Returns:
        The complete model card as a Markdown string.
    """
    # Pull the "## Results" section out of BENCHMARKS.md (expected next to
    # the repo root); if the file or section is missing, the Benchmarks
    # section of the card is simply left empty.
    benchmarks = ""
    benchmarks_path = Path(__file__).parent.parent / "BENCHMARKS.md"
    if benchmarks_path.exists():
        benchmarks_content = benchmarks_path.read_text(encoding="utf-8")

        if "## Results" in benchmarks_content:
            benchmarks = benchmarks_content.split("## Results")[1].split("#")[0]

    # NOTE: literal braces in the embedded example code are escaped as
    # {{ / }} because this is an f-string.
    model_card = f"""---
title: Stack 2.9
base_model: {base_model}
tags:
- stack-2.9
- open-source
- claude-competitor
- code-generation
- qwen
- fine-tuned
- transformers
pipeline_tag: text-generation
license: apache-2.0
---

# Stack 2.9

Stack 2.9 is a fine-tuned version of Qwen2.5-Coder-32B, specialized for code generation and software development tasks.

## Model Details

- **Base Model**: Qwen2.5-Coder-32B
- **Training Data**: Curated coding examples and educational content
- **Fine-tuning Method**: LoRA + Merge
- **Quantization**: 4-bit (bitsandbytes)

## Quick Start

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "{args.repo_id}",
    torch_dtype="auto",
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained("{args.repo_id}")

# Chat format
messages = [
    {{"role": "system", "content": "You are Stack, a helpful coding assistant."}},
    {{"role": "user", "content": "Write a Python function to calculate fibonacci numbers"}}
]

text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

outputs = model.generate(**inputs, max_new_tokens=512)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

## Requirements

```bash
pip install transformers>=4.40.0 torch>=2.0.0 accelerate
```

## Inference with vLLM

```bash
vllm serve {args.repo_id} --dtype half
```

## Benchmarks

{benchmarks}

## Limitations

- Trained on limited dataset; may not cover all edge cases
- Context window: 32K tokens
- Model may produce incorrect code; always verify outputs

## License

Apache 2.0 - See LICENSE file for details.

## Citation

```bibtex
@misc{{stack-2.9,
  title = {{Stack 2.9}},
  author = {{Stack Team}},
  year = {{2024}},
  url = {{https://huggingface.co/{args.repo_id}}}
}}
```
"""
    return model_card
|
|
|
|
def create_gradio_demo(repo_id: str) -> str:
    """Return the source code of a standalone Gradio demo app as a string.

    The template below is a plain (non-f) string, so the literal
    ``{{REPO_ID}}`` placeholder survives until the ``replace`` call at the
    end substitutes the actual repo ID. The generated app loads the model
    in fp16, wraps prompts in the Stack chat template, and exposes a
    simple prompt/max-tokens/temperature interface.

    Args:
        repo_id: HuggingFace repo ID to bake into the generated app.

    Returns:
        Python source for ``app.py``, ready to be written to a Space.
    """
    demo_code = '''import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load model
MODEL_NAME = "{{REPO_ID}}"

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

def generate(prompt, max_tokens, temperature):
    messages = [
        {"role": "system", "content": "You are Stack, a helpful coding assistant."},
        {"role": "user", "content": prompt}
    ]

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            do_sample=True
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract just the assistant response
    return response.split("assistant")[-1].strip()

demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Write a Python function..."),
        gr.Slider(32, 1024, value=512, step=32, label="Max Tokens"),
        gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
    ],
    outputs=gr.Markdown(label="Response"),
    title="Stack 2.9 Demo",
    description="Try Stack 2.9 - a code generation model"
)

demo.launch()
'''
    # Substitute the placeholder with the caller-supplied repo ID.
    return demo_code.replace("{{REPO_ID}}", repo_id)
|
|
|
|
def main():
    """CLI entry point for the upload script.

    Steps: resolve the token, validate the model path, optionally write a
    model card into the model folder, upload the folder to the Hub, and
    optionally scaffold a local Gradio Spaces demo.

    Returns:
        0 on success. Exits with status 1 on missing token, missing model
        path, missing huggingface_hub, or upload failure.
    """
    args = parse_args()

    # Token precedence: explicit --token beats environment variables.
    token = args.token or get_token()
    if not token:
        print("Error: No HuggingFace token provided")
        print("Set HF_TOKEN environment variable or pass --token")
        sys.exit(1)

    print("=" * 60)
    print("Stack 2.9 HuggingFace Upload")
    print("=" * 60)
    print(f"Model path: {args.model_path}")
    print(f"Repo ID: {args.repo_id}")
    print(f"Private: {args.private}")
    print("=" * 60)

    if not os.path.exists(args.model_path):
        print(f"Error: Model path {args.model_path} does not exist")
        sys.exit(1)

    if args.create_model_card:
        print("Creating model card...")
        model_card = create_model_card(args)
        model_card_path = os.path.join(args.model_path, "README.md")
        # BUG FIX: write UTF-8 explicitly; the platform default encoding
        # (e.g. cp1252 on Windows) can fail on non-ASCII card content.
        with open(model_card_path, "w", encoding="utf-8") as f:
            f.write(model_card)
        print(f"  Created: {model_card_path}")

    if args.push_to_hub:
        print("\nPushing to HuggingFace Hub...")
        try:
            # Imported lazily so local-only runs work without the package.
            from huggingface_hub import HfApi, create_repo

            api = HfApi(token=token)
            try:
                create_repo(
                    args.repo_id,
                    token=token,
                    private=args.private,
                    repo_type="model",
                    exist_ok=True
                )
                print(f"  Repo created/verified: {args.repo_id}")
            except Exception as e:
                # Best-effort: exist_ok=True already tolerates an existing
                # repo; anything else is reported and the upload is still
                # attempted (it will fail loudly below if the repo is bad).
                print(f"  Repo creation: {e}")

            print("  Uploading model files...")
            api.upload_folder(
                folder_path=args.model_path,
                repo_id=args.repo_id,
                repo_type="model",
                commit_message="Upload Stack 2.9 model"
            )

            print(f"\n✓ Successfully uploaded to https://huggingface.co/{args.repo_id}")

        except ImportError:
            print("Error: huggingface_hub not installed")
            print("Run: pip install huggingface_hub")
            sys.exit(1)
        except Exception as e:
            print(f"Upload failed: {e}")
            sys.exit(1)

    if args.add_spaces:
        print("\nCreating Gradio Spaces demo...")
        demo_code = create_gradio_demo(args.repo_id)
        spaces_dir = "./stack-2.9-spaces"
        os.makedirs(spaces_dir, exist_ok=True)

        with open(os.path.join(spaces_dir, "app.py"), "w", encoding="utf-8") as f:
            f.write(demo_code)

        with open(os.path.join(spaces_dir, "requirements.txt"), "w", encoding="utf-8") as f:
            f.write("""gradio
transformers>=4.40.0
torch>=2.0.0
accelerate
""")

        # BUG FIX: this README contains an emoji, so UTF-8 is mandatory on
        # Windows; the Spaces link previously mangled the repo ID with
        # replace('/', '-'), producing a dead URL — Spaces URLs use the
        # owner/name form directly.
        with open(os.path.join(spaces_dir, "README.md"), "w", encoding="utf-8") as f:
            f.write(f"""---
title: Stack 2.9 Demo
emoji: 🤖
colorFrom: blue
colorTo: purple
sdk: gradio
app_file: app.py
pinned: false
---

# Stack 2.9 Gradio Demo

Live demo of Stack 2.9 code generation model.

[Launch on HuggingFace Spaces](https://huggingface.co/spaces/{args.repo_id})
""")

        print(f"  Created: {spaces_dir}/")

        if args.push_to_hub:
            try:
                from huggingface_hub import create_repo
                # BUG FIX: Spaces live in their own namespace, so the same
                # owner/name repo ID is used as-is. The old
                # repo_id.replace("models", "spaces") was a no-op for
                # typical IDs and corrupted any ID containing "models".
                spaces_repo = args.repo_id
                create_repo(spaces_repo, token=token, repo_type="space", exist_ok=True)
                print(f"  Spaces repo: https://huggingface.co/spaces/{spaces_repo}")
            except Exception as e:
                print(f"  Spaces creation: {e}")

    print("\n✓ Upload complete!")
    return 0
|
|
|
|
| if __name__ == "__main__": |
| sys.exit(main()) |