"""HuggingFace Hub push and project ZIP packaging. Creates ZIP archives from extracted project files and pushes projects to HuggingFace Spaces or model repos. """ from __future__ import annotations import logging import os import re import tempfile import zipfile from pathlib import Path from typing import Any from code.config.constants import MODEL_ID from code.huggingface.dockerfile_gen import ( detect_framework, is_js_project, scaffold_js_project, ) logger = logging.getLogger(__name__) # ─── Import-to-Package Mapping ────────────────────────────────────────── IMPORT_TO_PACKAGE: dict[str, str] = { "gradio": "gradio>=4.0.0", "flask": "flask>=3.0.0", "django": "django>=4.2.0", "fastapi": "fastapi>=0.100.0", "uvicorn": "uvicorn>=0.23.0", "streamlit": "streamlit>=1.28.0", "matplotlib": "matplotlib>=3.8.0", "PIL": "Pillow>=10.0.0", "Pillow": "Pillow>=10.0.0", "numpy": "numpy>=1.24.0", "pandas": "pandas>=2.0.0", "scipy": "scipy>=1.11.0", "sklearn": "scikit-learn>=1.3.0", "scikit_learn": "scikit-learn>=1.3.0", "torch": "torch>=2.1.0", "tensorflow": "tensorflow>=2.14.0", "transformers": "transformers>=4.35.0", "requests": "requests>=2.31.0", "beautifulsoup4": "beautifulsoup4>=4.12.0", "bs4": "beautifulsoup4>=4.12.0", "selenium": "selenium>=4.15.0", "sqlalchemy": "sqlalchemy>=2.0.0", "pydantic": "pydantic>=2.0.0", "httpx": "httpx>=0.25.0", "aiohttp": "aiohttp>=3.9.0", "opencv": "opencv-python-headless>=4.8.0", "cv2": "opencv-python-headless>=4.8.0", "plotly": "plotly>=5.18.0", "seaborn": "seaborn>=0.13.0", "wordcloud": "wordcloud>=1.9.0", "networkx": "networkx>=3.2.0", "sympy": "sympy>=1.12", "Pillow": "Pillow>=10.0.0", "skimage": "scikit-image>=0.21.0", "soundfile": "soundfile>=0.12.0", "pydub": "pydub>=0.25.1", "moviepy": "moviepy>=1.0.3", "openpyxl": "openpyxl>=3.1.0", "xlsxwriter": "xlsxwriter>=3.1.0", "python-docx": "python-docx>=0.8.11", "docx": "python-docx>=0.8.11", "reportlab": "reportlab>=4.0.0", "jinja2": "jinja2>=3.1.0", "wtforms": "wtforms>=3.1.0", "flask_sqlalchemy": "flask-sqlalchemy>=3.1.0", "flask_login": "flask-login>=0.6.0", "flask_wtf": "flask-wtf>=1.2.0", "flask_cors": "flask-cors>=4.0.0", } def _scan_imports(code: str) -> list[str]: """Scan Python code for import statements and return package names.""" packages = set() # Match: import xxx for m in re.finditer(r"^\s*import\s+([a-zA-Z_][\w.]*)", code, re.MULTILINE): top_level = m.group(1).split(".")[0] packages.add(top_level) # Match: from xxx import ... for m in re.finditer(r"^\s*from\s+([a-zA-Z_][\w.]*)", code, re.MULTILINE): top_level = m.group(1).split(".")[0] packages.add(top_level) return sorted(packages) def generate_requirements(code: str) -> str: """Generate requirements.txt content from code by scanning imports. Returns a newline-separated string of pip package specs. """ packages = _scan_imports(code) reqs: list[str] = [] for pkg in packages: if pkg in IMPORT_TO_PACKAGE: req_spec = IMPORT_TO_PACKAGE[pkg] if req_spec not in reqs: reqs.append(req_spec) # Skip stdlib modules (os, sys, json, re, math, etc.) # Always include gradio for Gradio apps if not already if "import gradio" in code or "from gradio" in code: if "gradio" not in [r.split(">=")[0].split("[")[0] for r in reqs]: reqs.insert(0, "gradio>=4.0.0") return "\n".join(reqs) + "\n" if reqs else "" def _find_entry_point(files: dict[str, str]) -> str: """Find the main entry point file for a project. Looks for app.py, main.py, or any Python file with a launcher pattern. """ # Priority order for Python entry points candidates = ["app.py", "main.py", "index.py", "server.py", "run.py"] for c in candidates: if c in files: return c # Priority order for JS entry points js_candidates = ["index.js", "server.js", "src/index.js", "src/main.jsx", "src/main.tsx"] for c in js_candidates: if c in files: return c # Look for any .py file with if __name__ == "__main__" or .launch() for fname, content in files.items(): if fname.endswith(".py"): if "__main__" in content or ".launch(" in content or "app.run(" in content: return fname # Fall back to first .py file for fname in files: if fname.endswith(".py"): return fname # Fall back to first file return next(iter(files), "app.py") def _detect_sdk(files: dict[str, str], entry: str) -> str: """Auto-detect the best Space SDK from the project files.""" all_code = "\n".join(files.values()) if "import streamlit" in all_code or "from streamlit" in all_code: return "streamlit" if "import gradio" in all_code or "from gradio" in all_code: return "gradio" # JS/TS projects → Docker if is_js_project(files): return "docker" if any(f.endswith(".html") for f in files): return "static" if entry.endswith(".py"): return "gradio" # Default Python to Gradio SDK return "static" def create_project_zip(files: dict[str, str], project_name: str) -> str: """Create a ZIP file from extracted project files. Returns the path to the created ZIP file. """ zip_dir = tempfile.mkdtemp(prefix="fullstack_project_") zip_path = os.path.join(zip_dir, f"{project_name}.zip") with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: for filepath, content in files.items(): zf.writestr(f"{project_name}/{filepath}", content) return zip_path def push_to_huggingface( files: dict[str, str], project_name: str, repo_name: str, hf_token: str, space_sdk: str = "static", is_space: bool = True, ) -> dict[str, Any]: """Push generated project to HuggingFace Hub. Creates the repo if it doesn't exist, writes all files, and adds README.md, Dockerfile, package.json, and requirements.txt as needed. """ try: from huggingface_hub import HfApi, create_repo api = HfApi(token=hf_token) if "/" in repo_name: namespace, name = repo_name.split("/", 1) else: user_info = api.whoami() namespace = user_info["name"] name = repo_name repo_name = f"{namespace}/{name}" # Find entry point and auto-detect SDK entry_point = _find_entry_point(files) detected_sdk = _detect_sdk(files, entry_point) # Use detected SDK if user left it as "static" but project needs something else if space_sdk == "static" and detected_sdk != "static": space_sdk = detected_sdk # For JS projects, scaffold Docker support files if is_js_project(files) or space_sdk == "docker": framework = detect_framework(files) if framework == "static": # Single HTML file or simple JS — keep as static if any(f.endswith(".html") for f in files) and not is_js_project(files): space_sdk = "static" else: framework = "nodejs" space_sdk = "docker" if space_sdk == "docker": files = scaffold_js_project(files, framework, project_name) try: if is_space: create_repo( repo_id=repo_name, repo_type="space", space_sdk=space_sdk, token=hf_token, exist_ok=True, ) else: create_repo( repo_id=repo_name, repo_type="model", token=hf_token, exist_ok=True, ) except Exception as e: logger.warning("Repo creation warning: %s", e) with tempfile.TemporaryDirectory(prefix="hf_push_") as tmp_dir: # Write all project files for filepath, content in files.items(): full_path = os.path.join(tmp_dir, filepath) os.makedirs(os.path.dirname(full_path), exist_ok=True) Path(full_path).write_text(content, encoding="utf-8") # Ensure the entry point is named app.py for HF Spaces (Python) if entry_point != "app.py" and entry_point.endswith(".py") and is_space and space_sdk in ("gradio", "streamlit"): src = os.path.join(tmp_dir, entry_point) dst = os.path.join(tmp_dir, "app.py") if os.path.exists(src) and not os.path.exists(dst): import shutil shutil.copy2(src, dst) # Determine app_file for README if space_sdk == "docker": app_file = "Dockerfile" elif space_sdk in ("gradio", "streamlit"): app_file = "app.py" elif space_sdk == "static": if "index.html" in files: app_file = "index.html" elif any(f.endswith(".html") for f in files): app_file = next(f for f in files if f.endswith(".html")) else: app_file = entry_point else: app_file = entry_point # Add README.md if not present readme_path = os.path.join(tmp_dir, "README.md") if not os.path.exists(readme_path): readme_content = f"""--- title: {name} emoji: 🚀 colorFrom: blue colorTo: purple sdk: {space_sdk} app_file: {app_file} pinned: false --- # {name} Generated by Fullstack Code Builder using {MODEL_ID}. """ Path(readme_path).write_text(readme_content, encoding="utf-8") else: # Update app_file in existing README to match entry point existing = Path(readme_path).read_text(encoding="utf-8") if "app_file:" in existing: existing = re.sub( r"app_file:\s*\S+", f"app_file: {app_file}", existing ) if "sdk:" in existing: existing = re.sub( r"sdk:\s*\S+", f"sdk: {space_sdk}", existing ) Path(readme_path).write_text(existing, encoding="utf-8") # Add/merge requirements.txt for Python projects req_path = os.path.join(tmp_dir, "requirements.txt") has_python = any(f.endswith(".py") for f in files.keys()) if has_python and space_sdk != "docker": # Scan all Python code for imports all_py_code = "\n".join( content for fname, content in files.items() if fname.endswith(".py") ) auto_reqs = generate_requirements(all_py_code) if os.path.exists(req_path): # Merge with existing requirements.txt existing_reqs = Path(req_path).read_text(encoding="utf-8").strip() merged = set() for line in (existing_reqs + "\n" + auto_reqs).splitlines(): line = line.strip() if line and not line.startswith("#"): merged.add(line) Path(req_path).write_text("\n".join(sorted(merged)) + "\n", encoding="utf-8") elif auto_reqs: Path(req_path).write_text(auto_reqs, encoding="utf-8") else: # Minimal requirements for Python Spaces Path(req_path).write_text("gradio>=4.0.0\n", encoding="utf-8") api.upload_folder( folder_path=tmp_dir, repo_id=repo_name, repo_type="space" if is_space else "model", token=hf_token, ) repo_url = f"https://huggingface.co/{repo_name}" if is_space: repo_url = f"https://huggingface.co/spaces/{repo_name}" return { "success": True, "url": repo_url, "repo_name": repo_name, "message": f"Successfully pushed to {repo_url}", } except Exception as exc: logger.exception("Failed to push to HuggingFace") return { "success": False, "url": "", "repo_name": repo_name, "message": f"Failed to push: {str(exc)}", }