| """HuggingFace Hub push and project ZIP packaging. |
| |
| Creates ZIP archives from extracted project files and pushes |
| projects to HuggingFace Spaces or model repos. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import logging |
| import os |
| import re |
| import tempfile |
| import zipfile |
| from pathlib import Path |
| from typing import Any |
|
|
| from code.config.constants import MODEL_ID |
| from code.huggingface.dockerfile_gen import ( |
| detect_framework, |
| is_js_project, |
| scaffold_js_project, |
| ) |
|
|
| logger = logging.getLogger(__name__) |
|
|
| |
|
|
# Maps a top-level import name (as found by ``_scan_imports``) to the pip
# requirement spec written into requirements.txt. Several import names can map
# to the same distribution (e.g. ``PIL`` and ``Pillow``); each key must appear
# only once, because a repeated key in a dict literal silently overwrites the
# earlier entry.
IMPORT_TO_PACKAGE: dict[str, str] = {
    "gradio": "gradio>=4.0.0",
    "flask": "flask>=3.0.0",
    "django": "django>=4.2.0",
    "fastapi": "fastapi>=0.100.0",
    "uvicorn": "uvicorn>=0.23.0",
    "streamlit": "streamlit>=1.28.0",
    "matplotlib": "matplotlib>=3.8.0",
    "PIL": "Pillow>=10.0.0",
    "Pillow": "Pillow>=10.0.0",
    "numpy": "numpy>=1.24.0",
    "pandas": "pandas>=2.0.0",
    "scipy": "scipy>=1.11.0",
    "sklearn": "scikit-learn>=1.3.0",
    "scikit_learn": "scikit-learn>=1.3.0",
    "torch": "torch>=2.1.0",
    "tensorflow": "tensorflow>=2.14.0",
    "transformers": "transformers>=4.35.0",
    "requests": "requests>=2.31.0",
    "beautifulsoup4": "beautifulsoup4>=4.12.0",
    "bs4": "beautifulsoup4>=4.12.0",
    "selenium": "selenium>=4.15.0",
    "sqlalchemy": "sqlalchemy>=2.0.0",
    "pydantic": "pydantic>=2.0.0",
    "httpx": "httpx>=0.25.0",
    "aiohttp": "aiohttp>=3.9.0",
    "opencv": "opencv-python-headless>=4.8.0",
    "cv2": "opencv-python-headless>=4.8.0",
    "plotly": "plotly>=5.18.0",
    "seaborn": "seaborn>=0.13.0",
    "wordcloud": "wordcloud>=1.9.0",
    "networkx": "networkx>=3.2.0",
    "sympy": "sympy>=1.12",
    "skimage": "scikit-image>=0.21.0",
    "soundfile": "soundfile>=0.12.0",
    "pydub": "pydub>=0.25.1",
    "moviepy": "moviepy>=1.0.3",
    "openpyxl": "openpyxl>=3.1.0",
    "xlsxwriter": "xlsxwriter>=3.1.0",
    "python-docx": "python-docx>=0.8.11",
    "docx": "python-docx>=0.8.11",
    "reportlab": "reportlab>=4.0.0",
    "jinja2": "jinja2>=3.1.0",
    "wtforms": "wtforms>=3.1.0",
    "flask_sqlalchemy": "flask-sqlalchemy>=3.1.0",
    "flask_login": "flask-login>=0.6.0",
    "flask_wtf": "flask-wtf>=1.2.0",
    "flask_cors": "flask-cors>=4.0.0",
}
|
|
|
|
| def _scan_imports(code: str) -> list[str]: |
| """Scan Python code for import statements and return package names.""" |
| packages = set() |
|
|
| |
| for m in re.finditer(r"^\s*import\s+([a-zA-Z_][\w.]*)", code, re.MULTILINE): |
| top_level = m.group(1).split(".")[0] |
| packages.add(top_level) |
|
|
| |
| for m in re.finditer(r"^\s*from\s+([a-zA-Z_][\w.]*)", code, re.MULTILINE): |
| top_level = m.group(1).split(".")[0] |
| packages.add(top_level) |
|
|
| return sorted(packages) |
|
|
|
|
def generate_requirements(code: str) -> str:
    """Build requirements.txt content from the imports found in *code*.

    Each imported top-level module that has an entry in
    ``IMPORT_TO_PACKAGE`` contributes its pip spec once, in the sorted order
    produced by ``_scan_imports``. If the text mentions a gradio import but no
    gradio spec was collected, ``gradio>=4.0.0`` is placed first.

    Returns:
        Newline-separated pip specs with a trailing newline, or an empty
        string when nothing was detected.
    """
    specs: list[str] = []
    for module in _scan_imports(code):
        spec = IMPORT_TO_PACKAGE.get(module)
        if spec is not None and spec not in specs:
            specs.append(spec)

    mentions_gradio = "import gradio" in code or "from gradio" in code
    if mentions_gradio:
        # Compare bare distribution names (version bound and extras removed).
        collected = [spec.split(">=")[0].split("[")[0] for spec in specs]
        if "gradio" not in collected:
            specs.insert(0, "gradio>=4.0.0")

    if not specs:
        return ""
    return "\n".join(specs) + "\n"
|
|
|
|
| def _find_entry_point(files: dict[str, str]) -> str: |
| """Find the main entry point file for a project. |
| |
| Looks for app.py, main.py, or any Python file with a launcher pattern. |
| """ |
| |
| candidates = ["app.py", "main.py", "index.py", "server.py", "run.py"] |
| for c in candidates: |
| if c in files: |
| return c |
|
|
| |
| js_candidates = ["index.js", "server.js", "src/index.js", "src/main.jsx", "src/main.tsx"] |
| for c in js_candidates: |
| if c in files: |
| return c |
|
|
| |
| for fname, content in files.items(): |
| if fname.endswith(".py"): |
| if "__main__" in content or ".launch(" in content or "app.run(" in content: |
| return fname |
|
|
| |
| for fname in files: |
| if fname.endswith(".py"): |
| return fname |
|
|
| |
| return next(iter(files), "app.py") |
|
|
|
|
| def _detect_sdk(files: dict[str, str], entry: str) -> str: |
| """Auto-detect the best Space SDK from the project files.""" |
| all_code = "\n".join(files.values()) |
|
|
| if "import streamlit" in all_code or "from streamlit" in all_code: |
| return "streamlit" |
| if "import gradio" in all_code or "from gradio" in all_code: |
| return "gradio" |
|
|
| |
| if is_js_project(files): |
| return "docker" |
|
|
| if any(f.endswith(".html") for f in files): |
| return "static" |
| if entry.endswith(".py"): |
| return "gradio" |
|
|
| return "static" |
|
|
|
|
def create_project_zip(files: dict[str, str], project_name: str) -> str:
    """Create a ZIP file from extracted project files.

    The archive is written to a freshly created temporary directory (which is
    not removed by this function) and holds every file under a single
    top-level folder named after the project.

    Args:
        files: Mapping of relative file path to file content.
        project_name: Used for both the archive file name and the top-level
            folder inside it. Path separators are replaced with ``_`` and an
            empty, ``.`` or ``..`` name falls back to ``project``.

    Returns:
        The path to the created ZIP file.
    """
    # A separator in the name would point the archive at a sub-directory of
    # the new temp dir that does not exist, making ZipFile() raise.
    safe_name = re.sub(r"[\\/]+", "_", project_name)
    if safe_name.strip() in {"", ".", ".."}:
        safe_name = "project"

    zip_dir = tempfile.mkdtemp(prefix="fullstack_project_")
    zip_path = os.path.join(zip_dir, f"{safe_name}.zip")

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for filepath, content in files.items():
            zf.writestr(f"{safe_name}/{filepath}", content)

    return zip_path
|
|
|
|
def _materialize_files(files: dict[str, str], tmp_dir: str) -> None:
    """Write *files* below *tmp_dir*, skipping paths that would escape it."""
    root = Path(tmp_dir).resolve()
    for filepath, content in files.items():
        target = (root / filepath).resolve()
        # An absolute key or ``..`` segments would land outside the staging
        # directory; the target must sit strictly below the root.
        if root not in target.parents:
            logger.warning("Skipping file outside project root: %s", filepath)
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")


def _select_app_file(files: dict[str, str], entry_point: str, space_sdk: str) -> str:
    """Return the ``app_file`` value to record in the README for *space_sdk*."""
    if space_sdk == "docker":
        return "Dockerfile"
    if space_sdk in ("gradio", "streamlit"):
        return "app.py"
    if space_sdk == "static":
        if "index.html" in files:
            return "index.html"
        # First HTML file if there is one, otherwise the entry point.
        return next((f for f in files if f.endswith(".html")), entry_point)
    return entry_point


def _set_readme_key(text: str, key: str, value: str) -> str:
    """Replace the value of the first line-initial ``key:`` entry in *text*."""
    # [ \t]* rather than \s* so an empty value cannot swallow the next line;
    # a function replacement keeps backslashes in *value* literal.
    return re.sub(
        rf"^{re.escape(key)}:[ \t]*\S*",
        lambda _match: f"{key}: {value}",
        text,
        count=1,
        flags=re.MULTILINE,
    )


def _write_readme(tmp_dir: str, name: str, space_sdk: str, app_file: str) -> None:
    """Create README.md with front matter, or refresh sdk/app_file in place."""
    readme = Path(tmp_dir) / "README.md"
    if not readme.exists():
        lines = [
            "---",
            f"title: {name}",
            "emoji: 🚀",
            "colorFrom: blue",
            "colorTo: purple",
            f"sdk: {space_sdk}",
            f"app_file: {app_file}",
            "pinned: false",
            "---",
            "",
            f"# {name}",
            "",
            f"Generated by Fullstack Code Builder using {MODEL_ID}.",
            "",
        ]
        readme.write_text("\n".join(lines), encoding="utf-8")
        return

    existing = readme.read_text(encoding="utf-8")
    existing = _set_readme_key(existing, "app_file", app_file)
    existing = _set_readme_key(existing, "sdk", space_sdk)
    readme.write_text(existing, encoding="utf-8")


def _requirement_key(line: str) -> str:
    """Return a normalized package name for a requirement line.

    Lines that do not start with a package name (for example pip options
    beginning with ``-``) are returned unchanged so they never collide.
    """
    match = re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*", line)
    if match is None:
        return line
    return re.sub(r"[-_.]+", "-", match.group(0)).lower()


def _requirement_lines(text: str) -> list[str]:
    """Return the stripped, non-blank, non-comment lines of *text*."""
    stripped = (line.strip() for line in text.splitlines())
    return [line for line in stripped if line and not line.startswith("#")]


def _write_requirements(tmp_dir: str, files: dict[str, str]) -> None:
    """Write requirements.txt from detected imports, merging with an existing one."""
    req_path = Path(tmp_dir) / "requirements.txt"
    all_py_code = "\n".join(
        content for fname, content in files.items() if fname.endswith(".py")
    )
    auto_reqs = generate_requirements(all_py_code)

    if req_path.exists():
        existing = _requirement_lines(req_path.read_text(encoding="utf-8"))
        already_listed = {_requirement_key(line) for line in existing}
        merged = set(existing)
        # Entries already in the project's requirements.txt win: adding a
        # second, auto-detected spec for the same package could contradict
        # an explicit pin.
        merged.update(
            line
            for line in _requirement_lines(auto_reqs)
            if _requirement_key(line) not in already_listed
        )
        req_path.write_text("\n".join(sorted(merged)) + "\n", encoding="utf-8")
    elif auto_reqs:
        req_path.write_text(auto_reqs, encoding="utf-8")
    else:
        # Python project with no recognised imports: fall back to gradio.
        req_path.write_text("gradio>=4.0.0\n", encoding="utf-8")


def push_to_huggingface(
    files: dict[str, str],
    project_name: str,
    repo_name: str,
    hf_token: str,
    space_sdk: str = "static",
    is_space: bool = True,
) -> dict[str, Any]:
    """Push generated project to HuggingFace Hub.

    Creates the repo if it doesn't exist, writes all files to a temporary
    staging directory, adds or updates README.md and requirements.txt as
    needed (JS projects are first passed through ``scaffold_js_project``),
    and uploads the directory.

    Args:
        files: Mapping of relative file path to text content. Paths that
            resolve outside the staging directory are skipped with a warning.
        project_name: Passed to ``scaffold_js_project`` for docker Spaces.
        repo_name: ``namespace/name`` or a bare name; a bare name is prefixed
            with the name reported by ``HfApi.whoami()``.
        hf_token: Token used for all Hub calls.
        space_sdk: Requested Space SDK. The default ``"static"`` is replaced
            by the auto-detected SDK when detection finds something else.
        is_space: Push to a Space when true, otherwise to a model repo.

    Returns:
        A dict with ``success``, ``url``, ``repo_name`` and ``message``.
        Exceptions are logged and reported through ``success=False`` rather
        than propagated.
    """
    try:
        from huggingface_hub import HfApi, create_repo

        api = HfApi(token=hf_token)

        if "/" in repo_name:
            namespace, name = repo_name.split("/", 1)
        else:
            user_info = api.whoami()
            namespace = user_info["name"]
            name = repo_name
            repo_name = f"{namespace}/{name}"

        entry_point = _find_entry_point(files)
        detected_sdk = _detect_sdk(files, entry_point)

        # Only the default SDK is overridden; an explicit choice is kept.
        if space_sdk == "static" and detected_sdk != "static":
            space_sdk = detected_sdk

        if is_js_project(files) or space_sdk == "docker":
            framework = detect_framework(files)
            if framework == "static":
                if any(f.endswith(".html") for f in files) and not is_js_project(files):
                    space_sdk = "static"
                else:
                    framework = "nodejs"
                    space_sdk = "docker"

            if space_sdk == "docker":
                files = scaffold_js_project(files, framework, project_name)

        # Best effort: a creation failure is logged and the upload below is
        # still attempted.
        try:
            if is_space:
                create_repo(
                    repo_id=repo_name,
                    repo_type="space",
                    space_sdk=space_sdk,
                    token=hf_token,
                    exist_ok=True,
                )
            else:
                create_repo(
                    repo_id=repo_name,
                    repo_type="model",
                    token=hf_token,
                    exist_ok=True,
                )
        except Exception as e:
            logger.warning("Repo creation warning: %s", e)

        with tempfile.TemporaryDirectory(prefix="hf_push_") as tmp_dir:
            _materialize_files(files, tmp_dir)

            # gradio/streamlit Spaces are pointed at app.py below, so mirror
            # a differently named Python entry point there.
            if (
                entry_point != "app.py"
                and entry_point.endswith(".py")
                and is_space
                and space_sdk in ("gradio", "streamlit")
            ):
                src = os.path.join(tmp_dir, entry_point)
                dst = os.path.join(tmp_dir, "app.py")
                if os.path.exists(src) and not os.path.exists(dst):
                    import shutil

                    shutil.copy2(src, dst)

            app_file = _select_app_file(files, entry_point, space_sdk)
            _write_readme(tmp_dir, name, space_sdk, app_file)

            has_python = any(f.endswith(".py") for f in files)
            if has_python and space_sdk != "docker":
                _write_requirements(tmp_dir, files)

            api.upload_folder(
                folder_path=tmp_dir,
                repo_id=repo_name,
                repo_type="space" if is_space else "model",
                token=hf_token,
            )

        if is_space:
            repo_url = f"https://huggingface.co/spaces/{repo_name}"
        else:
            repo_url = f"https://huggingface.co/{repo_name}"

        return {
            "success": True,
            "url": repo_url,
            "repo_name": repo_name,
            "message": f"Successfully pushed to {repo_url}",
        }

    except Exception as exc:
        logger.exception("Failed to push to HuggingFace")
        return {
            "success": False,
            "url": "",
            "repo_name": repo_name,
            "message": f"Failed to push: {str(exc)}",
        }
|
|