Spaces:

R-Kentaren
/

fullstack-code-builder

Running

App Files Files Community

fullstack-code-builder / code /huggingface /push.py

R-Kentaren

Upload folder using huggingface_hub

ff86d3d verified about 19 hours ago

raw

history blame contribute delete

12.9 kB

	"""HuggingFace Hub push and project ZIP packaging.

	Creates ZIP archives from extracted project files and pushes
	projects to HuggingFace Spaces or model repos.
	"""

	from __future__ import annotations

	import logging
	import os
	import re
	import tempfile
	import zipfile
	from pathlib import Path
	from typing import Any

	from code.config.constants import MODEL_ID
	from code.huggingface.dockerfile_gen import (
	detect_framework,
	is_js_project,
	scaffold_js_project,
	)

	logger = logging.getLogger(__name__)

	# ─── Import-to-Package Mapping ──────────────────────────────────────────

	# Maps a top-level name found in an ``import`` / ``from`` statement to the
	# pip requirement line that provides it.  A few keys are PyPI-style names
	# (e.g. "python-docx", "beautifulsoup4") rather than importable module
	# names; they are kept so existing lookups by those names keep working.
	# Each key appears exactly once: a repeated key in a dict literal silently
	# overrides the earlier entry.
	IMPORT_TO_PACKAGE: dict[str, str] = {
	    # Web / UI frameworks
	    "gradio": "gradio>=4.0.0",
	    "flask": "flask>=3.0.0",
	    "django": "django>=4.2.0",
	    "fastapi": "fastapi>=0.100.0",
	    "uvicorn": "uvicorn>=0.23.0",
	    "streamlit": "streamlit>=1.28.0",
	    # Scientific / imaging stack
	    "matplotlib": "matplotlib>=3.8.0",
	    "PIL": "Pillow>=10.0.0",
	    "Pillow": "Pillow>=10.0.0",
	    "numpy": "numpy>=1.24.0",
	    "pandas": "pandas>=2.0.0",
	    "scipy": "scipy>=1.11.0",
	    "sklearn": "scikit-learn>=1.3.0",
	    "scikit_learn": "scikit-learn>=1.3.0",
	    "torch": "torch>=2.1.0",
	    "tensorflow": "tensorflow>=2.14.0",
	    "transformers": "transformers>=4.35.0",
	    # Networking / scraping / data access
	    "requests": "requests>=2.31.0",
	    "beautifulsoup4": "beautifulsoup4>=4.12.0",
	    "bs4": "beautifulsoup4>=4.12.0",
	    "selenium": "selenium>=4.15.0",
	    "sqlalchemy": "sqlalchemy>=2.0.0",
	    "pydantic": "pydantic>=2.0.0",
	    "httpx": "httpx>=0.25.0",
	    "aiohttp": "aiohttp>=3.9.0",
	    # Vision / plotting / analysis
	    "opencv": "opencv-python-headless>=4.8.0",
	    "cv2": "opencv-python-headless>=4.8.0",
	    "plotly": "plotly>=5.18.0",
	    "seaborn": "seaborn>=0.13.0",
	    "wordcloud": "wordcloud>=1.9.0",
	    "networkx": "networkx>=3.2.0",
	    "sympy": "sympy>=1.12",
	    "skimage": "scikit-image>=0.21.0",
	    # Audio / video
	    "soundfile": "soundfile>=0.12.0",
	    "pydub": "pydub>=0.25.1",
	    "moviepy": "moviepy>=1.0.3",
	    # Documents / spreadsheets
	    "openpyxl": "openpyxl>=3.1.0",
	    "xlsxwriter": "xlsxwriter>=3.1.0",
	    "python-docx": "python-docx>=0.8.11",
	    "docx": "python-docx>=0.8.11",
	    "reportlab": "reportlab>=4.0.0",
	    # Templating / Flask extensions
	    "jinja2": "jinja2>=3.1.0",
	    "wtforms": "wtforms>=3.1.0",
	    "flask_sqlalchemy": "flask-sqlalchemy>=3.1.0",
	    "flask_login": "flask-login>=0.6.0",
	    "flask_wtf": "flask-wtf>=1.2.0",
	    "flask_cors": "flask-cors>=4.0.0",
	}


	def _scan_imports(code: str) -> list[str]:
	"""Scan Python code for import statements and return package names."""
	packages = set()

	# Match: import xxx
	for m in re.finditer(r"^\simport\s+([a-zA-Z_][\w.])", code, re.MULTILINE):
	top_level = m.group(1).split(".")[0]
	packages.add(top_level)

	# Match: from xxx import ...
	for m in re.finditer(r"^\sfrom\s+([a-zA-Z_][\w.])", code, re.MULTILINE):
	top_level = m.group(1).split(".")[0]
	packages.add(top_level)

	return sorted(packages)


	def generate_requirements(code: str) -> str:
	    """Build requirements.txt content from the imports found in *code*.

	    Every imported top-level module that has an entry in
	    ``IMPORT_TO_PACKAGE`` contributes its requirement spec once; modules
	    without an entry (standard library, unknown packages) are ignored.

	    Returns:
	        The specs joined by newlines with a trailing newline, or an empty
	        string when no spec was collected.
	    """
	    specs: list[str] = []

	    for module in _scan_imports(code):
	        spec = IMPORT_TO_PACKAGE.get(module)
	        if spec is not None and spec not in specs:
	            specs.append(spec)

	    # Code that mentions a gradio import anywhere gets gradio listed first,
	    # even if the line-based scan above did not pick it up.
	    mentions_gradio = "import gradio" in code or "from gradio" in code
	    if mentions_gradio:
	        listed_names = [item.split(">=")[0].split("[")[0] for item in specs]
	        if "gradio" not in listed_names:
	            specs.insert(0, "gradio>=4.0.0")

	    if not specs:
	        return ""
	    return "\n".join(specs) + "\n"


	def _find_entry_point(files: dict[str, str]) -> str:
	"""Find the main entry point file for a project.

	Looks for app.py, main.py, or any Python file with a launcher pattern.
	"""
	# Priority order for Python entry points
	candidates = ["app.py", "main.py", "index.py", "server.py", "run.py"]
	for c in candidates:
	if c in files:
	return c

	# Priority order for JS entry points
	js_candidates = ["index.js", "server.js", "src/index.js", "src/main.jsx", "src/main.tsx"]
	for c in js_candidates:
	if c in files:
	return c

	# Look for any .py file with if __name__ == "__main__" or .launch()
	for fname, content in files.items():
	if fname.endswith(".py"):
	if "__main__" in content or ".launch(" in content or "app.run(" in content:
	return fname

	# Fall back to first .py file
	for fname in files:
	if fname.endswith(".py"):
	return fname

	# Fall back to first file
	return next(iter(files), "app.py")


	def _detect_sdk(files: dict[str, str], entry: str) -> str:
	"""Auto-detect the best Space SDK from the project files."""
	all_code = "\n".join(files.values())

	if "import streamlit" in all_code or "from streamlit" in all_code:
	return "streamlit"
	if "import gradio" in all_code or "from gradio" in all_code:
	return "gradio"

	# JS/TS projects → Docker
	if is_js_project(files):
	return "docker"

	if any(f.endswith(".html") for f in files):
	return "static"
	if entry.endswith(".py"):
	return "gradio" # Default Python to Gradio SDK

	return "static"


	def _archive_member_name(path: str) -> str:
	    """Return *path* as a relative, forward-slash archive name ("" if empty).

	    Backslashes are treated as separators, empty and "." components are
	    dropped, and ".." is resolved without ever climbing above the root.
	    """
	    parts: list[str] = []
	    for part in path.replace("\\", "/").split("/"):
	        if part in ("", "."):
	            continue
	        if part == "..":
	            if parts:
	                parts.pop()
	            continue
	        parts.append(part)
	    return "/".join(parts)


	def create_project_zip(files: dict[str, str], project_name: str) -> str:
	    """Create a ZIP file from extracted project files.

	    Every file is stored under a single top-level folder named after the
	    project.  Path separators in *project_name* are replaced by "_" so the
	    archive is always created directly inside the fresh temp directory, and
	    member paths are normalised so no entry is absolute or contains "..".

	    Args:
	        files: Mapping of project-relative path to text content.
	        project_name: Used for the ZIP file name and the top-level folder.

	    Returns:
	        Path to the created ZIP file.  It lives in a new temporary directory
	        that this function does not remove; cleanup is left to the caller.
	    """
	    # A separator in the name would place the ZIP in a sub- or parent directory.
	    safe_name = project_name.replace("\\", "/").replace("/", "_")
	    if not safe_name.strip(". "):
	        safe_name = "project"

	    zip_dir = tempfile.mkdtemp(prefix="fullstack_project_")
	    zip_path = os.path.join(zip_dir, f"{safe_name}.zip")

	    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
	        for filepath, content in files.items():
	            member = _archive_member_name(filepath)
	            if not member:
	                # Nothing usable is left (e.g. "" or "../"); it cannot be archived.
	                logger.warning("Skipping file with unusable path: %r", filepath)
	                continue
	            zf.writestr(f"{safe_name}/{member}", content)

	    return zip_path


	def _resolve_upload_path(root: str, relative: str) -> str:
	    """Return the absolute path of *relative* inside *root*, or "" if unsafe.

	    A path is unsafe when it is absolute, climbs out of *root* via "..",
	    resolves to *root* itself, or cannot be resolved at all.
	    """
	    try:
	        root_abs = os.path.realpath(root)
	        candidate = os.path.realpath(os.path.join(root_abs, relative))
	        if candidate == root_abs:
	            return ""
	        if os.path.commonpath([root_abs, candidate]) != root_abs:
	            return ""
	    except ValueError:
	        # Raised for embedded NUL bytes or paths on different drives.
	        return ""
	    return candidate


	def _requirement_key(line: str) -> str:
	    """Return a comparison key for one requirements.txt line.

	    For ``name<spec>`` lines the key is the lower-cased distribution name
	    with runs of "-", "_" and "." collapsed to "-".  Anything else (pip
	    options, URLs, local paths) is its own key and is never merged.
	    """
	    match = re.match(r"([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?=$|[\[<>=!~;@#\s])", line)
	    if not match:
	        return line
	    return re.sub(r"[-_.]+", "-", match.group(1)).lower()


	def _merge_requirements(existing: str, generated: str) -> str:
	    """Merge two requirements texts, letting *existing* win per package.

	    Blank lines and comment lines are dropped.  When both texts name the same
	    package only the line from *existing* is kept, so a pin from the project
	    is never paired with a conflicting auto-generated spec.  The result is
	    sorted and newline-terminated.
	    """
	    chosen: dict[str, str] = {}
	    for raw in existing.splitlines() + generated.splitlines():
	        line = raw.strip()
	        if not line or line.startswith("#"):
	            continue
	        # setdefault keeps the first line seen; existing lines come first.
	        chosen.setdefault(_requirement_key(line), line)
	    return "\n".join(sorted(chosen.values())) + "\n"


	def _render_space_readme(name: str, space_sdk: str, app_file: str) -> str:
	    """Return README.md text with a YAML front-matter block and a short body.

	    The text is assembled from explicit lines so every front-matter key starts
	    in column 0 regardless of how this source file is indented.
	    """
	    lines = [
	        "---",
	        f"title: {name}",
	        "emoji: 🚀",
	        "colorFrom: blue",
	        "colorTo: purple",
	        f"sdk: {space_sdk}",
	        f"app_file: {app_file}",
	        "pinned: false",
	        "---",
	        "",
	        f"# {name}",
	        "",
	        f"Generated by Fullstack Code Builder using {MODEL_ID}.",
	    ]
	    return "\n".join(lines) + "\n"


	def _sync_readme_front_matter(readme: str, space_sdk: str, app_file: str) -> str:
	    """Rewrite existing ``app_file:`` / ``sdk:`` lines to the given values.

	    Only lines that start with the key are touched and only up to the end of
	    that line.  Callable replacements keep backslashes in the values literal.
	    A README without these keys is returned unchanged.
	    """
	    readme = re.sub(
	        r"^app_file:[^\r\n]*",
	        lambda _m: f"app_file: {app_file}",
	        readme,
	        flags=re.MULTILINE,
	    )
	    readme = re.sub(
	        r"^sdk:[^\r\n]*",
	        lambda _m: f"sdk: {space_sdk}",
	        readme,
	        flags=re.MULTILINE,
	    )
	    return readme


	def push_to_huggingface(
	    files: dict[str, str],
	    project_name: str,
	    repo_name: str,
	    hf_token: str,
	    space_sdk: str = "static",
	    is_space: bool = True,
	) -> dict[str, Any]:
	    """Push generated project to HuggingFace Hub.

	    Creates the repo if it doesn't exist, stages all files in a temporary
	    directory, adds or updates README.md and requirements.txt as needed, and
	    uploads the staged folder.

	    Args:
	        files: Mapping of project-relative path to text content.  Paths that
	            would land outside the staging directory are skipped with a
	            warning.
	        project_name: Passed to ``scaffold_js_project`` for Docker projects.
	        repo_name: ``"namespace/name"`` or just ``"name"``; in the latter case
	            the namespace is taken from ``whoami()`` for *hf_token*.
	        hf_token: HuggingFace access token used for all Hub calls.
	        space_sdk: Requested Space SDK.  The default ``"static"`` is replaced
	            by the auto-detected SDK when detection finds something else.
	        is_space: Push to a Space when true, otherwise to a model repo.

	    Returns:
	        Dict with keys ``success`` (bool), ``url``, ``repo_name`` and
	        ``message``.  Any ``Exception`` raised along the way is logged and
	        reported as ``success=False`` instead of propagating.
	    """
	    try:
	        from huggingface_hub import HfApi, create_repo

	        api = HfApi(token=hf_token)

	        if "/" in repo_name:
	            namespace, name = repo_name.split("/", 1)
	        else:
	            user_info = api.whoami()
	            namespace = user_info["name"]
	            name = repo_name
	            repo_name = f"{namespace}/{name}"

	        # Find entry point and auto-detect SDK
	        entry_point = _find_entry_point(files)
	        detected_sdk = _detect_sdk(files, entry_point)

	        # Use detected SDK if user left it as "static" but project needs something else
	        if space_sdk == "static" and detected_sdk != "static":
	            space_sdk = detected_sdk

	        # For JS projects, scaffold Docker support files
	        if is_js_project(files) or space_sdk == "docker":
	            framework = detect_framework(files)
	            if framework == "static":
	                # Single HTML file or simple JS — keep as static
	                if any(f.endswith(".html") for f in files) and not is_js_project(files):
	                    space_sdk = "static"
	                else:
	                    framework = "nodejs"
	                    space_sdk = "docker"

	            if space_sdk == "docker":
	                # Presumably returns the mapping extended with Docker support
	                # files; verify against dockerfile_gen.scaffold_js_project.
	                files = scaffold_js_project(files, framework, project_name)

	        # Best effort: a creation failure is only logged; the upload below
	        # reports the real error if the repo is in fact unusable.
	        try:
	            if is_space:
	                create_repo(
	                    repo_id=repo_name,
	                    repo_type="space",
	                    space_sdk=space_sdk,
	                    token=hf_token,
	                    exist_ok=True,
	                )
	            else:
	                create_repo(
	                    repo_id=repo_name,
	                    repo_type="model",
	                    token=hf_token,
	                    exist_ok=True,
	                )
	        except Exception as e:
	            logger.warning("Repo creation warning: %s", e)

	        with tempfile.TemporaryDirectory(prefix="hf_push_") as tmp_dir:
	            # Write all project files, refusing paths that escape tmp_dir
	            for filepath, content in files.items():
	                target = _resolve_upload_path(tmp_dir, filepath)
	                if not target:
	                    logger.warning("Skipping file with unsafe path: %r", filepath)
	                    continue
	                os.makedirs(os.path.dirname(target), exist_ok=True)
	                Path(target).write_text(content, encoding="utf-8")

	            # Ensure the entry point is named app.py for HF Spaces (Python)
	            if (
	                entry_point != "app.py"
	                and entry_point.endswith(".py")
	                and is_space
	                and space_sdk in ("gradio", "streamlit")
	            ):
	                src = os.path.join(tmp_dir, entry_point)
	                dst = os.path.join(tmp_dir, "app.py")
	                if os.path.exists(src) and not os.path.exists(dst):
	                    import shutil

	                    shutil.copy2(src, dst)

	            # Determine app_file for README
	            html_files = [f for f in files if f.endswith(".html")]
	            if space_sdk == "docker":
	                app_file = "Dockerfile"
	            elif space_sdk in ("gradio", "streamlit"):
	                app_file = "app.py"
	            elif space_sdk == "static" and "index.html" in files:
	                app_file = "index.html"
	            elif space_sdk == "static" and html_files:
	                app_file = html_files[0]
	            else:
	                app_file = entry_point

	            # Add README.md if not present, otherwise sync its sdk/app_file
	            readme_path = os.path.join(tmp_dir, "README.md")
	            if not os.path.exists(readme_path):
	                readme_content = _render_space_readme(name, space_sdk, app_file)
	                Path(readme_path).write_text(readme_content, encoding="utf-8")
	            else:
	                existing = Path(readme_path).read_text(encoding="utf-8")
	                updated = _sync_readme_front_matter(existing, space_sdk, app_file)
	                Path(readme_path).write_text(updated, encoding="utf-8")

	            # Add/merge requirements.txt for Python projects
	            req_path = os.path.join(tmp_dir, "requirements.txt")
	            py_sources = [
	                content for fname, content in files.items() if fname.endswith(".py")
	            ]

	            if py_sources and space_sdk != "docker":
	                # Scan all Python code for imports
	                auto_reqs = generate_requirements("\n".join(py_sources))

	                if os.path.exists(req_path):
	                    # Merge with existing requirements.txt; project lines win
	                    existing_reqs = Path(req_path).read_text(encoding="utf-8")
	                    merged = _merge_requirements(existing_reqs, auto_reqs)
	                    Path(req_path).write_text(merged, encoding="utf-8")
	                elif auto_reqs:
	                    Path(req_path).write_text(auto_reqs, encoding="utf-8")
	                else:
	                    # Minimal requirements for Python Spaces
	                    Path(req_path).write_text("gradio>=4.0.0\n", encoding="utf-8")

	            api.upload_folder(
	                folder_path=tmp_dir,
	                repo_id=repo_name,
	                repo_type="space" if is_space else "model",
	                token=hf_token,
	            )

	        repo_url = f"https://huggingface.co/{repo_name}"
	        if is_space:
	            repo_url = f"https://huggingface.co/spaces/{repo_name}"

	        return {
	            "success": True,
	            "url": repo_url,
	            "repo_name": repo_name,
	            "message": f"Successfully pushed to {repo_url}",
	        }

	    except Exception as exc:
	        logger.exception("Failed to push to HuggingFace")
	        return {
	            "success": False,
	            "url": "",
	            "repo_name": repo_name,
	            "message": f"Failed to push: {str(exc)}",
	        }