R-Kentaren committed on
Commit
75a6442
·
verified ·
1 Parent(s): 426783d

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. code/huggingface/push.py +206 -17
  2. code/server/routes.py +33 -13
code/huggingface/push.py CHANGED
@@ -8,6 +8,7 @@ from __future__ import annotations
8
 
9
  import logging
10
  import os
 
11
  import tempfile
12
  import zipfile
13
  from pathlib import Path
@@ -17,6 +18,141 @@ from code.config.constants import MODEL_ID
17
 
18
  logger = logging.getLogger(__name__)
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def create_project_zip(files: dict[str, str], project_name: str) -> str:
22
  """Create a ZIP file from extracted project files.
@@ -59,6 +195,14 @@ def push_to_huggingface(
59
  name = repo_name
60
  repo_name = f"{namespace}/{name}"
61
 
 
 
 
 
 
 
 
 
62
  try:
63
  if is_space:
64
  create_repo(
@@ -79,12 +223,30 @@ def push_to_huggingface(
79
  logger.warning("Repo creation warning: %s", e)
80
 
81
  with tempfile.TemporaryDirectory(prefix="hf_push_") as tmp_dir:
 
82
  for filepath, content in files.items():
83
  full_path = os.path.join(tmp_dir, filepath)
84
  os.makedirs(os.path.dirname(full_path), exist_ok=True)
85
  Path(full_path).write_text(content, encoding="utf-8")
86
 
87
- # Add README if not present
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  readme_path = os.path.join(tmp_dir, "README.md")
89
  if not os.path.exists(readme_path):
90
  readme_content = f"""---
@@ -93,7 +255,8 @@ emoji: 🚀
93
  colorFrom: blue
94
  colorTo: purple
95
  sdk: {space_sdk}
96
- app_file: app.py
 
97
  ---
98
 
99
  # {name}
@@ -101,23 +264,49 @@ app_file: app.py
101
  Generated by Fullstack Code Builder using {MODEL_ID}.
102
  """
103
  Path(readme_path).write_text(readme_content, encoding="utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # Add requirements.txt for Python/Gradio projects
106
  req_path = os.path.join(tmp_dir, "requirements.txt")
107
- if not os.path.exists(req_path):
108
- has_python = any(f.endswith(".py") for f in files.keys())
109
- if has_python:
110
- reqs = ["gradio>=4.0.0"]
111
- all_code = "\n".join(files.values())
112
- if "matplotlib" in all_code:
113
- reqs.append("matplotlib>=3.8")
114
- if "PIL" in all_code or "Pillow" in all_code:
115
- reqs.append("Pillow>=10.0")
116
- if "numpy" in all_code:
117
- reqs.append("numpy>=1.24")
118
- if "pandas" in all_code:
119
- reqs.append("pandas>=2.0")
120
- Path(req_path).write_text("\n".join(reqs) + "\n", encoding="utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  api.upload_folder(
123
  folder_path=tmp_dir,
 
8
 
9
  import logging
10
  import os
11
+ import re
12
  import tempfile
13
  import zipfile
14
  from pathlib import Path
 
18
 
19
  logger = logging.getLogger(__name__)
20
 
21
+ # ─── Import-to-Package Mapping ──────────────────────────────────────────
22
+
23
# Maps a top-level name found in an ``import`` / ``from ... import`` statement
# to the pip requirement spec written to requirements.txt.
#
# Several import names may map to the same distribution (e.g. ``PIL`` and
# ``Pillow``); each key must appear exactly once, because a repeated key in a
# dict literal silently overrides the earlier entry.
#
# NOTE(review): a few keys are distribution names rather than importable
# module names ("beautifulsoup4", "python-docx", "opencv", "scikit_learn",
# "Pillow"). They are kept for backward compatibility with existing lookups.
IMPORT_TO_PACKAGE: dict[str, str] = {
    # Web / UI frameworks
    "gradio": "gradio>=4.0.0",
    "flask": "flask>=3.0.0",
    "django": "django>=4.2.0",
    "fastapi": "fastapi>=0.100.0",
    "uvicorn": "uvicorn>=0.23.0",
    "streamlit": "streamlit>=1.28.0",
    # Plotting / imaging / numerics
    "matplotlib": "matplotlib>=3.8.0",
    "PIL": "Pillow>=10.0.0",
    "Pillow": "Pillow>=10.0.0",
    "numpy": "numpy>=1.24.0",
    "pandas": "pandas>=2.0.0",
    "scipy": "scipy>=1.11.0",
    # Machine learning
    "sklearn": "scikit-learn>=1.3.0",
    "scikit_learn": "scikit-learn>=1.3.0",
    "torch": "torch>=2.1.0",
    "tensorflow": "tensorflow>=2.14.0",
    "transformers": "transformers>=4.35.0",
    # HTTP / scraping / data access
    "requests": "requests>=2.31.0",
    "beautifulsoup4": "beautifulsoup4>=4.12.0",
    "bs4": "beautifulsoup4>=4.12.0",
    "selenium": "selenium>=4.15.0",
    "sqlalchemy": "sqlalchemy>=2.0.0",
    "pydantic": "pydantic>=2.0.0",
    "httpx": "httpx>=0.25.0",
    "aiohttp": "aiohttp>=3.9.0",
    # Computer vision / visualisation / science
    "opencv": "opencv-python-headless>=4.8.0",
    "cv2": "opencv-python-headless>=4.8.0",
    "plotly": "plotly>=5.18.0",
    "seaborn": "seaborn>=0.13.0",
    "wordcloud": "wordcloud>=1.9.0",
    "networkx": "networkx>=3.2.0",
    "sympy": "sympy>=1.12",
    "skimage": "scikit-image>=0.21.0",
    # Audio / video
    "soundfile": "soundfile>=0.12.0",
    "pydub": "pydub>=0.25.1",
    "moviepy": "moviepy>=1.0.3",
    # Documents / spreadsheets
    "openpyxl": "openpyxl>=3.1.0",
    "xlsxwriter": "xlsxwriter>=3.1.0",
    "python-docx": "python-docx>=0.8.11",
    "docx": "python-docx>=0.8.11",
    "reportlab": "reportlab>=4.0.0",
    # Templating / Flask extensions
    "jinja2": "jinja2>=3.1.0",
    "wtforms": "wtforms>=3.1.0",
    "flask_sqlalchemy": "flask-sqlalchemy>=3.1.0",
    "flask_login": "flask-login>=0.6.0",
    "flask_wtf": "flask-wtf>=1.2.0",
    "flask_cors": "flask-cors>=4.0.0",
}
73
+
74
+
75
+ def _scan_imports(code: str) -> list[str]:
76
+ """Scan Python code for import statements and return package names."""
77
+ packages = set()
78
+
79
+ # Match: import xxx
80
+ for m in re.finditer(r"^\s*import\s+([a-zA-Z_][\w.]*)", code, re.MULTILINE):
81
+ top_level = m.group(1).split(".")[0]
82
+ packages.add(top_level)
83
+
84
+ # Match: from xxx import ...
85
+ for m in re.finditer(r"^\s*from\s+([a-zA-Z_][\w.]*)", code, re.MULTILINE):
86
+ top_level = m.group(1).split(".")[0]
87
+ packages.add(top_level)
88
+
89
+ return sorted(packages)
90
+
91
+
92
def generate_requirements(code: str) -> str:
    """Build requirements.txt text from the imports found in *code*.

    Each imported top-level module that has an entry in ``IMPORT_TO_PACKAGE``
    contributes its requirement spec once; modules without an entry (standard
    library or unknown packages) contribute nothing.

    Args:
        code: Python source text to scan.

    Returns:
        The requirement specs joined by newlines with a trailing newline, or
        an empty string when no known package is imported.
    """
    specs: list[str] = []
    for module_name in _scan_imports(code):
        spec = IMPORT_TO_PACKAGE.get(module_name)
        # Unknown modules are skipped; several modules may share one spec.
        if spec is not None and spec not in specs:
            specs.append(spec)

    # Safety net: if the text mentions a gradio import but no gradio spec was
    # collected above, put gradio first in the list.
    mentions_gradio = "import gradio" in code or "from gradio" in code
    if mentions_gradio:
        listed_names = [spec.split(">=")[0].split("[")[0] for spec in specs]
        if "gradio" not in listed_names:
            specs.insert(0, "gradio>=4.0.0")

    if not specs:
        return ""
    return "\n".join(specs) + "\n"
113
+
114
+
115
+ def _find_entry_point(files: dict[str, str]) -> str:
116
+ """Find the main entry point file for a project.
117
+
118
+ Looks for app.py, main.py, or any Python file with a launcher pattern.
119
+ """
120
+ # Priority order for entry points
121
+ candidates = ["app.py", "main.py", "index.py", "server.py", "run.py"]
122
+ for c in candidates:
123
+ if c in files:
124
+ return c
125
+
126
+ # Look for any .py file with if __name__ == "__main__" or .launch()
127
+ for fname, content in files.items():
128
+ if fname.endswith(".py"):
129
+ if "__main__" in content or ".launch(" in content or "app.run(" in content:
130
+ return fname
131
+
132
+ # Fall back to first .py file
133
+ for fname in files:
134
+ if fname.endswith(".py"):
135
+ return fname
136
+
137
+ # Fall back to first file
138
+ return next(iter(files), "app.py")
139
+
140
+
141
+ def _detect_sdk(files: dict[str, str], entry: str) -> str:
142
+ """Auto-detect the best Space SDK from the project files."""
143
+ all_code = "\n".join(files.values())
144
+
145
+ if "import streamlit" in all_code or "from streamlit" in all_code:
146
+ return "streamlit"
147
+ if "import gradio" in all_code or "from gradio" in all_code:
148
+ return "gradio"
149
+ if any(f.endswith(".html") for f in files):
150
+ return "static"
151
+ if entry.endswith(".py"):
152
+ return "gradio" # Default Python to Gradio SDK
153
+
154
+ return "static"
155
+
156
 
157
  def create_project_zip(files: dict[str, str], project_name: str) -> str:
158
  """Create a ZIP file from extracted project files.
 
195
  name = repo_name
196
  repo_name = f"{namespace}/{name}"
197
 
198
+ # Find entry point and auto-detect SDK
199
+ entry_point = _find_entry_point(files)
200
+ detected_sdk = _detect_sdk(files, entry_point)
201
+
202
+ # Use detected SDK if user left it as "static" but project is Python
203
+ if space_sdk == "static" and detected_sdk != "static":
204
+ space_sdk = detected_sdk
205
+
206
  try:
207
  if is_space:
208
  create_repo(
 
223
  logger.warning("Repo creation warning: %s", e)
224
 
225
  with tempfile.TemporaryDirectory(prefix="hf_push_") as tmp_dir:
226
+ # Write all project files
227
  for filepath, content in files.items():
228
  full_path = os.path.join(tmp_dir, filepath)
229
  os.makedirs(os.path.dirname(full_path), exist_ok=True)
230
  Path(full_path).write_text(content, encoding="utf-8")
231
 
232
+ # Ensure the entry point is named app.py for HF Spaces
233
+ if entry_point != "app.py" and entry_point.endswith(".py") and is_space:
234
+ src = os.path.join(tmp_dir, entry_point)
235
+ dst = os.path.join(tmp_dir, "app.py")
236
+ if os.path.exists(src) and not os.path.exists(dst):
237
+ import shutil
238
+ shutil.copy2(src, dst)
239
+
240
+ # Determine app_file for README
241
+ app_file = "app.py" if is_space and space_sdk in ("gradio", "streamlit") else entry_point
242
+ if is_space and space_sdk == "static":
243
+ # For static spaces, look for index.html
244
+ if "index.html" in files:
245
+ app_file = "index.html"
246
+ elif any(f.endswith(".html") for f in files):
247
+ app_file = next(f for f in files if f.endswith(".html"))
248
+
249
+ # Add README.md if not present
250
  readme_path = os.path.join(tmp_dir, "README.md")
251
  if not os.path.exists(readme_path):
252
  readme_content = f"""---
 
255
  colorFrom: blue
256
  colorTo: purple
257
  sdk: {space_sdk}
258
+ app_file: {app_file}
259
+ pinned: false
260
  ---
261
 
262
  # {name}
 
264
  Generated by Fullstack Code Builder using {MODEL_ID}.
265
  """
266
  Path(readme_path).write_text(readme_content, encoding="utf-8")
267
+ else:
268
+ # Update app_file in existing README to match entry point
269
+ existing = Path(readme_path).read_text(encoding="utf-8")
270
+ if "app_file:" in existing:
271
+ existing = re.sub(
272
+ r"app_file:\s*\S+", f"app_file: {app_file}", existing
273
+ )
274
+ if "sdk:" in existing:
275
+ existing = re.sub(
276
+ r"sdk:\s*\S+", f"sdk: {space_sdk}", existing
277
+ )
278
+ Path(readme_path).write_text(existing, encoding="utf-8")
279
 
280
+ # Add/merge requirements.txt for Python projects
281
  req_path = os.path.join(tmp_dir, "requirements.txt")
282
+ has_python = any(f.endswith(".py") for f in files.keys())
283
+
284
+ if has_python:
285
+ # Scan all Python code for imports
286
+ all_py_code = "\n".join(
287
+ content for fname, content in files.items()
288
+ if fname.endswith(".py")
289
+ )
290
+ auto_reqs = generate_requirements(all_py_code)
291
+
292
+ if os.path.exists(req_path):
293
+ # Merge with existing requirements.txt
294
+ existing_reqs = Path(req_path).read_text(encoding="utf-8").strip()
295
+ merged = set()
296
+ for line in (existing_reqs + "\n" + auto_reqs).splitlines():
297
+ line = line.strip()
298
+ if line and not line.startswith("#"):
299
+ # Normalize: take the package name (before >=, ==, etc.)
300
+ pkg_name = re.split(r"[><=!~]", line)[0].strip().lower()
301
+ # Keep the more specific version spec
302
+ merged.add(line)
303
+
304
+ Path(req_path).write_text("\n".join(sorted(merged)) + "\n", encoding="utf-8")
305
+ elif auto_reqs:
306
+ Path(req_path).write_text(auto_reqs, encoding="utf-8")
307
+ else:
308
+ # Minimal requirements for Python Spaces
309
+ Path(req_path).write_text("gradio>=4.0.0\n", encoding="utf-8")
310
 
311
  api.upload_folder(
312
  folder_path=tmp_dir,
code/server/routes.py CHANGED
@@ -406,7 +406,22 @@ def handle_chat(
406
 
407
  # Register code for download
408
  download_url = None
409
- project_files = multi_files if multi_files else {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
  if project_files:
412
  project_name = "generated-project"
@@ -466,23 +481,16 @@ def handle_push_hf(
466
  """Push generated project to HuggingFace Hub."""
467
  try:
468
  execution_context = json.loads(exec_context_json) if exec_context_json else {}
469
- project_files = execution_context.get("project_files", {})
470
-
471
- if not project_files:
472
- code = execution_context.get("code", "")
473
- if not code:
474
- yield json.dumps({
475
- "success": False,
476
- "message": "No code to push. Generate some code first.",
477
- "url": "",
478
- })
479
- return
480
 
 
 
481
  lang = execution_context.get("language", "python")
482
  is_gradio = execution_context.get("is_gradio", False)
483
  ext_map = {
484
  "python": "app.py", "py": "app.py",
485
- "javascript": "index.js", "js": "index.js",
486
  "html": "index.html", "web": "index.html",
487
  "typescript": "index.ts", "ts": "index.ts",
488
  }
@@ -493,6 +501,18 @@ def handle_push_hf(
493
  if is_gradio or is_gradio_code(code):
494
  space_sdk = "gradio"
495
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  project_name = repo_name.split("/")[-1] if "/" in repo_name else repo_name
497
 
498
  result = push_to_huggingface(
 
406
 
407
  # Register code for download
408
  download_url = None
409
+ project_files = dict(multi_files) if multi_files else {}
410
+
411
+ # Rename main.py → app.py for Python/Gradio projects (HF Spaces expects app.py)
412
+ if project_files and "main.py" in project_files and "app.py" not in project_files:
413
+ if target == "python" or is_gradio:
414
+ project_files["app.py"] = project_files.pop("main.py")
415
+
416
+ # If project_files is empty but we have single code, add it
417
+ if not project_files and code:
418
+ if target == "python":
419
+ fname = "app.py" if (is_gradio or is_gradio_code(code)) else "main.py"
420
+ elif target in {"web", "html", "javascript"}:
421
+ fname = "index.html"
422
+ else:
423
+ fname = f"main.{fence_lang or 'txt'}"
424
+ project_files = {fname: code}
425
 
426
  if project_files:
427
  project_name = "generated-project"
 
481
  """Push generated project to HuggingFace Hub."""
482
  try:
483
  execution_context = json.loads(exec_context_json) if exec_context_json else {}
484
+ project_files = dict(execution_context.get("project_files", {}) or {})
485
+ code = execution_context.get("code", "")
 
 
 
 
 
 
 
 
 
486
 
487
+ # If project_files is empty but we have code, build files from code
488
+ if not project_files and code:
489
  lang = execution_context.get("language", "python")
490
  is_gradio = execution_context.get("is_gradio", False)
491
  ext_map = {
492
  "python": "app.py", "py": "app.py",
493
+ "javascript": "index.html", "js": "index.html",
494
  "html": "index.html", "web": "index.html",
495
  "typescript": "index.ts", "ts": "index.ts",
496
  }
 
501
  if is_gradio or is_gradio_code(code):
502
  space_sdk = "gradio"
503
 
504
+ # If still no files, try extracting from the raw response
505
+ if not project_files and code:
506
+ project_files = extract_multi_file(code)
507
+
508
+ if not project_files:
509
+ yield json.dumps({
510
+ "success": False,
511
+ "message": "No code to push. Generate some code first.",
512
+ "url": "",
513
+ })
514
+ return
515
+
516
  project_name = repo_name.split("/")[-1] if "/" in repo_name else repo_name
517
 
518
  result = push_to_huggingface(