# Scraped page header - Spaces: Running; File size: 4,445 Bytes; revision ccb935d
"""Code extraction from model responses.
Extracts fenced code blocks and multi-file @@FILE: blocks.
Normalizes language names and detects Gradio code.
"""
from __future__ import annotations
import html
import re
from code.config.constants import (
CODE_BLOCK_RE,
FILE_BLOCK_RE,
THINKING_BLOCK_RE,
)
def strip_thinking_blocks(text: str) -> str:
    """Return *text* with all thinking blocks deleted.

    Every span matched by ``THINKING_BLOCK_RE`` is replaced with the empty
    string, after which leading and trailing whitespace is trimmed.
    """
    without_thinking = THINKING_BLOCK_RE.sub("", text)
    return without_thinking.strip()
def extract_code(response: str) -> tuple[str, str | None]:
    """Pull the first fenced code block out of a model response.

    Thinking blocks are stripped first, so code that only appears inside
    them is never returned.

    Returns:
        A ``(code, language)`` pair. ``code`` is the stripped block body and
        ``language`` is the lower-cased fence tag, or ``None`` when the tag
        is empty. When no block is found the result is ``("", None)``.
    """
    found = CODE_BLOCK_RE.search(strip_thinking_blocks(response))
    if found is None:
        return "", None

    body = found.group(2).strip()
    tag = found.group(1).strip().lower()
    # An empty tag is reported as None rather than "".
    return body, tag or None
def extract_multi_file(response: str) -> dict[str, str]:
    """Collect a multi-file project from ``@@FILE:`` blocks in a response.

    Each ``FILE_BLOCK_RE`` match contributes one entry: group 1 (stripped) is
    the file path and group 2 (stripped) is the content. A path that occurs
    more than once keeps the content of its last occurrence.

    When no such block exists, the first fenced code block (if any) is
    returned under a default file name chosen from its language tag, or
    ``code.txt`` when the tag is missing or unrecognised.

    Returns:
        Mapping of ``{filepath: content}``; empty when nothing was found.
    """
    visible_text = strip_thinking_blocks(response)
    collected: dict[str, str] = {
        block.group(1).strip(): block.group(2).strip()
        for block in FILE_BLOCK_RE.finditer(visible_text)
    }
    if collected:
        return collected

    # Fallback: no @@FILE: blocks, so try a single fenced code block.
    snippet, fence = extract_code(response)
    if not snippet:
        return collected

    default_names = {
        "python": "main.py", "py": "main.py",
        "javascript": "index.js", "js": "index.js",
        "typescript": "index.ts", "ts": "index.ts",
        "html": "index.html",
        "css": "styles.css",
        "java": "Main.java",
        "go": "main.go",
        "rust": "main.rs",
        "php": "index.php",
        "ruby": "main.rb",
        "csharp": "Program.cs",
        "swift": "main.swift",
        "kotlin": "Main.kt",
    }
    collected[default_names.get(fence or "", "code.txt")] = snippet
    return collected
def normalize_language(target_language: str | None, fence_lang: str | None) -> str:
    """Map a language name onto its canonical spelling.

    The fence tag wins over the target language; when both are empty or
    ``None`` the language defaults to ``"python"``. The chosen name is
    lower-cased, known aliases are folded into their canonical form, and
    anything else is returned lower-cased but otherwise unchanged.
    """
    # Only real aliases are listed; names that are already canonical
    # (python, web, java, go, ...) fall through to the default below.
    aliases = {
        "py": "python",
        "html": "web",
        "css": "web",
        "js": "javascript",
        "ts": "typescript",
        "c#": "csharp",
    }
    requested = fence_lang or target_language or "python"
    lowered = requested.lower()
    return aliases.get(lowered, lowered)
def is_gradio_code(code: str) -> bool:
    """Detect whether Python source looks like a Gradio app.

    The source counts as Gradio code when it contains any of:

    * ``import gradio`` (including ``import gradio.<submodule>``),
    * ``from gradio import ...`` or ``from gradio.<submodule> import ...``,
    * a ``gr.Interface`` / ``gr.Blocks`` / ``gr.TabbedInterface`` /
      ``gr.ChatInterface`` / ``gr.App`` reference.

    Word boundaries keep look-alike names from matching: ``import
    gradio_client`` and identifiers that merely end in ``gr`` (for example
    ``mgr.App``) are not treated as Gradio code.

    Args:
        code: Source text to inspect.

    Returns:
        ``True`` if any Gradio marker is present, ``False`` otherwise.
    """
    gradio_markers = (
        # `\b` after "gradio" rejects gradio_client and similar packages.
        r"\bimport\s+gradio\b"
        # Allow dotted submodules between "gradio" and "import".
        r"|\bfrom\s+gradio(?:\.\w+)*\s+import\b"
        # Leading `\b` so only the `gr` alias itself matches.
        r"|\bgr\.\s*(?:Interface|Blocks|TabbedInterface|ChatInterface|App)"
    )
    return re.search(gradio_markers, code) is not None
# ─── Web Document / Iframe Builder ─────────────────────────────────────
def _web_document(code: str, fence_lang: str | None) -> str:
    """Turn a code snippet into a complete HTML document.

    The fence language is compared case-insensitively:

    * ``css`` is placed in a ``<style>`` element of an otherwise empty page;
    * ``javascript`` / ``js`` is placed in a ``<script>`` element;
    * any other snippet is returned unchanged when it already contains a
      doctype or an ``<html>`` tag, and is otherwise wrapped in a minimal
      UTF-8 page body.
    """
    fence = fence_lang.lower() if fence_lang else ""

    if fence == "css":
        return f"<!doctype html><html><head><style>\n{code}\n</style></head><body></body></html>"
    if fence in ("javascript", "js"):
        return f"<!doctype html><html><body><script>\n{code}\n</script></body></html>"

    # Markup that already declares itself a full document is passed through.
    is_full_document = re.search(r"<!doctype|<html[\s>]", code, flags=re.IGNORECASE) is not None
    if is_full_document:
        return code
    return f"<!doctype html><html><head><meta charset='utf-8'></head><body>\n{code}\n</body></html>"
def build_iframe(code: str, fence_lang: str | None = None) -> str:
    """Render *code* as an ``<iframe>`` tag for the web preview.

    The snippet is first expanded into a full HTML document, then
    HTML-escaped (quotes included) so it can be embedded in the ``srcdoc``
    attribute. The frame is emitted with ``sandbox="allow-scripts"``.
    """
    escaped_document = html.escape(_web_document(code, fence_lang), quote=True)
    attributes = [
        'class="web-frame"',
        'sandbox="allow-scripts"',
        'allow="fullscreen"',
        "allowfullscreen",
        f'srcdoc="{escaped_document}"',
        'style="width:100%; min-height:680px; border:0; border-radius:14px; '
        'background:white;"',
    ]
    return "<iframe " + " ".join(attributes) + "></iframe>"
|