"""Code extraction from model responses.
Extracts fenced code blocks and multi-file @@FILE: blocks.
Normalizes language names and detects Gradio code.
"""
from __future__ import annotations
import html
import re
from code.config.constants import (
CODE_BLOCK_RE,
FILE_BLOCK_RE,
THINKING_BLOCK_RE,
)
def strip_thinking_blocks(text: str) -> str:
    """Return *text* with every ``THINKING_BLOCK_RE`` match deleted.

    Surrounding whitespace is trimmed from the result, so a response that
    consists only of thinking blocks collapses to the empty string.
    """
    without_thinking = THINKING_BLOCK_RE.sub("", text)
    return without_thinking.strip()
def extract_code(response: str) -> tuple[str, str | None]:
    """Pull the first fenced code block out of a model response.

    Thinking blocks are removed before searching, so fences inside them are
    never picked up.

    Returns:
        A ``(code, language)`` pair. ``code`` is the stripped block body and
        ``language`` is the stripped, lower-cased fence tag, or ``None`` when
        the tag is empty. ``("", None)`` is returned when no block is found.
    """
    found = CODE_BLOCK_RE.search(strip_thinking_blocks(response))
    if found is None:
        return "", None

    body = found.group(2).strip()
    language = found.group(1).strip().lower()
    # An empty tag is reported as None rather than "".
    return body, language or None
def extract_multi_file(response: str) -> dict[str, str]:
    """Collect the files of a multi-file answer written in @@FILE: format.

    Every ``FILE_BLOCK_RE`` match in the response (with thinking blocks
    removed) contributes one ``{filepath: content}`` entry; both parts are
    stripped, and a repeated path keeps the last content seen.

    When no @@FILE: block is present, the first fenced code block is used
    instead and stored under a default filename chosen from its language
    tag (``code.txt`` for unknown or missing tags). The result is empty when
    neither form is found.
    """
    cleaned = strip_thinking_blocks(response)
    project: dict[str, str] = {
        found.group(1).strip(): found.group(2).strip()
        for found in FILE_BLOCK_RE.finditer(cleaned)
    }
    if project:
        return project

    # Fallback: no @@FILE: blocks, so treat a single fenced block as the project.
    snippet, tag = extract_code(response)
    if not snippet:
        return project

    default_names = {
        "python": "main.py",
        "py": "main.py",
        "javascript": "index.js",
        "js": "index.js",
        "typescript": "index.ts",
        "ts": "index.ts",
        "html": "index.html",
        "css": "styles.css",
        "java": "Main.java",
        "go": "main.go",
        "rust": "main.rs",
        "php": "index.php",
        "ruby": "main.rb",
        "csharp": "Program.cs",
        "swift": "main.swift",
        "kotlin": "Main.kt",
    }
    target = default_names.get(tag or "", "code.txt")
    project[target] = snippet
    return project
def normalize_language(target_language: str | None, fence_lang: str | None) -> str:
    """Map a language name onto its canonical spelling.

    The fence tag takes precedence over the requested target language; when
    both are empty or ``None`` the language defaults to ``"python"``. The
    chosen name is lower-cased, known aliases are folded into their canonical
    form, and any other name is returned lower-cased but otherwise unchanged.
    """
    # Only names that differ from their canonical form need an entry;
    # everything else (including already-canonical names) passes through.
    aliases = {
        "py": "python",
        "html": "web",
        "css": "web",
        "js": "javascript",
        "ts": "typescript",
        "c#": "csharp",
    }
    chosen = fence_lang or target_language or "python"
    key = chosen.lower()
    return aliases.get(key, key)
# Signatures that mark Python source as a Gradio app. Word boundaries keep
# look-alikes such as ``import gradio_client`` or ``mygr.Interface`` from
# matching, and the ``from`` form accepts dotted submodules
# (``from gradio.themes import Soft``).
_GRADIO_SIGNATURE_RE = re.compile(
    r"\bimport\s+gradio\b"
    r"|\bfrom\s+gradio(?:\.\w+)*\s+import\b"
    r"|\bgr\.\s*(?:Interface|Blocks|TabbedInterface|ChatInterface|App)\b"
)


def is_gradio_code(code: str) -> bool:
    """Detect if Python code is a Gradio app.

    This is a textual heuristic: it looks for an import of the ``gradio``
    package or a use of one of the ``gr.`` app constructors anywhere in
    *code* (comments and string literals included).

    Args:
        code: Python source text to inspect.

    Returns:
        ``True`` when a Gradio signature is found, ``False`` otherwise
        (including for empty input).
    """
    return _GRADIO_SIGNATURE_RE.search(code) is not None
# ─── Web Document / Iframe Builder ─────────────────────────────────────
def _web_document(code: str, fence_lang: str | None) -> str:
"""Wrap code in an HTML document if needed."""
lang = (fence_lang or "").lower()
if lang in {"javascript", "js"}:
return f"
"
if lang == "css":
return f""
if re.search(r"]", code, flags=re.IGNORECASE):
return code
return f"\n{code}\n"
def build_iframe(code: str, fence_lang: str | None = None) -> str:
    """Build a sandboxed iframe HTML string for web preview.

    The code is first turned into a document by ``_web_document`` and then
    embedded through the iframe's ``srcdoc`` attribute, HTML-escaped
    (quotes included) so it cannot terminate the attribute value.

    Args:
        code: Source text to preview.
        fence_lang: Language tag of the fence, forwarded to ``_web_document``.

    Returns:
        A single ``<iframe>`` element as a string.
    """
    document = _web_document(code, fence_lang)
    srcdoc = html.escape(document, quote=True)
    # NOTE(review): the iframe markup was reconstructed -- the original literal
    # was empty, which discarded ``srcdoc``. Confirm the sandbox tokens and
    # sizing the UI expects. ``allow-scripts`` without ``allow-same-origin``
    # lets the preview run JavaScript while keeping it in an opaque origin.
    return (
        f'<iframe srcdoc="{srcdoc}" sandbox="allow-scripts" '
        'style="width:100%;height:600px;border:none;"></iframe>'
    )