"""Code extraction from model responses. Extracts fenced code blocks and multi-file @@FILE: blocks. Normalizes language names and detects Gradio code. """ from __future__ import annotations import html import re from code.config.constants import ( CODE_BLOCK_RE, FILE_BLOCK_RE, THINKING_BLOCK_RE, ) def strip_thinking_blocks(text: str) -> str: """Remove blocks from model output.""" return THINKING_BLOCK_RE.sub("", text).strip() def extract_code(response: str) -> tuple[str, str | None]: """Return the first fenced code block and its language tag.""" visible_response = strip_thinking_blocks(response) match = CODE_BLOCK_RE.search(visible_response) if not match: return "", None return match.group(2).strip(), (match.group(1).strip().lower() or None) def extract_multi_file(response: str) -> dict[str, str]: """Extract multi-file project from @@FILE: format. Returns dict of {filepath: content}. """ files: dict[str, str] = {} visible = strip_thinking_blocks(response) for match in FILE_BLOCK_RE.finditer(visible): filepath = match.group(1).strip() content = match.group(2).strip() files[filepath] = content # Fallback: if no @@FILE: blocks found, extract single code block if not files: code, lang = extract_code(response) if code: ext_map = { "python": "main.py", "py": "main.py", "javascript": "index.js", "js": "index.js", "typescript": "index.ts", "ts": "index.ts", "html": "index.html", "css": "styles.css", "java": "Main.java", "go": "main.go", "rust": "main.rs", "php": "index.php", "ruby": "main.rb", "csharp": "Program.cs", "swift": "main.swift", "kotlin": "Main.kt", } filename = ext_map.get(lang or "", "code.txt") files[filename] = code return files def normalize_language(target_language: str | None, fence_lang: str | None) -> str: """Normalize language name to a canonical form.""" lang = (fence_lang or target_language or "python").lower() if lang in {"python", "py"}: return "python" if lang in {"html", "web", "css"}: return "web" if lang in {"javascript", "js"}: return "javascript" if lang in {"typescript", "ts"}: return "typescript" if lang == "java": return "java" if lang == "go": return "go" if lang == "rust": return "rust" if lang == "php": return "php" if lang == "ruby": return "ruby" if lang in {"csharp", "c#"}: return "csharp" if lang == "swift": return "swift" if lang == "kotlin": return "kotlin" return lang def is_gradio_code(code: str) -> bool: """Detect if Python code is a Gradio app.""" return bool( re.search( r"import\s+gradio|from\s+gradio\s+import|gr\.\s*(Interface|Blocks|TabbedInterface|ChatInterface|App)", code, ) ) # ─── Web Document / Iframe Builder ───────────────────────────────────── def _web_document(code: str, fence_lang: str | None) -> str: """Wrap code in an HTML document if needed.""" lang = (fence_lang or "").lower() if lang in {"javascript", "js"}: return f"" if lang == "css": return f"" if re.search(r"]", code, flags=re.IGNORECASE): return code return f"\n{code}\n" def build_iframe(code: str, fence_lang: str | None = None) -> str: """Build a sandboxed iframe HTML string for web preview.""" document = _web_document(code, fence_lang) srcdoc = html.escape(document, quote=True) return ( '' )