Spaces:
Running
Running
File size: 10,762 Bytes
981bd19 f74d0ab 9756402 1433b16 77bd823 3b875ca 42bbd85 1433b16 42bbd85 1433b16 42bbd85 43c8912 42bbd85 9756402 42bbd85 77bd823 42bbd85 77bd823 b8bb08b 3b875ca 42bbd85 3b875ca 83c1616 42bbd85 3b875ca 42bbd85 3b875ca 42bbd85 3b875ca fd363ed 42bbd85 3b875ca 77bd823 5f66427 77bd823 1433b16 77bd823 42bbd85 77bd823 1433b16 77bd823 1433b16 77bd823 1433b16 42bbd85 5f66427 43c8912 42bbd85 43c8912 1433b16 42bbd85 77bd823 9756402 42bbd85 77bd823 9756402 2d635f4 1433b16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 |
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr
from gradio import Server
from fastapi.responses import HTMLResponse, PlainTextResponse, FileResponse # frontend + traces + vendored static assets
import mimetypes
from typing import Any, cast # to resolve PyLance freaking out over llama-cpp-python in the generate_flowchart function
from textwrap import dedent
from pathlib import Path # load the custom frontend from disk
import re # remove thinking tag from response
import json, time, uuid # agent-trace logging
from datetime import datetime, timezone
# ----- Get Model ----- #
# Fetch the UD-Q3_K_XL GGUF quantization from the Hugging Face Hub repo; the
# returned local path is what llama.cpp loads below.
# NOTE(review): both calls run at import time, so merely importing this module
# triggers the download/cache lookup and the model load — confirm that is intended.
model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
    filename="Qwen3-Coder-30B-A3B-Instruct-UD-Q3_K_XL.gguf" # fallback: Q2_K_XL
)
# Initialize llama.cpp with the local cached path, a 4096-token context window
# and two threads.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2
)
# ----- Init App ----- #
# Server object on which the generate_flowchart API and the custom HTTP routes
# further down are registered.
app = gr.Server(title="Code-to-Flowchart Generator")
# ----- Agent traces ----- #
# Each generation appends one JSON line capturing the full LLM call (input code,
# the model's reasoning, output Mermaid + linemap, token usage, latency).
# Download the whole log from the running app at /traces.
# MODEL_NAME is only a label written into each trace record; it is not used to
# load the model, so keep it in sync with the hf_hub_download arguments by hand.
MODEL_NAME = "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:UD-Q3_K_XL"
# The trace log is stored next to this source file.
TRACE_PATH = Path(__file__).parent / "agent_traces.jsonl"
def write_trace(record: dict) -> None:
    """Append ``record`` to the trace log as one JSON line (best effort).

    Tracing must never break generation, so every failure is contained here.
    Unlike a bare ``pass``, the failure is reported as a logged warning so a
    full disk, a read-only directory or an unserializable record does not go
    unnoticed.

    Args:
        record: JSON-serializable mapping describing one model call.

    Returns:
        None. Never raises.
    """
    import logging  # function-scope import: keeps this block self-contained

    try:
        # Serialize first so an unserializable record never touches the file.
        line = json.dumps(record, ensure_ascii=False) + "\n"
        with open(TRACE_PATH, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception:
        # Deliberately broad: this is the boundary that shields generation.
        logging.getLogger(__name__).warning(
            "could not append agent trace", exc_info=True
        )
# ----- Functions ----- #
# This is a cleaning function to resolve common syntax errors.
def quote_labels(text: str) -> str:
    """Quote-wrap every Mermaid node label and neutralise code characters in it.

    Mermaid node labels can't hold raw code characters, so each ``[...]`` and
    ``{...}`` label body that directly follows a node id is wrapped in double
    quotes. Inside the body, double quotes become apostrophes and brackets and
    braces become Mermaid entity codes (``#91;`` ``#93;`` ``#123;`` ``#125;``),
    which Mermaid renders as the original characters.

    A label that is already wrapped in double quotes is unwrapped first, so it
    is not rendered with stray apostrophes and running the function twice
    gives the same result as running it once.

    Args:
        text: Mermaid source, possibly with unquoted labels.

    Returns:
        The same source with every recognised label quoted and escaped.
    """
    # A label's real closing bracket is followed by a Mermaid connector,
    # edge-label, pipe, statement end, or EOL, so operators after a subscript
    # (== < <= > >= != %) are not mistaken for the close.
    end = r'(?=\s*(?:[-<][-.>xo]|==[>=xo]|\||;|$))'
    square = re.compile(r'(?<=\w)\[(.*?)\]' + end)
    curly = re.compile(r'(?<=\w)\{(.*?)\}' + end)
    # One-pass character map; the bracket/brace entries are Mermaid entity codes.
    escapes = str.maketrans({
        '"': "'",
        '[': '#91;',
        ']': '#93;',
        '{': '#123;',
        '}': '#125;',
    })

    def esc(body: str) -> str:
        # Drop quotes the model already supplied before escaping the inside.
        if len(body) >= 2 and body[0] == '"' and body[-1] == '"':
            body = body[1:-1]
        return body.translate(escapes)

    out = []
    # Line by line, so the `$` / `\s*` in the lookahead never reach into the next line.
    for line in text.split('\n'):
        line = square.sub(lambda m: '["' + esc(m.group(1)) + '"]', line)
        line = curly.sub(lambda m: '{"' + esc(m.group(1)) + '"}', line)
        out.append(line)
    return '\n'.join(out)
# Parse the model's <linemap> block into {nodeId: [startLine, endLine]}.
# Tolerant of junk lines; drops any entry whose line(s) fall outside the source.
def parse_linemap(block: str, num_lines: int) -> dict:
    """Convert the text inside a ``<linemap>`` block into ``{node_id: [start, end]}``.

    Each useful row looks like ``NodeId: N`` or ``NodeId: N-M``. Rows that do
    not fit that shape are skipped, a reversed range is put in ascending
    order, and a row is dropped when any of its lines lies outside
    ``1..num_lines``. A later row for the same node id replaces an earlier one.

    Args:
        block: Raw text found between the linemap tags.
        num_lines: Number of lines in the source; a falsy value rejects every row.

    Returns:
        Mapping of node id to a two-element ``[start_line, end_line]`` list.
    """
    entry = re.compile(r'\s*([A-Za-z]\w*)\s*:\s*(\d+)(?:\s*-\s*(\d+))?\s*$')
    spans: dict = {}
    for row in block.strip().splitlines():
        hit = entry.match(row)
        if hit is None:
            continue
        node_id, first, last = hit.groups()
        # A single number stands for a one-line span.
        low, high = sorted((int(first), int(last if last else first)))
        # With low <= high, checking the two outer bounds covers both ends.
        if not num_lines or low < 1 or high > num_lines:
            continue
        spans[node_id] = [low, high]
    return spans
@app.api(name="generate_flowchart")
def generate_flowchart(src_code: str) -> dict:
    """Generate a Mermaid flowchart and a node-to-line map for ``src_code``.

    The source is line-numbered and sent to the local model with the system
    prompt below. The reply is then split into three parts: the ``<thinking>``
    block (kept only for the trace), the ``<linemap>`` block (parsed and
    range-checked by ``parse_linemap``) and the remaining Mermaid text
    (cleaned by ``quote_labels``). One trace record is written per model call.

    The reply parsing tolerates a reply that was cut off or badly formatted:
    an unterminated ``<thinking>`` / ``<linemap>`` block is treated as running
    to the end of the reply, stray Markdown fence lines are removed, and a
    missing content value is treated as an empty reply.

    Args:
        src_code: Raw source text to chart.

    Returns:
        ``{"mermaid": str, "linemap": {node_id: [start_line, end_line]}}``;
        both values are empty when ``src_code`` is blank.

    Raises:
        Exception: whatever the model call raises is re-raised unchanged,
            after a trace record with status ``"error"`` has been written.
    """
    # Nothing to chart: skip the model call (and the trace) entirely.
    if not src_code.strip():
        return {"mermaid": "", "linemap": {}}

    # Sampling settings, named once so the trace cannot drift from the call.
    temperature = 0.1  # Keep it quite deterministic for now
    max_tokens = 1024

    # Number the source lines so the model can cite them in the <linemap> block.
    src_lines = src_code.splitlines()
    num_lines = len(src_lines)
    numbered = "\n".join(f"{i}| {ln}" for i, ln in enumerate(src_lines, 1))

    def trace(status: str, **fields: Any) -> None:
        # Append-only JSONL record; downloadable at /traces.
        write_trace({
            "id": uuid.uuid4().hex,
            "ts": datetime.now(timezone.utc).isoformat(),
            "event": "generate_flowchart",
            "model": MODEL_NAME,
            # n_ctx mirrors the value the Llama instance was created with.
            "params": {"temperature": temperature, "max_tokens": max_tokens, "n_ctx": 4096},
            "input": {"src_code": src_code, "num_lines": num_lines},
            **fields,
            "status": status,
        })

    # Set system prompt
    # NOTE(review): the few-shot example's labels (`val > 10`, `Return 'Active'`)
    # contain the raw operators/quotes that constraint 4 forbids; the example
    # probably outweighs the rule for the model — consider paraphrasing it.
    system_prompt = dedent("""
        ## Role/Persona
        You are a senior staff software architect and compiler engineer specializing in visual control-flow mapping. Your philosophy is pure utility: you translate raw execution logic into highly accurate, scannable, structural diagrams without any conversational filler, meta-commentary, or stylistic fluff.
        ## Context/Objective
        The user will provide source code files or logic snippets. Your sole objective is to parse the syntax and output a corresponding, valid Mermaid.js flowchart graph. This graph will be rendered natively in a production UI to help developers audit execution paths at a glance.
        ## Strict Constraints
        <constraints>
        1. OUTPUT FORMAT: Output valid, raw Mermaid.js syntax, immediately followed by the required <linemap> block (constraint 5). Nothing else.
        2. NO MARKDOWN FENCING: Do not wrap the output in ```mermaid or ``` blocks. Start directly with the Mermaid graph definition, for example: graph TD.
        3. NO PROSE: Do not include introductory text, explanations, or concluding remarks. If the code cannot be parsed, output an isolated error node.
        4. NODE NAMING: Paraphrase conditions into plain words — never put raw code, operators, quotes, parentheses, or square brackets/subscripts inside labels (write Index in bounds?, not i < len(nums); write Element is even?, not nums[i] % 2 == 0)
        5. SOURCE MAP: The user's code is prefixed with `N| ` line numbers (these are references, never copy the `N| ` prefix into a label). After the diagram, output a <linemap> block: one `NodeId: N` per node, where N is the 1-based source line that node represents (use `NodeId: start-end` for a multi-line construct). Omit purely structural Start/End nodes that correspond to no source line.
        </constraints>
        <banned_vocabulary>
        - Here is the flowchart
        - ```mermaid
        - ```
        - Note:
        - Explanation:
        - In this diagram
        - As requested
        </banned_vocabulary>
        ## Response Workflow
        Before outputting the final diagram syntax, perform structural parsing inside a hidden <thinking> tag according to these steps:
        1. Identify all conditional branches, including if/else, loops, including for/while, and termination points, including return/throw.
        2. Map out the execution flow nodes chronologically.
        3. Verify that every opening bracket and node label matching syntax, including [ ], ( ), and { }, is perfectly balanced and closed according to Mermaid specifications.
        4. Ensure no markdown formatting tags leak past the closing </thinking> tag.
        ## Few-Shot Examples
        Input:
        1| def check_status(val):
        2| if val > 10:
        3| return "Active"
        4| else:
        5| return "Inactive"
        Output:
        <thinking>
        1. Control structures: One conditional check, two return branches.
        2. Nodes: A Start, B Conditional, C Active return, D Inactive return.
        3. Source lines: def is line 1, the if is line 2, Active return is line 3, Inactive return is line 5.
        </thinking>
        graph TD
        A[Start: check_status] --> B{val > 10}
        B -- True --> C[Return 'Active']
        B -- False --> D[Return 'Inactive']
        <linemap>
        A: 1
        B: 2
        C: 3
        D: 5
        </linemap>
    """).strip()

    # Reset the cache per request so no cross-request bleeding
    # NOTE(review): `llm` is one shared instance; if this handler can run
    # concurrently, reset() + completion would need serialising — confirm.
    llm.reset()

    t0 = time.perf_counter()
    try:
        # Casting else PyLance gets mad
        response = cast(Any, llm.create_chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": numbered}
            ],
            temperature=temperature,
            max_tokens=max_tokens,
            stream=False
        ))
    except Exception as exc:
        # Record the failure, then let it propagate exactly as before.
        trace(
            "error",
            error=f"{type(exc).__name__}: {exc}",
            latency_ms=round((time.perf_counter() - t0) * 1000),
        )
        raise
    latency_ms = round((time.perf_counter() - t0) * 1000)

    # A missing content value is treated as an empty reply.
    raw = response["choices"][0]["message"]["content"] or ""
    usage = response.get("usage", {}) or {}

    # Capture the model's hidden reasoning for the trace, then strip the tags.
    # `|\Z` makes a block that never closes (reply cut off at max_tokens) run
    # to the end of the reply instead of leaking into the diagram.
    think = re.search(r'<thinking>(.*?)(?:</thinking>|\Z)', raw, flags=re.DOTALL)
    reasoning = think.group(1).strip() if think else ""
    content = re.sub(r'<thinking>.*?(?:</thinking>|\Z)', '', raw, flags=re.DOTALL)

    # Extract + strip the node→line map, then validate it against the source length
    linemap: dict = {}
    lm = re.search(r'<linemap>(.*?)(?:</linemap>|\Z)', content, flags=re.DOTALL)
    if lm:
        block = lm.group(1)
        if not lm.group(0).endswith("</linemap>"):
            # Unterminated block: its last row may be a cut-off number, so
            # drop that row rather than risk mapping a node to the wrong line.
            block = block.rpartition("\n")[0]
        linemap = parse_linemap(block, num_lines)
        content = content[:lm.start()] + content[lm.end():]

    # Remove Markdown fence lines the model may emit despite the prompt; a line
    # holding only ``` or ```mermaid is never valid Mermaid.
    content = re.sub(r'^[ \t]*```[ \t]*(?:mermaid)?[ \t\r]*$', '', content, flags=re.MULTILINE)

    # Quote-wrap each node label and escape any leaked code characters
    mermaid = quote_labels(content).strip()  # and remove excess whitespace

    trace(
        "ok",
        reasoning=reasoning,
        output={"raw": raw, "mermaid": mermaid, "linemap": linemap},
        usage={
            "prompt_tokens": usage.get("prompt_tokens"),
            "completion_tokens": usage.get("completion_tokens"),
            "total_tokens": usage.get("total_tokens"),
        },
        latency_ms=latency_ms,
    )
    return {"mermaid": mermaid, "linemap": linemap}
# ----- Custom Frontend ----- #
# Served from frontend.html so the same file can be opened directly in a
# browser (file://) to preview the UI without loading the model.
# The file is read once, at import time: a missing frontend.html fails startup,
# and later edits to it are not picked up until the process restarts.
index_html = (Path(__file__).parent / "frontend.html").read_text(encoding="utf-8")
# Load the custom HTML
# / takes precedence over default Blocks UI
@app.get("/")
def index():
    """Return the custom frontend page that was loaded at import time."""
    return HTMLResponse(index_html)
# Serve the vendored frontend assets (Mermaid, CodeMirror bundle, Gradio client,
# fonts) locally so the app needs NO external CDN/API at runtime.
# Resolved once so the /static route can compare requested paths against an
# absolute, symlink-free directory.
STATIC_DIR = (Path(__file__).parent / "static").resolve()
# Register these two types explicitly instead of relying on the host's
# mimetypes table — presumably because some systems lack or mis-map them; verify.
mimetypes.add_type("text/javascript", ".js")
mimetypes.add_type("font/woff2", ".woff2")
@app.get("/static/{fname:path}")
def static_files(fname: str):
    """Serve one vendored asset from ``STATIC_DIR``.

    Args:
        fname: Path of the requested asset, relative to ``STATIC_DIR``.

    Returns:
        A ``FileResponse`` for an existing regular file strictly inside
        ``STATIC_DIR``; otherwise a plain-text 404 response.
    """
    try:
        fp = (STATIC_DIR / fname).resolve()
    except (OSError, ValueError):
        # Unresolvable request path (e.g. an embedded NUL byte): same answer
        # as any other missing file rather than a server error.
        return PlainTextResponse("not found", status_code=404)
    # contain to STATIC_DIR (no path traversal) and require a real file.
    # Compared as paths, not as strings, so the check does not depend on the
    # platform's separator and also holds when STATIC_DIR is a filesystem root.
    if STATIC_DIR not in fp.parents or not fp.is_file():
        return PlainTextResponse("not found", status_code=404)
    mt, _ = mimetypes.guess_type(str(fp))
    return FileResponse(fp, media_type=mt or "application/octet-stream")
# Download every agent trace collected this run (one JSON object per line).
# curl https://<your-space>/traces > agent_traces.jsonl
@app.get("/traces")
def traces():
    """Return the whole trace log as a downloadable NDJSON attachment.

    When the log file does not exist yet, the response body is empty.
    """
    # NOTE(review): no access control is visible on this route, and every
    # record contains the submitted source code — confirm that is acceptable.
    log_text = ""
    if TRACE_PATH.exists():
        log_text = TRACE_PATH.read_text(encoding="utf-8")
    download_headers = {"Content-Disposition": 'attachment; filename="agent_traces.jsonl"'}
    return PlainTextResponse(
        log_text,
        media_type="application/x-ndjson",
        headers=download_headers,
    )
# Start serving. share=False: no external gradio.live tunnel — fully self-hosted.
# NOTE(review): runs unconditionally at import time (no __main__ guard) — confirm
# the module is only ever executed as the entry script.
app.launch(share=False)