File size: 10,762 Bytes
981bd19
 
 
f74d0ab
9756402
1433b16
 
77bd823
3b875ca
42bbd85
 
1433b16
 
42bbd85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1433b16
 
 
 
 
 
 
 
 
 
 
 
 
 
42bbd85
 
 
 
 
 
 
 
 
 
 
 
 
43c8912
 
 
 
 
 
 
 
42bbd85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9756402
42bbd85
77bd823
42bbd85
 
 
 
 
 
77bd823
b8bb08b
3b875ca
 
 
 
 
 
 
 
 
42bbd85
3b875ca
 
83c1616
42bbd85
3b875ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42bbd85
 
 
 
 
3b875ca
 
 
 
 
42bbd85
3b875ca
 
 
fd363ed
 
42bbd85
 
 
 
 
 
3b875ca
77bd823
5f66427
 
 
77bd823
1433b16
77bd823
 
 
42bbd85
77bd823
 
 
 
 
1433b16
77bd823
1433b16
 
77bd823
1433b16
 
 
 
42bbd85
 
 
 
 
 
 
5f66427
43c8912
42bbd85
43c8912
1433b16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42bbd85
77bd823
9756402
42bbd85
 
 
77bd823
9756402
 
 
 
 
2d635f4
1433b16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243

import json, time, uuid # agent-trace logging
import logging # report trace-write failures instead of dropping them silently
import mimetypes
import re # remove thinking tag from response
from datetime import datetime, timezone
from pathlib import Path # load the custom frontend from disk
from textwrap import dedent
from typing import Any, cast # to resolve PyLance freaking out over llama-cpp-python in the generate_flowchart function

import gradio as gr
from fastapi.responses import HTMLResponse, PlainTextResponse, FileResponse # frontend + traces + vendored static assets
from gradio import Server
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ----- Get Model ----- #
# Fetch the UD-Q3_K_XL GGUF quantization from the repo (this is the file named
# in `filename` below and in MODEL_NAME). The returned value is used as the
# on-disk model path by the Llama constructor.
model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
    filename="Qwen3-Coder-30B-A3B-Instruct-UD-Q3_K_XL.gguf" # fallback: Q2_K_XL
)

# Initialize llama.cpp with the local cached path.
# NOTE: this runs at import time, so importing this module loads the model.
# n_ctx=4096 is also written, as a literal, into every trace's "params" by
# generate_flowchart -- keep the two in sync when changing it.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2
)

# ----- Init App ----- #
# Single application object; the API endpoint and every HTTP route below are
# registered on it through its decorators.
app = gr.Server(title="Code-to-Flowchart Generator")

# ----- Agent traces ----- #
# Each generation appends one JSON line capturing the full LLM call (input code,
# the model's reasoning, output Mermaid + linemap, token usage, latency).
# Download the whole log from the running app at  /traces .
MODEL_NAME = "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:UD-Q3_K_XL"
TRACE_PATH = Path(__file__).parent / "agent_traces.jsonl"

def write_trace(record: dict) -> None:
    """Append ``record`` to the trace log as a single JSON line (best effort).

    Args:
        record: JSON-serialisable mapping describing one generation.

    Returns:
        None. Failures are never raised to the caller: tracing must not break
        generation. They are reported through the ``logging`` module instead,
        so a trace log that has stopped working is visible in the app logs.
    """
    try:
        # Serialise first: an unserialisable record then fails before the file
        # is opened, so nothing is created or touched for a record that cannot
        # be written.
        line = json.dumps(record, ensure_ascii=False) + "\n"
        with open(TRACE_PATH, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception:
        # Deliberately broad: any tracing problem is logged and swallowed.
        logging.getLogger(__name__).warning(
            "could not write agent trace to %s", TRACE_PATH, exc_info=True
        )

# ----- Functions ----- #

# This is a cleaning function to resolve common syntax errors.
def quote_labels(text: str) -> str:
    """Quote-wrap every ``[...]`` / ``{...}`` node label and escape its body.

    Mermaid node labels can't hold raw code characters, so each label body is
    wrapped in double quotes; inner double quotes become single quotes and
    inner brackets/braces become numeric entities.

    A label is an opening ``[`` or ``{`` directly after a word character
    (the node id). Its real closing bracket is the first matching one that is
    followed by a Mermaid connector, edge-label pipe, ``;`` or end of line, so
    an operator after a subscript (``nums[i] % 2``) is not taken as the close.

    Both shapes are matched by ONE pattern in a single left-to-right pass.
    Running a separate square-bracket pass over the whole line first lets a
    subscript inside a ``{...}`` label pair up with the ``]`` of a later node
    on the same line and swallow everything in between.

    Labels that already arrive wrapped in double quotes are not wrapped a
    second time, so the function is idempotent.

    Args:
        text: Mermaid source, possibly multi-line.

    Returns:
        The same source with every recognised label quoted and escaped.
    """
    # Lookahead for "what may legally follow a label's closing bracket".
    END = r'(?=\s*(?:[-<][-.>xo]|==[>=xo]|\||;|$))'
    label = re.compile(r'(?<=\w)(?:\[(?P<square>.*?)\]|\{(?P<curly>.*?)\})' + END)
    escapes = str.maketrans({
        '"': "'",
        '[': '&#91;', ']': '&#93;',
        '{': '&#123;', '}': '&#125;',
    })

    def esc(body: str) -> str:
        # Already-quoted label: drop the one wrapping pair instead of turning
        # it into literal single quotes inside a new pair.
        if len(body) >= 2 and body[0] == body[-1] == '"' and '"' not in body[1:-1]:
            body = body[1:-1]
        return body.translate(escapes)

    def wrap(m: "re.Match") -> str:
        square = m.group('square')
        if square is not None:
            return '["' + esc(square) + '"]'
        return '{"' + esc(m.group('curly')) + '"}'

    # Per line, so `$` in END means end of the current statement line.
    return '\n'.join(label.sub(wrap, line) for line in text.split('\n'))

# Parse the model's <linemap> block into {nodeId: [startLine, endLine]}.
# Tolerant of junk lines; drops any entry whose line(s) fall outside the source.
def parse_linemap(block: str, num_lines: int) -> dict:
    """Turn the text of a ``<linemap>`` block into ``{node_id: [start, end]}``.

    Each useful line looks like ``NodeId: N`` or ``NodeId: N-M``; anything that
    does not match is ignored. A reversed range is normalised so start <= end,
    and an entry is kept only when both ends lie within ``1..num_lines``
    (nothing is kept when ``num_lines`` is falsy). A node id that appears more
    than once keeps its last valid entry.
    """
    entry_re = re.compile(r'\s*([A-Za-z]\w*)\s*:\s*(\d+)(?:\s*-\s*(\d+))?\s*$')
    mapping: dict = {}
    for entry in block.strip().splitlines():
        found = entry_re.match(entry)
        if found is None:
            continue
        node_id, first, second = found.groups()
        start = int(first)
        end = start if not second else int(second)
        low, high = min(start, end), max(start, end)
        # low <= high here, so checking the two outer bounds covers both ends
        if num_lines and 1 <= low and high <= num_lines:
            mapping[node_id] = [low, high]
    return mapping

# Sampling settings, named once so the completion call and the trace record
# cannot drift apart.
_TEMPERATURE = 0.1  # Keep it quite deterministic for now
_MAX_TOKENS = 1024

# The prompt is constant across requests, so it is built once at import.
# The few-shot labels are paraphrased (no operators, no quotes) so the example
# obeys constraint 4 instead of demonstrating the opposite.
_SYSTEM_PROMPT = dedent("""
    ## Role/Persona
    You are a senior staff software architect and compiler engineer specializing in visual control-flow mapping. Your philosophy is pure utility: you translate raw execution logic into highly accurate, scannable, structural diagrams without any conversational filler, meta-commentary, or stylistic fluff.

    ## Context/Objective
    The user will provide source code files or logic snippets. Your sole objective is to parse the syntax and output a corresponding, valid Mermaid.js flowchart graph. This graph will be rendered natively in a production UI to help developers audit execution paths at a glance.

    ## Strict Constraints
    <constraints>
    1. OUTPUT FORMAT: Output valid, raw Mermaid.js syntax, immediately followed by the required <linemap> block (constraint 5). Nothing else.
    2. NO MARKDOWN FENCING: Do not wrap the output in ```mermaid or ``` blocks. Start directly with the Mermaid graph definition, for example: graph TD.
    3. NO PROSE: Do not include introductory text, explanations, or concluding remarks. If the code cannot be parsed, output an isolated error node.
    4. NODE NAMING: Paraphrase conditions into plain words — never put raw code, operators, quotes, parentheses, or square brackets/subscripts inside labels (write Index in bounds?, not i < len(nums); write Element is even?, not nums[i] % 2 == 0)
    5. SOURCE MAP: The user's code is prefixed with `N| ` line numbers (these are references, never copy the `N| ` prefix into a label). After the diagram, output a <linemap> block: one `NodeId: N` per node, where N is the 1-based source line that node represents (use `NodeId: start-end` for a multi-line construct). Omit purely structural Start/End nodes that correspond to no source line.
    </constraints>

    <banned_vocabulary>
    - Here is the flowchart
    - ```mermaid
    - ```
    - Note:
    - Explanation:
    - In this diagram
    - As requested
    </banned_vocabulary>

    ## Response Workflow
    Before outputting the final diagram syntax, perform structural parsing inside a hidden <thinking> tag according to these steps:
    1. Identify all conditional branches, including if/else, loops, including for/while, and termination points, including return/throw.
    2. Map out the execution flow nodes chronologically.
    3. Verify that every opening bracket and node label matching syntax, including [ ], ( ), and { }, is perfectly balanced and closed according to Mermaid specifications.
    4. Ensure no markdown formatting tags leak past the closing </thinking> tag.

    ## Few-Shot Examples

    Input:
    1| def check_status(val):
    2|     if val > 10:
    3|         return "Active"
    4|     else:
    5|         return "Inactive"

    Output:
    <thinking>
    1. Control structures: One conditional check, two return branches.
    2. Nodes: A Start, B Conditional, C Active return, D Inactive return.
    3. Source lines: def is line 1, the if is line 2, Active return is line 3, Inactive return is line 5.
    </thinking>
    graph TD
        A[Start: check_status] --> B{Value greater than 10?}
        B -- True --> C[Return Active]
        B -- False --> D[Return Inactive]
    <linemap>
    A: 1
    B: 2
    C: 3
    D: 5
    </linemap>
    """).strip()

_THINKING_RE = re.compile(r'<thinking>(.*?)</thinking>', re.DOTALL)
_LINEMAP_RE = re.compile(r'<linemap>(.*?)</linemap>', re.DOTALL)
# A line holding nothing but a markdown fence (``` or ```mermaid).
_FENCE_RE = re.compile(r'^[ \t]*```[A-Za-z]*[ \t]*\r?$\n?', re.MULTILINE)


# Split one raw completion into (reasoning, mermaid, linemap).
def _split_model_output(raw: str, num_lines: int, truncated: bool = False) -> tuple:
    """Separate the model's reasoning, Mermaid diagram and node->line map.

    Args:
        raw: Full completion text.
        num_lines: Number of source lines, used to validate the linemap.
        truncated: True when generation stopped at the token limit; the last,
            possibly half-written linemap entry is then discarded.

    Returns:
        ``(reasoning, mermaid, linemap)``. ``mermaid`` has had its labels
        quoted/escaped and surrounding whitespace removed.
    """
    found = _THINKING_RE.search(raw)
    reasoning = found.group(1).strip() if found else ""
    content = _THINKING_RE.sub('', raw)

    # An opening tag with no close means the output ended mid-reasoning:
    # everything after it is reasoning, not diagram text.
    head, dangling, tail = content.partition('<thinking>')
    if dangling:
        content = head
        reasoning = reasoning or tail.strip()

    linemap: dict = {}
    lm = _LINEMAP_RE.search(content)
    if lm:
        linemap = parse_linemap(lm.group(1), num_lines)
        content = content[:lm.start()] + content[lm.end():]
    else:
        # Unterminated <linemap>: still keep it out of the diagram and salvage
        # the complete entries.
        head, dangling, tail = content.partition('<linemap>')
        if dangling:
            if truncated and not tail.endswith('\n'):
                # "D: 1" may be a cut-off "D: 12" -- drop the unfinished line
                tail = tail[:tail.rfind('\n') + 1]
            linemap = parse_linemap(tail, num_lines)
            content = head

    # Fences are banned by the prompt but cheap to remove if they leak anyway.
    content = _FENCE_RE.sub('', content)

    # Quote-wrap each node label and escape any leaked code characters
    return reasoning, quote_labels(content).strip(), linemap


@app.api(name="generate_flowchart")
def generate_flowchart(src_code: str) -> dict:
    """Generate a Mermaid flowchart and a node->source-line map for ``src_code``.

    The source is sent to the model with ``N| `` line-number prefixes so the
    model can cite lines in its ``<linemap>`` block. One trace record is
    written per call through ``write_trace``: status "ok" on success, status
    "error" (with the exception repr) when the model call raises.

    Args:
        src_code: Source text to diagram.

    Returns:
        ``{"mermaid": str, "linemap": {node_id: [start_line, end_line]}}``.
        Blank input returns an empty diagram and map without calling the model.

    Raises:
        Whatever ``llm.create_chat_completion`` raises; the exception is
        traced and then re-raised unchanged.
    """
    # check if src_code is empty
    if not src_code.strip():
        return {"mermaid": "", "linemap": {}}

    # Number the source lines so the model can cite them in the <linemap> block.
    src_lines = src_code.splitlines()
    num_lines = len(src_lines)
    numbered = "\n".join(f"{i}| {ln}" for i, ln in enumerate(src_lines, 1))

    def trace_base() -> dict:
        # Built at write time so "ts" marks when the record was logged.
        return {
            "id": uuid.uuid4().hex,
            "ts": datetime.now(timezone.utc).isoformat(),
            "event": "generate_flowchart",
            "model": MODEL_NAME,
            # n_ctx mirrors the value passed to Llama(...) at module level
            "params": {"temperature": _TEMPERATURE, "max_tokens": _MAX_TOKENS, "n_ctx": 4096},
            "input": {"src_code": src_code, "num_lines": num_lines},
        }

    # Reset the cache per request so no cross-request bleeding
    llm.reset()

    t0 = time.perf_counter()
    try:
        # Casting else PyLance gets mad
        response = cast(Any, llm.create_chat_completion(
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": numbered}
            ],
            temperature=_TEMPERATURE,
            max_tokens=_MAX_TOKENS,
            stream=False
        ))
    except Exception as exc:
        # Leave a record of the failed call, then let the caller see the error.
        write_trace({
            **trace_base(),
            "error": repr(exc),
            "latency_ms": round((time.perf_counter() - t0) * 1000),
            "status": "error",
        })
        raise
    latency_ms = round((time.perf_counter() - t0) * 1000)

    choice = response["choices"][0]
    raw = choice["message"]["content"] or ""  # content may be None
    usage = response.get("usage", {}) or {}
    # "length" is assumed to be the finish reason reported when max_tokens is hit
    truncated = choice.get("finish_reason") == "length"

    reasoning, mermaid, linemap = _split_model_output(raw, num_lines, truncated)

    # ----- Agent trace (append-only JSONL; downloadable at /traces) -----
    write_trace({
        **trace_base(),
        "reasoning": reasoning,
        "output": {"raw": raw, "mermaid": mermaid, "linemap": linemap},
        "usage": {
            "prompt_tokens": usage.get("prompt_tokens"),
            "completion_tokens": usage.get("completion_tokens"),
            "total_tokens": usage.get("total_tokens"),
        },
        "latency_ms": latency_ms,
        "status": "ok",
    })

    return {"mermaid": mermaid, "linemap": linemap}

# ----- Custom Frontend ----- #
# The UI lives in frontend.html next to this file, so it can also be opened
# straight from disk (file://) to preview it without loading the model.
_frontend_file = Path(__file__).parent / "frontend.html"
index_html = _frontend_file.read_text(encoding="utf-8")  # read once, at import

# Load the custom HTML
# / takes precedence over the default Blocks UI
@app.get("/")
def index():
    """Return the frontend page that was read from frontend.html at import."""
    page = HTMLResponse(index_html)
    return page

# Serve the vendored frontend assets (Mermaid, CodeMirror bundle, Gradio client,
# fonts) locally so the app needs NO external CDN/API at runtime.
STATIC_DIR = (Path(__file__).parent / "static").resolve()
mimetypes.add_type("text/javascript", ".js")
mimetypes.add_type("font/woff2", ".woff2")

@app.get("/static/{fname:path}")
def static_files(fname: str):
    """Serve one file from STATIC_DIR, or a plain-text 404.

    Args:
        fname: Path below ``/static/`` taken from the URL (untrusted).

    Returns:
        A ``FileResponse`` with a media type guessed from the file name
        (``application/octet-stream`` when unknown), or a 404
        ``PlainTextResponse`` when the path cannot be resolved, resolves
        outside STATIC_DIR, or is not a regular file.
    """
    try:
        fp = (STATIC_DIR / fname).resolve()
    except (OSError, ValueError):
        # e.g. a name the OS rejects (embedded NUL); answer 404 rather than error out
        return PlainTextResponse("not found", status_code=404)
    # contain to STATIC_DIR (no path traversal) and require a real file.
    # Comparing path components instead of a "/"-joined string prefix keeps
    # the check independent of the platform's path separator.
    if STATIC_DIR not in fp.parents or not fp.is_file():
        return PlainTextResponse("not found", status_code=404)
    mt, _ = mimetypes.guess_type(str(fp))
    return FileResponse(fp, media_type=mt or "application/octet-stream")

# Download the agent-trace log (one JSON object per line).
#   curl https://<your-space>/traces > agent_traces.jsonl
@app.get("/traces")
def traces():
    """Return the trace file's contents as an NDJSON attachment.

    The body is empty when the trace file does not exist.
    """
    body = ""
    if TRACE_PATH.exists():
        body = TRACE_PATH.read_text(encoding="utf-8")
    download_headers = {"Content-Disposition": 'attachment; filename="agent_traces.jsonl"'}
    return PlainTextResponse(
        body,
        media_type="application/x-ndjson",
        headers=download_headers,
    )

# Start serving. This is a bare module-level call, so it runs whenever the
# module is executed or imported.
app.launch(share=False)   # no external gradio.live tunnel — fully self-hosted