prithivMLmods commited on
Commit
bca4e3d
·
verified ·
1 Parent(s): 1c706f5

update app [final] ✅

Browse files
Files changed (1) hide show
  1. app.py +333 -503
app.py CHANGED
@@ -36,17 +36,51 @@ DTYPE = (
36
  else torch.float16
37
  )
38
 
 
 
39
  QWEN_4B_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-4B-Unredacted-MAX"
40
  QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
41
  QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
42
- QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
43
- QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"
44
  LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
45
  GEMMA4_E2B_NAME = "google/gemma-4-E2B-it"
46
  LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
47
  QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
48
  QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
51
  print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
52
  try:
@@ -86,40 +120,6 @@ except Exception as e:
86
  qwen_2b_model = None
87
  qwen_2b_processor = None
88
 
89
- # ── Qwen3-VL-2B-Instruct ────────────────────────────────
90
- print(f"Loading Qwen3-VL-2B model: {QWEN_VL_2B_MODEL_NAME} on {DEVICE}...")
91
- try:
92
- qwen_vl_2b_model = Qwen3VLForConditionalGeneration.from_pretrained(
93
- QWEN_VL_2B_MODEL_NAME,
94
- trust_remote_code=True,
95
- torch_dtype=torch.bfloat16,
96
- ).to(DEVICE).eval()
97
- qwen_vl_2b_processor = AutoProcessor.from_pretrained(
98
- QWEN_VL_2B_MODEL_NAME, trust_remote_code=True
99
- )
100
- print("Qwen3-VL-2B model loaded successfully.")
101
- except Exception as e:
102
- print(f"Warning: Qwen3-VL-2B model loading failed. Error: {e}")
103
- qwen_vl_2b_model = None
104
- qwen_vl_2b_processor = None
105
-
106
- # ── Qwen3-VL-4B-Instruct ────────────────────────────────
107
- print(f"Loading Qwen3-VL-4B model: {QWEN_VL_4B_MODEL_NAME} on {DEVICE}...")
108
- try:
109
- qwen_vl_4b_model = Qwen3VLForConditionalGeneration.from_pretrained(
110
- QWEN_VL_4B_MODEL_NAME,
111
- trust_remote_code=True,
112
- torch_dtype=torch.bfloat16,
113
- ).to(DEVICE).eval()
114
- qwen_vl_4b_processor = AutoProcessor.from_pretrained(
115
- QWEN_VL_4B_MODEL_NAME, trust_remote_code=True
116
- )
117
- print("Qwen3-VL-4B model loaded successfully.")
118
- except Exception as e:
119
- print(f"Warning: Qwen3-VL-4B model loading failed. Error: {e}")
120
- qwen_vl_4b_model = None
121
- qwen_vl_4b_processor = None
122
-
123
  # ── LFM2.5-VL-450M ──────────────────────────────────────
124
  print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
125
  try:
@@ -215,7 +215,7 @@ def safe_parse_json(text: str):
215
  # --- Inference Generator (Streaming) ---
216
  @spaces.GPU(duration=120)
217
  def generate_inference_stream(
218
- image: Image.Image, category: str, prompt: str, model_id: str = "qwen_4b_unredacted"
219
  ):
220
  if category == "Query":
221
  full_prompt = prompt
@@ -228,30 +228,30 @@ def generate_inference_stream(
228
  else:
229
  full_prompt = prompt
230
 
231
- # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
232
- if model_id == "qwen_4b_unredacted":
233
- if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
234
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
235
  yield "data: [DONE]\n\n"
236
  return
237
  messages = [{"role": "user", "content": [
238
  {"type": "image", "image": image},
239
  {"type": "text", "text": full_prompt},
240
  ]}]
241
- text_input = qwen_4b_unredacted_processor.apply_chat_template(
242
  messages, tokenize=False, add_generation_prompt=True
243
  )
244
- inputs = qwen_4b_unredacted_processor(
245
  text=[text_input], images=[image], return_tensors="pt", padding=True
246
- ).to(qwen_4b_unredacted_model.device)
247
  streamer = TextIteratorStreamer(
248
- qwen_4b_unredacted_processor.tokenizer,
249
  skip_prompt=True, skip_special_tokens=True, timeout=120,
250
  )
251
  thread = threading.Thread(
252
- target=qwen_4b_unredacted_model.generate,
253
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
254
- use_cache=True, temperature=1.5, min_p=0.1),
255
  )
256
  thread.start()
257
  for tok in streamer:
@@ -259,30 +259,30 @@ def generate_inference_stream(
259
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
260
  thread.join()
261
 
262
- # ── Qwen3.5-4B ─────────────────────────────────────
263
- elif model_id == "qwen_4b":
264
- if qwen_4b_model is None or qwen_4b_processor is None:
265
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
266
  yield "data: [DONE]\n\n"
267
  return
268
  messages = [{"role": "user", "content": [
269
  {"type": "image", "image": image},
270
  {"type": "text", "text": full_prompt},
271
  ]}]
272
- text_input = qwen_4b_processor.apply_chat_template(
273
  messages, tokenize=False, add_generation_prompt=True
274
  )
275
- inputs = qwen_4b_processor(
276
  text=[text_input], images=[image], return_tensors="pt", padding=True
277
- ).to(qwen_4b_model.device)
278
  streamer = TextIteratorStreamer(
279
- qwen_4b_processor.tokenizer,
280
  skip_prompt=True, skip_special_tokens=True, timeout=120,
281
  )
282
  thread = threading.Thread(
283
- target=qwen_4b_model.generate,
284
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
285
- use_cache=True, temperature=1.5, min_p=0.1),
286
  )
287
  thread.start()
288
  for tok in streamer:
@@ -290,28 +290,28 @@ def generate_inference_stream(
290
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
291
  thread.join()
292
 
293
- # ── Qwen3.5-2B ──────────────────────────────────────
294
- elif model_id == "qwen_2b":
295
- if qwen_2b_model is None or qwen_2b_processor is None:
296
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B model not loaded.'})}\n\n"
297
  yield "data: [DONE]\n\n"
298
  return
299
  messages = [{"role": "user", "content": [
300
  {"type": "image", "image": image},
301
  {"type": "text", "text": full_prompt},
302
  ]}]
303
- text_input = qwen_2b_processor.apply_chat_template(
304
  messages, tokenize=False, add_generation_prompt=True
305
  )
306
- inputs = qwen_2b_processor(
307
  text=[text_input], images=[image], return_tensors="pt", padding=True
308
- ).to(qwen_2b_model.device)
309
  streamer = TextIteratorStreamer(
310
- qwen_2b_processor.tokenizer,
311
  skip_prompt=True, skip_special_tokens=True, timeout=120,
312
  )
313
  thread = threading.Thread(
314
- target=qwen_2b_model.generate,
315
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
316
  use_cache=True, temperature=1.5, min_p=0.1),
317
  )
@@ -321,30 +321,30 @@ def generate_inference_stream(
321
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
322
  thread.join()
323
 
324
- # ── Qwen3-VL-2B ─────────────────────────────────────
325
- elif model_id == "qwen_vl_2b":
326
- if qwen_vl_2b_model is None or qwen_vl_2b_processor is None:
327
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-2B model not loaded.'})}\n\n"
328
  yield "data: [DONE]\n\n"
329
  return
330
  messages = [{"role": "user", "content": [
331
  {"type": "image", "image": image},
332
  {"type": "text", "text": full_prompt},
333
  ]}]
334
- text_input = qwen_vl_2b_processor.apply_chat_template(
335
  messages, tokenize=False, add_generation_prompt=True
336
  )
337
- inputs = qwen_vl_2b_processor(
338
  text=[text_input], images=[image], return_tensors="pt", padding=True
339
- ).to(qwen_vl_2b_model.device)
340
  streamer = TextIteratorStreamer(
341
- qwen_vl_2b_processor.tokenizer,
342
  skip_prompt=True, skip_special_tokens=True, timeout=120,
343
  )
344
  thread = threading.Thread(
345
- target=qwen_vl_2b_model.generate,
346
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
347
- use_cache=True, temperature=1.0, do_sample=True),
348
  )
349
  thread.start()
350
  for tok in streamer:
@@ -352,30 +352,30 @@ def generate_inference_stream(
352
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
353
  thread.join()
354
 
355
- # ── Qwen3-VL-4B ─────────────────────────────────────
356
- elif model_id == "qwen_vl_4b":
357
- if qwen_vl_4b_model is None or qwen_vl_4b_processor is None:
358
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-4B model not loaded.'})}\n\n"
359
  yield "data: [DONE]\n\n"
360
  return
361
  messages = [{"role": "user", "content": [
362
  {"type": "image", "image": image},
363
  {"type": "text", "text": full_prompt},
364
  ]}]
365
- text_input = qwen_vl_4b_processor.apply_chat_template(
366
  messages, tokenize=False, add_generation_prompt=True
367
  )
368
- inputs = qwen_vl_4b_processor(
369
  text=[text_input], images=[image], return_tensors="pt", padding=True
370
- ).to(qwen_vl_4b_model.device)
371
  streamer = TextIteratorStreamer(
372
- qwen_vl_4b_processor.tokenizer,
373
  skip_prompt=True, skip_special_tokens=True, timeout=120,
374
  )
375
  thread = threading.Thread(
376
- target=qwen_vl_4b_model.generate,
377
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
378
- use_cache=True, temperature=1.0, do_sample=True),
379
  )
380
  thread.start()
381
  for tok in streamer:
@@ -546,7 +546,7 @@ async def run_inference(
546
  image: UploadFile = File(...),
547
  category: str = Form(...),
548
  prompt: str = Form(...),
549
- model_id: str = Form("qwen_4b_unredacted"),
550
  ):
551
  try:
552
  img_bytes = await image.read()
@@ -589,10 +589,8 @@ async def homepage(request: Request):
589
  }
590
  * { box-sizing: border-box; margin: 0; padding: 0; }
591
  html, body {
592
- min-height: 100%;
593
- background: var(--bg);
594
- color: var(--text);
595
- font-family: 'JetBrains Mono', monospace;
596
  }
597
  body {
598
  background-image:
@@ -605,13 +603,11 @@ async def homepage(request: Request):
605
  }
606
  /* ── Top Bar ── */
607
  .top-bar {
608
- position: sticky; top: 0; left: 0; right: 0;
609
- height: 42px;
610
  background: rgba(13,13,15,0.95);
611
  border-bottom: 1px solid var(--node-border);
612
  display: flex; align-items: center; padding: 0 20px;
613
- gap: 12px; z-index: 1000;
614
- backdrop-filter: blur(12px);
615
  }
616
  .top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
617
  .top-bar .sep { color: var(--node-border); }
@@ -625,146 +621,98 @@ async def homepage(request: Request):
625
  }
626
  /* ── Canvas ── */
627
  #canvas {
628
- position: relative;
629
- width: 1360px;
630
- min-height: calc(100vh - 42px);
631
- height: 900px;
632
- margin: 0 auto;
633
  }
634
  svg.wires {
635
  position: absolute; top: 0; left: 0;
636
  width: 100%; height: 100%;
637
- pointer-events: none; z-index: 2;
638
- overflow: visible;
639
- }
640
- path.wire {
641
- fill: none; stroke: var(--wire); stroke-width: 2.5;
642
- stroke-linecap: round;
643
  }
 
644
  path.wire.active {
645
  stroke: var(--wire-active); stroke-width: 3;
646
- stroke-dasharray: 8 4;
647
- animation: flow 0.6s linear infinite;
648
  }
649
  @keyframes flow { to { stroke-dashoffset: -24; } }
650
  /* ── Nodes ── */
651
  .node {
652
  position: absolute; width: 295px;
653
- background: var(--node-bg);
654
- border: 1px solid var(--node-border);
655
- border-radius: 9px;
656
- box-shadow: 0 8px 28px rgba(0,0,0,0.5);
657
- z-index: 10; display: flex; flex-direction: column;
658
- transition: box-shadow 0.2s;
659
- }
660
- .node:hover {
661
- box-shadow: 0 8px 28px rgba(0,0,0,0.5),
662
- 0 0 0 1px rgba(124,106,247,0.3);
663
  }
 
664
  .node.fixed-height { height: 340px; }
665
  .node-header {
666
- background: var(--node-header);
667
- padding: 7px 12px;
668
- border-bottom: 1px solid var(--node-border);
669
- border-radius: 9px 9px 0 0;
670
- font-size: 11px; font-weight: 700;
671
- cursor: grab;
672
  display: flex; justify-content: space-between; align-items: center;
673
  flex-shrink: 0; user-select: none;
674
  }
675
  .node-header:active { cursor: grabbing; }
676
  .node-header .id {
677
  font-size: 10px; color: var(--muted);
678
- background: rgba(255,255,255,0.04);
679
- padding: 2px 7px; border-radius: 4px;
680
- }
681
- .node-body {
682
- padding: 10px;
683
- display: flex; flex-direction: column; gap: 8px;
684
- flex: 1; overflow: hidden;
685
  }
 
686
  /* ── Ports ── */
687
  .port {
688
  position: absolute; width: 11px; height: 11px;
689
- background: var(--node-bg);
690
- border: 2px solid var(--port);
691
  border-radius: 50%; z-index: 30;
692
  }
693
  .port.out { right: -6px; }
694
  .port.in { left: -6px; }
695
  /* ── Labels ── */
696
  label {
697
- font-size: 10px; color: var(--muted);
698
- font-weight: 600; display: block; margin-bottom: 3px;
699
- letter-spacing: 0.07em; text-transform: uppercase;
700
  }
701
  input[type="file"] { display: none; }
702
  /* ── Upload Zone ── */
703
  .file-upload {
704
- border: 1.5px dashed var(--node-border);
705
- border-radius: 7px; padding: 12px 10px;
706
- text-align: center; cursor: pointer;
707
- font-size: 11px; color: var(--muted);
708
- transition: border-color 0.2s, background 0.2s;
709
- background: rgba(255,255,255,0.01);
710
  display: flex; flex-direction: column; align-items: center; gap: 5px;
711
  }
712
- .file-upload:hover {
713
- border-color: var(--accent);
714
- background: rgba(124,106,247,0.04);
715
- }
716
  .file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
717
  .file-upload:hover svg { opacity: 0.9; }
718
  /* ── Preview wrapper ── */
719
  .preview-wrap {
720
- display: none; position: relative;
721
- border-radius: 7px; overflow: hidden;
722
- border: 1px solid var(--node-border); background: #000;
723
  }
724
  .preview-wrap.visible { display: block; }
725
  .img-preview { width: 100%; height: 170px; object-fit: contain; display: block; }
726
  /* ── Clear button ── */
727
  .clear-btn {
728
- position: absolute; top: 6px; right: 6px;
729
- width: 24px; height: 24px; border-radius: 50%;
730
- background: rgba(13,13,15,0.80);
731
- border: 1px solid var(--node-border);
732
- color: var(--accent3); cursor: pointer;
733
  display: flex; align-items: center; justify-content: center;
734
  transition: background 0.18s, border-color 0.18s, transform 0.12s;
735
  z-index: 20; backdrop-filter: blur(6px);
736
  }
737
- .clear-btn:hover {
738
- background: rgba(255,107,107,0.18);
739
- border-color: var(--accent3); transform: scale(1.08);
740
- }
741
  .clear-btn:active { transform: scale(0.95); }
742
  .clear-btn svg { pointer-events: none; }
743
  /* ── Filename chip ── */
744
  .img-chip {
745
  display: none; align-items: center; gap: 6px;
746
- background: rgba(124,106,247,0.08);
747
- border: 1px solid rgba(124,106,247,0.22);
748
- border-radius: 5px; padding: 4px 8px;
749
- font-size: 9px; color: var(--muted); overflow: hidden;
750
  }
751
  .img-chip.visible { display: flex; }
752
- .img-chip .chip-dot {
753
- width: 5px; height: 5px; border-radius: 50%;
754
- background: var(--accent2); flex-shrink: 0;
755
- box-shadow: 0 0 4px var(--accent2);
756
- }
757
- .img-chip .chip-name {
758
- overflow: hidden; text-overflow: ellipsis;
759
- white-space: nowrap; flex: 1;
760
- color: var(--text); font-size: 9px;
761
- }
762
  .img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 9px; }
763
  select, textarea {
764
- width: 100%; background: rgba(0,0,0,0.3);
765
- border: 1px solid var(--node-border);
766
- color: var(--text); padding: 7px 9px;
767
- border-radius: 5px; outline: none;
768
  font-size: 11px; font-family: 'JetBrains Mono', monospace;
769
  resize: none; transition: border-color 0.2s;
770
  }
@@ -773,99 +721,77 @@ async def homepage(request: Request):
773
  button.run-btn {
774
  background: linear-gradient(135deg, var(--accent), #9b59b6);
775
  color: #fff; border: none; padding: 8px; border-radius: 6px;
776
- font-weight: 700; font-size: 11px;
777
- font-family: 'JetBrains Mono', monospace; cursor: pointer;
778
- transition: opacity 0.2s, transform 0.1s;
779
  display: flex; justify-content: center; align-items: center; gap: 8px;
780
  letter-spacing: 0.04em; flex-shrink: 0;
781
  }
782
- button.run-btn:hover { opacity: 0.9; }
783
- button.run-btn:active { transform: scale(0.98); }
784
  button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
785
  /* ── Output node ── */
786
- .output-node-body {
787
- padding: 10px; display: flex; flex-direction: column;
788
- gap: 6px; flex: 1; overflow: hidden;
789
- }
790
- .output-header-row {
791
- display: flex; align-items: center;
792
- justify-content: space-between; flex-shrink: 0;
793
- }
794
- /* ── Copy button ── */
795
- .copy-btn {
796
  display: flex; align-items: center; gap: 5px;
797
- background: rgba(124,106,247,0.10);
798
- border: 1px solid rgba(124,106,247,0.25);
799
  border-radius: 5px; padding: 3px 8px;
800
- font-size: 9px; font-weight: 700;
801
- font-family: 'JetBrains Mono', monospace;
802
- color: var(--accent); cursor: pointer;
803
- letter-spacing: 0.05em;
804
- transition: background 0.18s, border-color 0.18s, transform 0.1s;
805
- flex-shrink: 0;
806
  }
807
- .copy-btn:hover { background: rgba(124,106,247,0.22); border-color: var(--accent); }
808
- .copy-btn:active { transform: scale(0.95); }
809
- .copy-btn.copied {
810
- background: rgba(78,205,196,0.15);
811
- border-color: var(--accent2); color: var(--accent2);
812
  }
813
- .copy-btn svg { pointer-events: none; flex-shrink: 0; }
 
 
814
  .output-box {
815
- background: rgba(0,0,0,0.4);
816
- border: 1px solid var(--node-border);
817
- border-radius: 5px; padding: 10px;
818
- flex: 1; overflow-y: auto;
819
- font-size: 11px; line-height: 1.6;
820
- color: #c8c8e0; white-space: pre-wrap;
821
- user-select: text;
822
- font-family: 'JetBrains Mono', monospace; min-height: 0;
823
  }
824
- /* ── Grounding ── */
 
 
825
  .ground-canvas-wrap {
826
- position: relative; flex: 1;
827
- border: 1px solid var(--node-border);
828
- border-radius: 5px; overflow: hidden;
829
- background: #000; min-height: 0;
830
  }
831
  .ground-canvas-wrap canvas { width: 100%; height: 100%; object-fit: contain; display: block; }
832
  .ground-placeholder {
833
- position: absolute; inset: 0;
834
- display: flex; align-items: center; justify-content: center;
835
- font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
836
  }
837
  .loader {
838
- width: 11px; height: 11px;
839
- border: 2px solid rgba(255,255,255,0.3);
840
  border-top-color: #fff; border-radius: 50%;
841
  animation: spin 0.7s linear infinite; display: none;
842
  }
843
  @keyframes spin { to { transform: rotate(360deg); } }
844
- .status-dot {
845
- width: 6px; height: 6px; border-radius: 50%;
846
- background: var(--muted); display: inline-block; margin-right: 6px;
847
- }
848
  .status-dot.active { background: var(--accent2); box-shadow: 0 0 5px var(--accent2); }
849
  /* ── Model badges ── */
850
  .model-badge {
851
- display: inline-block; padding: 2px 7px;
852
- border-radius: 4px; font-size: 9px; font-weight: 700;
853
- letter-spacing: 0.06em; text-transform: uppercase;
854
  }
 
 
855
  .model-badge.q4bunred { background: rgba(255,80,80,0.18); color: #ff5050; border: 1px solid rgba(255,80,80,0.40); }
856
  .model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
857
  .model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
858
- .model-badge.qvl2b { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
859
- .model-badge.qvl4b { background: rgba(255,100,80,0.15); color: #ff6450; border: 1px solid rgba(255,100,80,0.35); }
860
  .model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
861
  .model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
862
  .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
863
  .model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
864
  .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
865
- .model-info-box {
866
- border-radius: 6px; padding: 9px;
867
- font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0;
868
- }
869
  .canvas-footer { height: 36px; }
870
  </style>
871
  </head>
@@ -897,8 +823,7 @@ async def homepage(request: Request):
897
  <label>Upload Image</label>
898
  <div class="file-upload" id="dropZone">
899
  <svg width="30" height="30" viewBox="0 0 24 24" fill="none"
900
- stroke="#7c6af7" stroke-width="1.5"
901
- stroke-linecap="round" stroke-linejoin="round">
902
  <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
903
  <circle cx="8.5" cy="8.5" r="1.5"/>
904
  <polyline points="21 15 16 10 5 21"/>
@@ -910,8 +835,7 @@ async def homepage(request: Request):
910
  <img id="imgPreview" class="img-preview" />
911
  <button class="clear-btn" id="clearBtn" title="Remove image">
912
  <svg width="12" height="12" viewBox="0 0 24 24" fill="none"
913
- stroke="currentColor" stroke-width="2.5"
914
- stroke-linecap="round" stroke-linejoin="round">
915
  <line x1="18" y1="6" x2="6" y2="18"/>
916
  <line x1="6" y1="6" x2="18" y2="18"/>
917
  </svg>
@@ -937,11 +861,11 @@ async def homepage(request: Request):
937
  <div>
938
  <label>Active Model</label>
939
  <select id="modelSelect">
 
 
940
  <option value="qwen_4b_unredacted">Qwen3.5-4B-Unredacted-MAX</option>
941
  <option value="qwen_4b">Qwen3.5-4B</option>
942
  <option value="qwen_2b">Qwen3.5-2B</option>
943
- <option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
944
- <option value="qwen_vl_4b">Qwen3-VL-4B-Instruct</option>
945
  <option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
946
  <option value="gemma4_e2b">Gemma4-E2B-it (Google)</option>
947
  <option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
@@ -950,10 +874,10 @@ async def homepage(request: Request):
950
  </select>
951
  </div>
952
  <div id="modelInfoBox" class="model-info-box"
953
- style="background:rgba(255,80,80,0.07);border:1px solid rgba(255,80,80,0.3);">
954
- <span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
955
- Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
956
- with extended instruction-following &amp; unrestricted reasoning.
957
  </div>
958
  <div style="flex:1;"></div>
959
  </div>
@@ -1000,10 +924,9 @@ async def homepage(request: Request):
1000
  <div class="output-node-body">
1001
  <div class="output-header-row">
1002
  <label style="margin-bottom:0;">Streamed Result</label>
1003
- <button class="copy-btn" id="copyBtn" title="Copy result to clipboard">
1004
  <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1005
- stroke="currentColor" stroke-width="2.2"
1006
- stroke-linecap="round" stroke-linejoin="round">
1007
  <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1008
  <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1009
  </svg>
@@ -1021,8 +944,19 @@ async def homepage(request: Request):
1021
  <span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
1022
  <span class="id">ID: 05</span>
1023
  </div>
1024
- <div class="node-body">
1025
- <label>Point / Detect Overlay</label>
 
 
 
 
 
 
 
 
 
 
 
1026
  <div class="ground-canvas-wrap">
1027
  <canvas id="groundCanvas"></canvas>
1028
  <div class="ground-placeholder" id="groundPlaceholder">
@@ -1042,9 +976,8 @@ async def homepage(request: Request):
1042
  const canvasEl = document.getElementById('canvas');
1043
  function portCenter(id) {
1044
  const el = document.getElementById(id);
1045
- if (!el) return { x:0, y:0 };
1046
- const er = el.getBoundingClientRect();
1047
- const cr = canvasEl.getBoundingClientRect();
1048
  return { x: er.left + er.width/2 - cr.left, y: er.top + er.height/2 - cr.top };
1049
  }
1050
  function bezier(p1, p2) {
@@ -1071,17 +1004,16 @@ document.querySelectorAll('.node').forEach(node => {
1071
  const header = node.querySelector('.node-header');
1072
  let drag = false, sx, sy, il, it;
1073
  header.addEventListener('mousedown', e => {
1074
- drag = true; sx = e.clientX; sy = e.clientY;
1075
- il = parseInt(node.style.left)||0; it = parseInt(node.style.top)||0;
1076
- node.style.zIndex = 100; e.preventDefault();
1077
  });
1078
  document.addEventListener('mousemove', e => {
1079
  if (!drag) return;
1080
- node.style.left = `${il + e.clientX - sx}px`;
1081
- node.style.top = `${it + e.clientY - sy}px`;
1082
  updateWires();
1083
  });
1084
- document.addEventListener('mouseup', () => { if (drag) { drag=false; node.style.zIndex=10; } });
1085
  });
1086
  window.addEventListener('resize', updateWires);
1087
  window.addEventListener('scroll', updateWires);
@@ -1103,30 +1035,22 @@ const chipSize = document.getElementById('chipSize');
1103
  const dotImg = document.getElementById('dot-img');
1104
 
1105
  function formatBytes(b) {
1106
- if (b < 1024) return b + ' B';
1107
- if (b < 1048576) return (b/1024).toFixed(1) + ' KB';
1108
- return (b/1048576).toFixed(1) + ' MB';
1109
  }
1110
  function handleFile(file) {
1111
- if (!file || !file.type.startsWith('image/')) return;
1112
- currentFile = file;
1113
- imgPreview.src = URL.createObjectURL(file);
1114
- previewWrap.classList.add('visible');
1115
- dropZone.style.display = 'none';
1116
- chipName.textContent = file.name;
1117
- chipSize.textContent = formatBytes(file.size);
1118
- imgChip.classList.add('visible');
1119
- dotImg.classList.add('active');
1120
  requestAnimationFrame(updateWires);
1121
  }
1122
  function clearImage() {
1123
- currentFile = null; imgPreview.src = '';
1124
- previewWrap.classList.remove('visible');
1125
- dropZone.style.display = '';
1126
- imgChip.classList.remove('visible');
1127
- chipName.textContent = '—'; chipSize.textContent = '';
1128
- fileInput.value = ''; dotImg.classList.remove('active');
1129
- requestAnimationFrame(updateWires);
1130
  }
1131
  dropZone.onclick = () => fileInput.click();
1132
  fileInput.onchange = e => handleFile(e.target.files[0]);
@@ -1147,6 +1071,18 @@ const dotModel = document.getElementById('dot-model');
1147
  dotModel.classList.add('active');
1148
 
1149
  const MODEL_INFO = {
 
 
 
 
 
 
 
 
 
 
 
 
1150
  qwen_4b_unredacted: {
1151
  html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
1152
  Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
@@ -1165,18 +1101,6 @@ const MODEL_INFO = {
1165
  Lightweight &amp; fast — ideal for quick Query, Caption, Point &amp; Detect tasks.`,
1166
  bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
1167
  },
1168
- qwen_vl_2b: {
1169
- html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
1170
- Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
1171
- Strong spatial grounding, OCR &amp; instruction-following.`,
1172
- bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.25)',
1173
- },
1174
- qwen_vl_4b: {
1175
- html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
1176
- Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
1177
- Superior spatial grounding, richer OCR &amp; stronger multi-step reasoning.`,
1178
- bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
1179
- },
1180
  lfm_450: {
1181
  html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
1182
  LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
@@ -1227,99 +1151,52 @@ const PLACEHOLDERS = {
1227
  Point: 'e.g., The gun held by the person.',
1228
  Detect: 'e.g., The headlight of the car.',
1229
  };
1230
- categorySelect.onchange = e => { promptInput.placeholder = PLACEHOLDERS[e.target.value] || ''; };
1231
 
1232
  // ══════════════════════════════════════════════
1233
  // ROBUST JSON EXTRACTOR
1234
- // Strategy:
1235
- // 1. Strip ALL <think>…</think> blocks (greedy,
1236
- // handles the tag appearing after the JSON too)
1237
- // 2. Strip markdown fences
1238
- // 3. Find the LAST occurrence of a JSON array [ ]
1239
- // or object { } — models typically emit the
1240
- // clean JSON block after their reasoning prose
1241
- // 4. Use a bracket-depth walker to extract it
1242
- // precisely without cutting off nested objects
1243
  // ══════════════════════════════════════════════
1244
  function extractGroundingJSON(raw) {
1245
- // Step 1 kill ALL <think> </think> sections
1246
- // Use greedy .* with DOTALL flag emulation via [\s\S]
1247
- // Run multiple passes in case of nested/malformed tags
1248
- let text = raw;
1249
- let prev = null;
1250
  while (prev !== text) {
1251
  prev = text;
1252
  text = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
1253
  }
 
 
 
1254
 
1255
- // Step 2 — strip markdown code fences ```json … ```
1256
- text = text.replace(/```(?:json)?[\\s\\S]*?```/gi, function(m) {
1257
- // Keep the inner content, just remove the fences
1258
- return m.replace(/```(?:json)?/gi, '').replace(/```/g, '');
1259
- });
1260
-
1261
- // Step 3 — strip any remaining lone fence markers
1262
- text = text.replace(/```/g, '');
1263
- text = text.trim();
1264
-
1265
- // Helper: walk from startIdx and extract a balanced
1266
- // bracket expression (open/close must match).
1267
  function extractBalanced(str, startIdx, openCh, closeCh) {
1268
- let depth = 0, inStr = false, esc = false;
1269
- for (let i = startIdx; i < str.length; i++) {
1270
- const c = str[i];
1271
- if (esc) { esc = false; continue; }
1272
- if (c === '\\\\') { esc = true; continue; }
1273
- if (c === '"') { inStr = !inStr; continue; }
1274
- if (inStr) continue;
1275
- if (c === openCh) depth++;
1276
- if (c === closeCh) {
1277
  depth--;
1278
- if (depth === 0) {
1279
- try { return JSON.parse(str.slice(startIdx, i + 1)); }
1280
- catch(_) { return null; }
1281
  }
1282
  }
1283
  }
1284
  return null;
1285
  }
1286
 
1287
- // Step 4 find the LAST JSON array in the text
1288
- // (models often emit prose first, JSON last)
1289
- let lastArrIdx = -1;
1290
- for (let i = text.length - 1; i >= 0; i--) {
1291
- if (text[i] === '[') { lastArrIdx = i; break; }
1292
- }
1293
- if (lastArrIdx !== -1) {
1294
- const result = extractBalanced(text, lastArrIdx, '[', ']');
1295
- if (result !== null) return result;
1296
- }
1297
-
1298
- // Step 5 — find the LAST JSON object in the text
1299
- let lastObjIdx = -1;
1300
- for (let i = text.length - 1; i >= 0; i--) {
1301
- if (text[i] === '{') { lastObjIdx = i; break; }
1302
  }
1303
- if (lastObjIdx !== -1) {
1304
- const result = extractBalanced(text, lastObjIdx, '{', '}');
1305
- if (result !== null) return result;
1306
  }
1307
-
1308
- // Step 6 — try FIRST array (fallback)
1309
- const firstArr = text.indexOf('[');
1310
- if (firstArr !== -1) {
1311
- const result = extractBalanced(text, firstArr, '[', ']');
1312
- if (result !== null) return result;
1313
- }
1314
-
1315
- // Step 7 — try FIRST object (fallback)
1316
- const firstObj = text.indexOf('{');
1317
- if (firstObj !== -1) {
1318
- const result = extractBalanced(text, firstObj, '{', '}');
1319
- if (result !== null) return result;
1320
- }
1321
-
1322
- // Step 8 — last resort full parse
1323
  try { return JSON.parse(text); } catch(_) {}
1324
  return null;
1325
  }
@@ -1330,122 +1207,95 @@ function extractGroundingJSON(raw) {
1330
  const groundCanvas = document.getElementById('groundCanvas');
1331
  const groundPlaceholder = document.getElementById('groundPlaceholder');
1332
  const gCtx = groundCanvas.getContext('2d');
 
1333
 
1334
  const PALETTE = ['#4ecdc4','#7c6af7','#ff6b6b','#ffd93d','#6bcb77','#ff922b','#cc5de8','#339af0'];
1335
 
1336
  function hexToRgba(hex, alpha) {
1337
- const r = parseInt(hex.slice(1,3),16);
1338
- const g = parseInt(hex.slice(3,5),16);
1339
- const b = parseInt(hex.slice(5,7),16);
1340
  return `rgba(${r},${g},${b},${alpha})`;
1341
  }
1342
  function roundRect(ctx, x, y, w, h, r) {
1343
- ctx.beginPath();
1344
- ctx.moveTo(x+r,y);
1345
  ctx.lineTo(x+w-r,y); ctx.quadraticCurveTo(x+w,y,x+w,y+r);
1346
  ctx.lineTo(x+w,y+h-r); ctx.quadraticCurveTo(x+w,y+h,x+w-r,y+h);
1347
  ctx.lineTo(x+r,y+h); ctx.quadraticCurveTo(x,y+h,x,y+h-r);
1348
- ctx.lineTo(x,y+r); ctx.quadraticCurveTo(x,y,x+r,y);
1349
- ctx.closePath();
 
 
 
 
 
 
 
 
1350
  }
1351
 
1352
// Draw the model's grounding output on top of the source image.
// `rawText` is the raw streamed model text; extractGroundingJSON pulls the
// JSON payload out of it. Supported item shapes (as handled below):
//   {bbox_2d:[x1,y1,x2,y2], label} / {bbox:[...]} / bare [x1,y1,x2,y2]  → box
//   {point_2d:[x,y], label}       / {point:[...]} / bare [x,y]          → point
// Coordinates wholly within 0–1 are treated as normalised and scaled to the
// image size (NOTE(review): a pixel-space box inside the top-left 1×1 px
// would be misread as normalised — presumably acceptable in practice).
function drawGrounding(imgSrc, rawText) {
  const parsed = extractGroundingJSON(rawText);
  if (!parsed) {
    console.warn('Grounding: could not extract JSON:', rawText.slice(0, 200));
    return;
  }

  const img = new Image();
  img.onload = () => {
    // Size the canvas to the image's native resolution so overlay
    // coordinates map 1:1 onto image pixels.
    const W = img.naturalWidth, H = img.naturalHeight;
    groundCanvas.width = W;
    groundCanvas.height = H;
    gCtx.drawImage(img, 0, 0);
    groundPlaceholder.style.display = 'none';

    // Line width and font scale with image width so overlays stay legible.
    const lw = Math.max(2, W/200);
    const fs = Math.max(12, W/40);
    gCtx.lineWidth = lw;
    gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;

    // Accept either a single object or an array of items.
    const items = Array.isArray(parsed) ? parsed : [parsed];

    items.forEach((item, i) => {
      const col = PALETTE[i % PALETTE.length];

      // ── Detect: bounding box ─────────────────────
      let bbox = null;
      if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4)
        bbox = item.bbox_2d;
      else if (Array.isArray(item?.bbox) && item.bbox.length === 4)
        bbox = item.bbox;
      else if (Array.isArray(item) && item.length === 4
               && item.every(n => typeof n === 'number'))
        bbox = item;

      if (bbox) {
        let [x1,y1,x2,y2] = bbox.map(Number);
        // Handle normalised 0-1 coords
        if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
          x1*=W; y1*=H; x2*=W; y2*=H;
        }
        // Swap if inverted
        if (x2 < x1) [x1,x2] = [x2,x1];
        if (y2 < y1) [y1,y2] = [y2,y1];

        const bw = x2-x1, bh = y2-y1;
        const lbl = (item?.label ?? `obj ${i+1}`).toString();

        // Translucent fill plus solid outline in the item's palette colour.
        gCtx.fillStyle = hexToRgba(col, 0.20);
        gCtx.fillRect(x1,y1,bw,bh);
        gCtx.strokeStyle = col;
        gCtx.lineWidth = lw;
        gCtx.strokeRect(x1,y1,bw,bh);

        // Label pill above the box (clamped so it never leaves the canvas).
        const tw = gCtx.measureText(lbl).width;
        const ph = fs*1.45, pw = tw+12;
        const lx = x1, ly = Math.max(0, y1-ph);
        gCtx.fillStyle = col;
        roundRect(gCtx,lx,ly,pw,ph,4); gCtx.fill();
        gCtx.fillStyle = '#fff';
        gCtx.fillText(lbl, lx+6, ly+ph*0.76);
        return;  // an item is either a box or a point, never both
      }

      // ── Point: 2-D coordinate ─────────────────────
      let pt = null;
      if (Array.isArray(item?.point_2d) && item.point_2d.length === 2)
        pt = item.point_2d;
      else if (Array.isArray(item?.point) && item.point.length === 2)
        pt = item.point;
      else if (Array.isArray(item) && item.length === 2
               && item.every(n => typeof n === 'number'))
        pt = item;

      if (pt) {
        let [x,y] = pt.map(Number);
        if (x <= 1 && y <= 1) { x*=W; y*=H; }
        const r = Math.max(8, W/60);
        const lbl = (item?.label ?? `pt ${i+1}`).toString();

        // Soft halo behind the marker.
        gCtx.beginPath();
        gCtx.arc(x, y, r*1.8, 0, Math.PI*2);
        gCtx.fillStyle = hexToRgba(col, 0.18); gCtx.fill();

        // Solid dot with a white ring.
        gCtx.beginPath();
        gCtx.arc(x, y, r, 0, Math.PI*2);
        gCtx.fillStyle = col; gCtx.fill();
        gCtx.strokeStyle = '#fff';
        gCtx.lineWidth = Math.max(1.5, lw);
        gCtx.stroke();

        gCtx.fillStyle = '#fff';
        gCtx.fillText(lbl, x+r+5, y+fs*0.4);
      }
    });
  };
  img.onerror = () => console.error('Grounding: failed to load image for overlay.');
  img.src = imgSrc;
}
1451
 
@@ -1460,31 +1310,26 @@ function resetCopyBtn() {
1460
  copyBtn.classList.remove('copied');
1461
  copyBtn.innerHTML = `
1462
  <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1463
- stroke="currentColor" stroke-width="2.2"
1464
- stroke-linecap="round" stroke-linejoin="round">
1465
  <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1466
  <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1467
  </svg> COPY`;
1468
  }
1469
-
1470
// Copy the streamed result text to the clipboard. On success the button
// flips to a "COPIED" check for 2 s (timer shared via outer `copyTimer`);
// if the async Clipboard API is unavailable/rejected, fall back to the
// legacy hidden-textarea + execCommand path.
copyBtn.onclick = () => {
  const txt = outputBox.innerText || '';
  // Nothing to copy while the output box still shows its placeholder.
  if (!txt || txt === 'Results will stream here...') return;
  navigator.clipboard.writeText(txt).then(() => {
    copyBtn.classList.add('copied');
    copyBtn.innerHTML = `
      <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
           stroke="currentColor" stroke-width="2.5"
           stroke-linecap="round" stroke-linejoin="round">
        <polyline points="20 6 9 17 4 12"/>
      </svg> COPIED`;
    // Restart the revert timer so rapid clicks don't revert early.
    clearTimeout(copyTimer);
    copyTimer = setTimeout(resetCopyBtn, 2000);
  }).catch(() => {
    // Fallback: execCommand('copy') is deprecated but still widely supported.
    const ta = document.createElement('textarea');
    ta.value = txt; ta.style.position='fixed'; ta.style.opacity='0';
    document.body.appendChild(ta); ta.select();
    document.execCommand('copy'); document.body.removeChild(ta);
  });
};
1490
 
@@ -1503,66 +1348,53 @@ runBtn.onclick = async () => {
1503
  const promptStr = promptInput.value.trim();
1504
  if (!promptStr) { alert('Please enter a prompt directive.'); return; }
1505
 
1506
- runBtn.disabled = true;
1507
- btnLoader.style.display = 'inline-block';
1508
- outputBox.innerText = '';
1509
- outputBox.style.color = '';
1510
- groundPlaceholder.style.display = 'flex';
1511
- gCtx.clearRect(0, 0, groundCanvas.width, groundCanvas.height);
1512
  dotTask.classList.add('active');
1513
- dotOut.classList.remove('active');
1514
- dotGnd.classList.remove('active');
1515
- allWires.forEach(id => document.getElementById(id)?.classList.add('active'));
1516
  resetCopyBtn();
1517
 
1518
- const formData = new FormData();
1519
  formData.append('image', currentFile);
1520
  formData.append('category', categorySelect.value);
1521
  formData.append('prompt', promptStr);
1522
  formData.append('model_id', modelSelect.value);
1523
 
1524
- let fullText = '';
1525
- let imgObjectURL = URL.createObjectURL(currentFile);
1526
 
1527
  try {
1528
- const response = await fetch('/api/run', { method:'POST', body:formData });
1529
- if (!response.ok) {
1530
- const err = await response.json();
1531
- throw new Error(err.error || 'Execution failed.');
1532
- }
1533
-
1534
- const reader = response.body.getReader();
1535
- const decoder = new TextDecoder('utf-8');
1536
- let buffer = '';
1537
 
 
 
1538
  while (true) {
1539
- const { value, done } = await reader.read();
1540
- if (done) break;
1541
- buffer += decoder.decode(value, { stream:true });
1542
- const lines = buffer.split('\\n\\n');
1543
- buffer = lines.pop();
1544
  for (const line of lines) {
1545
  if (!line.startsWith('data: ')) continue;
1546
- const payload = line.replace('data: ','');
1547
- if (payload === '[DONE]') break;
1548
  try {
1549
- const data = JSON.parse(payload);
1550
- if (data.chunk) {
1551
- fullText += data.chunk;
1552
- outputBox.innerText = fullText;
1553
- outputBox.scrollTop = outputBox.scrollHeight;
1554
- }
1555
  } catch(_) {}
1556
  }
1557
  }
1558
 
1559
  dotOut.classList.add('active');
1560
 
1561
- // ── Grounding overlay (Point / Detect) ──────────
1562
- const cat = categorySelect.value;
1563
- if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
1564
- const parsed = extractGroundingJSON(fullText);
1565
- if (parsed !== null) {
1566
  dotGnd.classList.add('active');
1567
  drawGrounding(imgObjectURL, fullText);
1568
  } else {
@@ -1570,14 +1402,12 @@ runBtn.onclick = async () => {
1570
  }
1571
  }
1572
 
1573
- } catch (err) {
1574
- outputBox.innerText = `[Error] ${err.message}`;
1575
- outputBox.style.color = '#ff6b6b';
1576
  } finally {
1577
- runBtn.disabled = false;
1578
- btnLoader.style.display = 'none';
1579
  dotTask.classList.remove('active');
1580
- allWires.forEach(id => document.getElementById(id)?.classList.remove('active'));
1581
  }
1582
  };
1583
  </script>
@@ -1586,4 +1416,4 @@ runBtn.onclick = async () => {
1586
  """
1587
 
1588
# Launch the Gradio app only when executed as a script (not on import);
# ssr_mode=False keeps rendering fully client-side.
if __name__ == "__main__":
    app.launch(show_error=True, ssr_mode=False)
 
36
  else torch.float16
37
  )
38
 
39
# Hugging Face repo IDs for every selectable backend model.
QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"  # vision-language, 2B
QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"  # vision-language, 4B
QWEN_4B_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-4B-Unredacted-MAX"
QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
GEMMA4_E2B_NAME = "google/gemma-4-E2B-it"
LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
49
 
50
def _load_qwen_vl(model_name: str, label: str):
    """Best-effort load of a Qwen3-VL checkpoint and its processor.

    Prints progress, and returns ``(model, processor)`` on success or
    ``(None, None)`` on failure so the app can still start and report the
    model as unavailable at inference time (matching the other loaders).
    """
    print(f"Loading {label} model: {model_name} on {DEVICE}...")
    try:
        model = Qwen3VLForConditionalGeneration.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
        ).to(DEVICE).eval()
        processor = AutoProcessor.from_pretrained(
            model_name, trust_remote_code=True
        )
        print(f"{label} model loaded successfully.")
        return model, processor
    except Exception as e:  # broad by design: keep the app bootable
        print(f"Warning: {label} model loading failed. Error: {e}")
        return None, None


# ── Qwen3-VL-2B-Instruct ────────────────────────────────
qwen_vl_2b_model, qwen_vl_2b_processor = _load_qwen_vl(
    QWEN_VL_2B_MODEL_NAME, "Qwen3-VL-2B"
)

# ── Qwen3-VL-4B-Instruct ────────────────────────────────
qwen_vl_4b_model, qwen_vl_4b_processor = _load_qwen_vl(
    QWEN_VL_4B_MODEL_NAME, "Qwen3-VL-4B"
)
83
+
84
  # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
85
  print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
86
  try:
 
120
  qwen_2b_model = None
121
  qwen_2b_processor = None
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  # ── LFM2.5-VL-450M ──────────────────────────────────────
124
  print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
125
  try:
 
215
  # --- Inference Generator (Streaming) ---
216
  @spaces.GPU(duration=120)
217
  def generate_inference_stream(
218
+ image: Image.Image, category: str, prompt: str, model_id: str = "qwen_vl_2b"
219
  ):
220
  if category == "Query":
221
  full_prompt = prompt
 
228
  else:
229
  full_prompt = prompt
230
 
231
+ # ── Qwen3-VL-2B ──────────────────────────────────────
232
+ if model_id == "qwen_vl_2b":
233
+ if qwen_vl_2b_model is None or qwen_vl_2b_processor is None:
234
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-2B model not loaded.'})}\n\n"
235
  yield "data: [DONE]\n\n"
236
  return
237
  messages = [{"role": "user", "content": [
238
  {"type": "image", "image": image},
239
  {"type": "text", "text": full_prompt},
240
  ]}]
241
+ text_input = qwen_vl_2b_processor.apply_chat_template(
242
  messages, tokenize=False, add_generation_prompt=True
243
  )
244
+ inputs = qwen_vl_2b_processor(
245
  text=[text_input], images=[image], return_tensors="pt", padding=True
246
+ ).to(qwen_vl_2b_model.device)
247
  streamer = TextIteratorStreamer(
248
+ qwen_vl_2b_processor.tokenizer,
249
  skip_prompt=True, skip_special_tokens=True, timeout=120,
250
  )
251
  thread = threading.Thread(
252
+ target=qwen_vl_2b_model.generate,
253
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
254
+ use_cache=True, temperature=1.0, do_sample=True),
255
  )
256
  thread.start()
257
  for tok in streamer:
 
259
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
260
  thread.join()
261
 
262
+ # ── Qwen3-VL-4B ─────────────────────────────────────
263
+ elif model_id == "qwen_vl_4b":
264
+ if qwen_vl_4b_model is None or qwen_vl_4b_processor is None:
265
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-4B model not loaded.'})}\n\n"
266
  yield "data: [DONE]\n\n"
267
  return
268
  messages = [{"role": "user", "content": [
269
  {"type": "image", "image": image},
270
  {"type": "text", "text": full_prompt},
271
  ]}]
272
+ text_input = qwen_vl_4b_processor.apply_chat_template(
273
  messages, tokenize=False, add_generation_prompt=True
274
  )
275
+ inputs = qwen_vl_4b_processor(
276
  text=[text_input], images=[image], return_tensors="pt", padding=True
277
+ ).to(qwen_vl_4b_model.device)
278
  streamer = TextIteratorStreamer(
279
+ qwen_vl_4b_processor.tokenizer,
280
  skip_prompt=True, skip_special_tokens=True, timeout=120,
281
  )
282
  thread = threading.Thread(
283
+ target=qwen_vl_4b_model.generate,
284
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
285
+ use_cache=True, temperature=1.0, do_sample=True),
286
  )
287
  thread.start()
288
  for tok in streamer:
 
290
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
291
  thread.join()
292
 
293
+ # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
294
+ elif model_id == "qwen_4b_unredacted":
295
+ if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
296
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
297
  yield "data: [DONE]\n\n"
298
  return
299
  messages = [{"role": "user", "content": [
300
  {"type": "image", "image": image},
301
  {"type": "text", "text": full_prompt},
302
  ]}]
303
+ text_input = qwen_4b_unredacted_processor.apply_chat_template(
304
  messages, tokenize=False, add_generation_prompt=True
305
  )
306
+ inputs = qwen_4b_unredacted_processor(
307
  text=[text_input], images=[image], return_tensors="pt", padding=True
308
+ ).to(qwen_4b_unredacted_model.device)
309
  streamer = TextIteratorStreamer(
310
+ qwen_4b_unredacted_processor.tokenizer,
311
  skip_prompt=True, skip_special_tokens=True, timeout=120,
312
  )
313
  thread = threading.Thread(
314
+ target=qwen_4b_unredacted_model.generate,
315
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
316
  use_cache=True, temperature=1.5, min_p=0.1),
317
  )
 
321
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
322
  thread.join()
323
 
324
+ # ── Qwen3.5-4B ─────────────────────────────────────
325
+ elif model_id == "qwen_4b":
326
+ if qwen_4b_model is None or qwen_4b_processor is None:
327
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
328
  yield "data: [DONE]\n\n"
329
  return
330
  messages = [{"role": "user", "content": [
331
  {"type": "image", "image": image},
332
  {"type": "text", "text": full_prompt},
333
  ]}]
334
+ text_input = qwen_4b_processor.apply_chat_template(
335
  messages, tokenize=False, add_generation_prompt=True
336
  )
337
+ inputs = qwen_4b_processor(
338
  text=[text_input], images=[image], return_tensors="pt", padding=True
339
+ ).to(qwen_4b_model.device)
340
  streamer = TextIteratorStreamer(
341
+ qwen_4b_processor.tokenizer,
342
  skip_prompt=True, skip_special_tokens=True, timeout=120,
343
  )
344
  thread = threading.Thread(
345
+ target=qwen_4b_model.generate,
346
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
347
+ use_cache=True, temperature=1.5, min_p=0.1),
348
  )
349
  thread.start()
350
  for tok in streamer:
 
352
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
353
  thread.join()
354
 
355
+ # ── Qwen3.5-2B ─────────────────────────────────────
356
+ elif model_id == "qwen_2b":
357
+ if qwen_2b_model is None or qwen_2b_processor is None:
358
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B model not loaded.'})}\n\n"
359
  yield "data: [DONE]\n\n"
360
  return
361
  messages = [{"role": "user", "content": [
362
  {"type": "image", "image": image},
363
  {"type": "text", "text": full_prompt},
364
  ]}]
365
+ text_input = qwen_2b_processor.apply_chat_template(
366
  messages, tokenize=False, add_generation_prompt=True
367
  )
368
+ inputs = qwen_2b_processor(
369
  text=[text_input], images=[image], return_tensors="pt", padding=True
370
+ ).to(qwen_2b_model.device)
371
  streamer = TextIteratorStreamer(
372
+ qwen_2b_processor.tokenizer,
373
  skip_prompt=True, skip_special_tokens=True, timeout=120,
374
  )
375
  thread = threading.Thread(
376
+ target=qwen_2b_model.generate,
377
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
378
+ use_cache=True, temperature=1.5, min_p=0.1),
379
  )
380
  thread.start()
381
  for tok in streamer:
 
546
  image: UploadFile = File(...),
547
  category: str = Form(...),
548
  prompt: str = Form(...),
549
+ model_id: str = Form("qwen_vl_2b"),
550
  ):
551
  try:
552
  img_bytes = await image.read()
 
589
  }
590
  * { box-sizing: border-box; margin: 0; padding: 0; }
591
  html, body {
592
+ min-height: 100%; background: var(--bg);
593
+ color: var(--text); font-family: 'JetBrains Mono', monospace;
 
 
594
  }
595
  body {
596
  background-image:
 
603
  }
604
  /* ── Top Bar ── */
605
  .top-bar {
606
+ position: sticky; top: 0; left: 0; right: 0; height: 42px;
 
607
  background: rgba(13,13,15,0.95);
608
  border-bottom: 1px solid var(--node-border);
609
  display: flex; align-items: center; padding: 0 20px;
610
+ gap: 12px; z-index: 1000; backdrop-filter: blur(12px);
 
611
  }
612
  .top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
613
  .top-bar .sep { color: var(--node-border); }
 
621
  }
622
  /* ── Canvas ── */
623
  #canvas {
624
+ position: relative; width: 1360px;
625
+ min-height: calc(100vh - 42px); height: 900px; margin: 0 auto;
 
 
 
626
  }
627
  svg.wires {
628
  position: absolute; top: 0; left: 0;
629
  width: 100%; height: 100%;
630
+ pointer-events: none; z-index: 2; overflow: visible;
 
 
 
 
 
631
  }
632
+ path.wire { fill: none; stroke: var(--wire); stroke-width: 2.5; stroke-linecap: round; }
633
  path.wire.active {
634
  stroke: var(--wire-active); stroke-width: 3;
635
+ stroke-dasharray: 8 4; animation: flow 0.6s linear infinite;
 
636
  }
637
  @keyframes flow { to { stroke-dashoffset: -24; } }
638
  /* ── Nodes ── */
639
  .node {
640
  position: absolute; width: 295px;
641
+ background: var(--node-bg); border: 1px solid var(--node-border);
642
+ border-radius: 9px; box-shadow: 0 8px 28px rgba(0,0,0,0.5);
643
+ z-index: 10; display: flex; flex-direction: column; transition: box-shadow 0.2s;
 
 
 
 
 
 
 
644
  }
645
+ .node:hover { box-shadow: 0 8px 28px rgba(0,0,0,0.5), 0 0 0 1px rgba(124,106,247,0.3); }
646
  .node.fixed-height { height: 340px; }
647
  .node-header {
648
+ background: var(--node-header); padding: 7px 12px;
649
+ border-bottom: 1px solid var(--node-border); border-radius: 9px 9px 0 0;
650
+ font-size: 11px; font-weight: 700; cursor: grab;
 
 
 
651
  display: flex; justify-content: space-between; align-items: center;
652
  flex-shrink: 0; user-select: none;
653
  }
654
  .node-header:active { cursor: grabbing; }
655
  .node-header .id {
656
  font-size: 10px; color: var(--muted);
657
+ background: rgba(255,255,255,0.04); padding: 2px 7px; border-radius: 4px;
 
 
 
 
 
 
658
  }
659
+ .node-body { padding: 10px; display: flex; flex-direction: column; gap: 8px; flex: 1; overflow: hidden; }
660
  /* ── Ports ── */
661
  .port {
662
  position: absolute; width: 11px; height: 11px;
663
+ background: var(--node-bg); border: 2px solid var(--port);
 
664
  border-radius: 50%; z-index: 30;
665
  }
666
  .port.out { right: -6px; }
667
  .port.in { left: -6px; }
668
  /* ── Labels ── */
669
  label {
670
+ font-size: 10px; color: var(--muted); font-weight: 600;
671
+ display: block; margin-bottom: 3px; letter-spacing: 0.07em; text-transform: uppercase;
 
672
  }
673
  input[type="file"] { display: none; }
674
  /* ── Upload Zone ── */
675
  .file-upload {
676
+ border: 1.5px dashed var(--node-border); border-radius: 7px; padding: 12px 10px;
677
+ text-align: center; cursor: pointer; font-size: 11px; color: var(--muted);
678
+ transition: border-color 0.2s, background 0.2s; background: rgba(255,255,255,0.01);
 
 
 
679
  display: flex; flex-direction: column; align-items: center; gap: 5px;
680
  }
681
+ .file-upload:hover { border-color: var(--accent); background: rgba(124,106,247,0.04); }
 
 
 
682
  .file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
683
  .file-upload:hover svg { opacity: 0.9; }
684
  /* ── Preview wrapper ── */
685
  .preview-wrap {
686
+ display: none; position: relative; border-radius: 7px;
687
+ overflow: hidden; border: 1px solid var(--node-border); background: #000;
 
688
  }
689
  .preview-wrap.visible { display: block; }
690
  .img-preview { width: 100%; height: 170px; object-fit: contain; display: block; }
691
  /* ── Clear button ── */
692
  .clear-btn {
693
+ position: absolute; top: 6px; right: 6px; width: 24px; height: 24px;
694
+ border-radius: 50%; background: rgba(13,13,15,0.80);
695
+ border: 1px solid var(--node-border); color: var(--accent3); cursor: pointer;
 
 
696
  display: flex; align-items: center; justify-content: center;
697
  transition: background 0.18s, border-color 0.18s, transform 0.12s;
698
  z-index: 20; backdrop-filter: blur(6px);
699
  }
700
+ .clear-btn:hover { background: rgba(255,107,107,0.18); border-color: var(--accent3); transform: scale(1.08); }
 
 
 
701
  .clear-btn:active { transform: scale(0.95); }
702
  .clear-btn svg { pointer-events: none; }
703
  /* ── Filename chip ── */
704
  .img-chip {
705
  display: none; align-items: center; gap: 6px;
706
+ background: rgba(124,106,247,0.08); border: 1px solid rgba(124,106,247,0.22);
707
+ border-radius: 5px; padding: 4px 8px; font-size: 9px; color: var(--muted); overflow: hidden;
 
 
708
  }
709
  .img-chip.visible { display: flex; }
710
+ .img-chip .chip-dot { width: 5px; height: 5px; border-radius: 50%; background: var(--accent2); flex-shrink: 0; box-shadow: 0 0 4px var(--accent2); }
711
+ .img-chip .chip-name { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; flex: 1; color: var(--text); font-size: 9px; }
 
 
 
 
 
 
 
 
712
  .img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 9px; }
713
  select, textarea {
714
+ width: 100%; background: rgba(0,0,0,0.3); border: 1px solid var(--node-border);
715
+ color: var(--text); padding: 7px 9px; border-radius: 5px; outline: none;
 
 
716
  font-size: 11px; font-family: 'JetBrains Mono', monospace;
717
  resize: none; transition: border-color 0.2s;
718
  }
 
721
  button.run-btn {
722
  background: linear-gradient(135deg, var(--accent), #9b59b6);
723
  color: #fff; border: none; padding: 8px; border-radius: 6px;
724
+ font-weight: 700; font-size: 11px; font-family: 'JetBrains Mono', monospace;
725
+ cursor: pointer; transition: opacity 0.2s, transform 0.1s;
 
726
  display: flex; justify-content: center; align-items: center; gap: 8px;
727
  letter-spacing: 0.04em; flex-shrink: 0;
728
  }
729
+ button.run-btn:hover { opacity: 0.9; }
730
+ button.run-btn:active { transform: scale(0.98); }
731
  button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
732
  /* ── Output node ── */
733
+ .output-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
734
+ .output-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
735
+ /* ── Icon buttons (copy / download) ── */
736
+ .icon-btn {
 
 
 
 
 
 
737
  display: flex; align-items: center; gap: 5px;
738
+ background: rgba(124,106,247,0.10); border: 1px solid rgba(124,106,247,0.25);
 
739
  border-radius: 5px; padding: 3px 8px;
740
+ font-size: 9px; font-weight: 700; font-family: 'JetBrains Mono', monospace;
741
+ color: var(--accent); cursor: pointer; letter-spacing: 0.05em;
742
+ transition: background 0.18s, border-color 0.18s, transform 0.1s; flex-shrink: 0;
743
+ text-decoration: none;
 
 
744
  }
745
+ .icon-btn:hover { background: rgba(124,106,247,0.22); border-color: var(--accent); }
746
+ .icon-btn:active { transform: scale(0.95); }
747
+ .icon-btn.teal {
748
+ background: rgba(78,205,196,0.10); border-color: rgba(78,205,196,0.25); color: var(--accent2);
 
749
  }
750
+ .icon-btn.teal:hover { background: rgba(78,205,196,0.22); border-color: var(--accent2); }
751
+ .icon-btn.copied { background: rgba(78,205,196,0.15); border-color: var(--accent2); color: var(--accent2); }
752
+ .icon-btn svg { pointer-events: none; flex-shrink: 0; }
753
  .output-box {
754
+ background: rgba(0,0,0,0.4); border: 1px solid var(--node-border);
755
+ border-radius: 5px; padding: 10px; flex: 1; overflow-y: auto;
756
+ font-size: 11px; line-height: 1.6; color: #c8c8e0; white-space: pre-wrap;
757
+ user-select: text; font-family: 'JetBrains Mono', monospace; min-height: 0;
 
 
 
 
758
  }
759
+ /* ── Grounding node ── */
760
+ .ground-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
761
+ .ground-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
762
  .ground-canvas-wrap {
763
+ position: relative; flex: 1; border: 1px solid var(--node-border);
764
+ border-radius: 5px; overflow: hidden; background: #000; min-height: 0;
 
 
765
  }
766
  .ground-canvas-wrap canvas { width: 100%; height: 100%; object-fit: contain; display: block; }
767
  .ground-placeholder {
768
+ position: absolute; inset: 0; display: flex; align-items: center;
769
+ justify-content: center; font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
 
770
  }
771
  .loader {
772
+ width: 11px; height: 11px; border: 2px solid rgba(255,255,255,0.3);
 
773
  border-top-color: #fff; border-radius: 50%;
774
  animation: spin 0.7s linear infinite; display: none;
775
  }
776
  @keyframes spin { to { transform: rotate(360deg); } }
777
+ .status-dot { width: 6px; height: 6px; border-radius: 50%; background: var(--muted); display: inline-block; margin-right: 6px; }
 
 
 
778
  .status-dot.active { background: var(--accent2); box-shadow: 0 0 5px var(--accent2); }
779
  /* ── Model badges ── */
780
  .model-badge {
781
+ display: inline-block; padding: 2px 7px; border-radius: 4px;
782
+ font-size: 9px; font-weight: 700; letter-spacing: 0.06em; text-transform: uppercase;
 
783
  }
784
+ .model-badge.qvl2b { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
785
+ .model-badge.qvl4b { background: rgba(255,100,80,0.15); color: #ff6450; border: 1px solid rgba(255,100,80,0.35); }
786
  .model-badge.q4bunred { background: rgba(255,80,80,0.18); color: #ff5050; border: 1px solid rgba(255,80,80,0.40); }
787
  .model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
788
  .model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
 
 
789
  .model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
790
  .model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
791
  .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
792
  .model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
793
  .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
794
+ .model-info-box { border-radius: 6px; padding: 9px; font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0; }
 
 
 
795
  .canvas-footer { height: 36px; }
796
  </style>
797
  </head>
 
823
  <label>Upload Image</label>
824
  <div class="file-upload" id="dropZone">
825
  <svg width="30" height="30" viewBox="0 0 24 24" fill="none"
826
+ stroke="#7c6af7" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
 
827
  <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
828
  <circle cx="8.5" cy="8.5" r="1.5"/>
829
  <polyline points="21 15 16 10 5 21"/>
 
835
  <img id="imgPreview" class="img-preview" />
836
  <button class="clear-btn" id="clearBtn" title="Remove image">
837
  <svg width="12" height="12" viewBox="0 0 24 24" fill="none"
838
+ stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
 
839
  <line x1="18" y1="6" x2="6" y2="18"/>
840
  <line x1="6" y1="6" x2="18" y2="18"/>
841
  </svg>
 
861
  <div>
862
  <label>Active Model</label>
863
  <select id="modelSelect">
864
+ <option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
865
+ <option value="qwen_vl_4b">Qwen3-VL-4B-Instruct</option>
866
  <option value="qwen_4b_unredacted">Qwen3.5-4B-Unredacted-MAX</option>
867
  <option value="qwen_4b">Qwen3.5-4B</option>
868
  <option value="qwen_2b">Qwen3.5-2B</option>
 
 
869
  <option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
870
  <option value="gemma4_e2b">Gemma4-E2B-it (Google)</option>
871
  <option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
 
874
  </select>
875
  </div>
876
  <div id="modelInfoBox" class="model-info-box"
877
+ style="background:rgba(255,150,50,0.07);border:1px solid rgba(255,150,50,0.3);">
878
+ <span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
879
+ Qwen3-VL-2B-Instruct dedicated vision-language model by Alibaba Cloud.
880
+ Strong spatial grounding, OCR &amp; instruction-following.
881
  </div>
882
  <div style="flex:1;"></div>
883
  </div>
 
924
  <div class="output-node-body">
925
  <div class="output-header-row">
926
  <label style="margin-bottom:0;">Streamed Result</label>
927
+ <button class="icon-btn" id="copyBtn" title="Copy result to clipboard">
928
  <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
929
+ stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
 
930
  <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
931
  <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
932
  </svg>
 
944
  <span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
945
  <span class="id">ID: 05</span>
946
  </div>
947
+ <div class="ground-node-body">
948
+ <div class="ground-header-row">
949
+ <label style="margin-bottom:0;">Point / Detect Overlay</label>
950
+ <a class="icon-btn teal" id="downloadBtn" title="Download overlay image" style="display:none;">
951
+ <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
952
+ stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
953
+ <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
954
+ <polyline points="7 10 12 15 17 10"/>
955
+ <line x1="12" y1="15" x2="12" y2="3"/>
956
+ </svg>
957
+ SAVE
958
+ </a>
959
+ </div>
960
  <div class="ground-canvas-wrap">
961
  <canvas id="groundCanvas"></canvas>
962
  <div class="ground-placeholder" id="groundPlaceholder">
 
976
const canvasEl = document.getElementById('canvas');
// Centre of a port element, expressed in canvas-local coordinates, used as
// a wire endpoint. Returns the origin for a missing element so wire drawing
// degrades gracefully instead of throwing.
function portCenter(id) {
  const el = document.getElementById(id);
  if (!el) return {x:0,y:0};
  const er = el.getBoundingClientRect(), cr = canvasEl.getBoundingClientRect();
  return { x: er.left + er.width/2 - cr.left, y: er.top + er.height/2 - cr.top };
}
983
  function bezier(p1, p2) {
 
1004
  const header = node.querySelector('.node-header');
1005
  let drag = false, sx, sy, il, it;
1006
  header.addEventListener('mousedown', e => {
1007
+ drag=true; sx=e.clientX; sy=e.clientY;
1008
+ il=parseInt(node.style.left)||0; it=parseInt(node.style.top)||0;
1009
+ node.style.zIndex=100; e.preventDefault();
1010
  });
1011
  document.addEventListener('mousemove', e => {
1012
  if (!drag) return;
1013
+ node.style.left=`${il+e.clientX-sx}px`; node.style.top=`${it+e.clientY-sy}px`;
 
1014
  updateWires();
1015
  });
1016
+ document.addEventListener('mouseup', () => { if(drag){drag=false;node.style.zIndex=10;} });
1017
  });
1018
  window.addEventListener('resize', updateWires);
1019
  window.addEventListener('scroll', updateWires);
 
1035
  const dotImg = document.getElementById('dot-img');
1036
 
1037
function formatBytes(b) {
  // Human-readable file size: bytes, then KB / MB with one decimal place.
  const KB = 1024;
  const MB = 1024 * 1024;
  if (b < KB) return `${b} B`;
  if (b < MB) return `${(b / KB).toFixed(1)} KB`;
  return `${(b / MB).toFixed(1)} MB`;
}
1041
  function handleFile(file) {
1042
+ if (!file||!file.type.startsWith('image/')) return;
1043
+ currentFile=file; imgPreview.src=URL.createObjectURL(file);
1044
+ previewWrap.classList.add('visible'); dropZone.style.display='none';
1045
+ chipName.textContent=file.name; chipSize.textContent=formatBytes(file.size);
1046
+ imgChip.classList.add('visible'); dotImg.classList.add('active');
 
 
 
 
1047
  requestAnimationFrame(updateWires);
1048
  }
1049
  function clearImage() {
1050
+ currentFile=null; imgPreview.src=''; previewWrap.classList.remove('visible');
1051
+ dropZone.style.display=''; imgChip.classList.remove('visible');
1052
+ chipName.textContent='—'; chipSize.textContent=''; fileInput.value='';
1053
+ dotImg.classList.remove('active'); requestAnimationFrame(updateWires);
 
 
 
1054
  }
1055
  dropZone.onclick = () => fileInput.click();
1056
  fileInput.onchange = e => handleFile(e.target.files[0]);
 
1071
  dotModel.classList.add('active');
1072
 
1073
  const MODEL_INFO = {
1074
+ qwen_vl_2b: {
1075
+ html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
1076
+ Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
1077
+ Strong spatial grounding, OCR &amp; instruction-following.`,
1078
+ bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.30)',
1079
+ },
1080
+ qwen_vl_4b: {
1081
+ html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
1082
+ Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
1083
+ Superior spatial grounding, richer OCR &amp; stronger multi-step reasoning.`,
1084
+ bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
1085
+ },
1086
  qwen_4b_unredacted: {
1087
  html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
1088
  Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
 
1101
  Lightweight &amp; fast — ideal for quick Query, Caption, Point &amp; Detect tasks.`,
1102
  bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
1103
  },
 
 
 
 
 
 
 
 
 
 
 
 
1104
  lfm_450: {
1105
  html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
1106
  LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
 
1151
  Point: 'e.g., The gun held by the person.',
1152
  Detect: 'e.g., The headlight of the car.',
1153
  };
1154
// Swap the prompt hint to match the selected task category.
categorySelect.onchange = (evt) => {
  const hint = PLACEHOLDERS[evt.target.value];
  promptInput.placeholder = hint || '';
};
1155
 
1156
  // ══════════════════════════════════════════════
1157
  // ROBUST JSON EXTRACTOR
 
 
 
 
 
 
 
 
 
1158
  // ══════════════════════════════════════════════
1159
  function extractGroundingJSON(raw) {
1160
+ // 1. Strip all <think>…</think> blocks (multi-pass)
1161
+ let text = raw, prev = null;
 
 
 
1162
  while (prev !== text) {
1163
  prev = text;
1164
  text = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
1165
  }
1166
+ // 2. Strip markdown fences, keep inner content
1167
+ text = text.replace(/```(?:json)?([\s\S]*?)```/gi, '$1');
1168
+ text = text.replace(/```/g, '').trim();
1169
 
1170
+ // Balanced bracket extractor
 
 
 
 
 
 
 
 
 
 
 
1171
  function extractBalanced(str, startIdx, openCh, closeCh) {
1172
+ let depth=0, inStr=false, esc=false;
1173
+ for (let i=startIdx; i<str.length; i++) {
1174
+ const c=str[i];
1175
+ if (esc) { esc=false; continue; }
1176
+ if (c==='\\\\') { esc=true; continue; }
1177
+ if (c==='"') { inStr=!inStr; continue; }
1178
+ if (inStr) continue;
1179
+ if (c===openCh) depth++;
1180
+ if (c===closeCh) {
1181
  depth--;
1182
+ if (depth===0) {
1183
+ try { return JSON.parse(str.slice(startIdx, i+1)); } catch(_) { return null; }
 
1184
  }
1185
  }
1186
  }
1187
  return null;
1188
  }
1189
 
1190
+ // Search from the END models emit JSON after reasoning prose
1191
+ for (let i=text.length-1; i>=0; i--) {
1192
+ if (text[i]==='[') { const r=extractBalanced(text,i,'[',']'); if(r!==null) return r; }
 
 
 
 
 
 
 
 
 
 
 
 
1193
  }
1194
+ for (let i=text.length-1; i>=0; i--) {
1195
+ if (text[i]==='{') { const r=extractBalanced(text,i,'{','}'); if(r!==null) return r; }
 
1196
  }
1197
+ // Fallback: search from start
1198
+ const fa=text.indexOf('['); if(fa!==-1){const r=extractBalanced(text,fa,'[',']');if(r!==null)return r;}
1199
+ const fo=text.indexOf('{'); if(fo!==-1){const r=extractBalanced(text,fo,'{','}');if(r!==null)return r;}
 
 
 
 
 
 
 
 
 
 
 
 
 
1200
  try { return JSON.parse(text); } catch(_) {}
1201
  return null;
1202
  }
 
1207
  const groundCanvas = document.getElementById('groundCanvas');
1208
  const groundPlaceholder = document.getElementById('groundPlaceholder');
1209
  const gCtx = groundCanvas.getContext('2d');
1210
+ const downloadBtn = document.getElementById('downloadBtn');
1211
 
1212
  const PALETTE = ['#4ecdc4','#7c6af7','#ff6b6b','#ffd93d','#6bcb77','#ff922b','#cc5de8','#339af0'];
1213
 
1214
function hexToRgba(hex, alpha) {
  // '#rrggbb' -> 'rgba(r,g,b,alpha)' CSS color string.
  const channel = (at) => parseInt(hex.substring(at, at + 2), 16);
  const [r, g, b] = [1, 3, 5].map(channel);
  return `rgba(${r},${g},${b},${alpha})`;
}
1218
function roundRect(ctx, x, y, w, h, r) {
  // Trace a rounded-rectangle path clockwise from the top-left corner.
  // Caller is responsible for fill()/stroke().
  const right = x + w;
  const bottom = y + h;
  ctx.beginPath();
  ctx.moveTo(x + r, y);
  // [edge endpoint x/y, corner control x/y, corner endpoint x/y]
  const corners = [
    [right - r, y, right, y, right, y + r],
    [right, bottom - r, right, bottom, right - r, bottom],
    [x + r, bottom, x, bottom, x, bottom - r],
    [x, y + r, x, y, x + r, y],
  ];
  for (const [ex, ey, cx, cy, px, py] of corners) {
    ctx.lineTo(ex, ey);
    ctx.quadraticCurveTo(cx, cy, px, py);
  }
  ctx.closePath();
}
1225
+
1226
function updateDownloadBtn() {
  // Point the SAVE anchor at the current canvas contents, with a
  // timestamped filename, and make the button visible.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
  downloadBtn.download = `grounding_${stamp}.png`;
  downloadBtn.href = groundCanvas.toDataURL('image/png');
  downloadBtn.style.display = 'flex';
}
1234
 
1235
function drawGrounding(imgSrc, rawText) {
  // Render bounding-box / point annotations parsed from `rawText`
  // on top of the source image inside the grounding canvas.
  const parsed = extractGroundingJSON(rawText);
  if (!parsed) { console.warn('Grounding: no JSON found in:', rawText.slice(0,200)); return; }

  const img = new Image();
  img.onerror = () => console.error('Grounding: failed to load image.');
  img.onload = () => {
    const W = img.naturalWidth;
    const H = img.naturalHeight;
    groundCanvas.width = W;
    groundCanvas.height = H;
    gCtx.drawImage(img, 0, 0);
    groundPlaceholder.style.display = 'none';

    // Scale stroke width and font size with the image resolution.
    const lw = Math.max(2, W / 200);
    const fs = Math.max(12, W / 40);
    gCtx.lineWidth = lw;
    gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;

    // Accept either a named n-length array field or a bare numeric tuple.
    const fixedLen = (v, n) => (Array.isArray(v) && v.length === n) ? v : null;
    const numericTuple = (v, n) =>
      (Array.isArray(v) && v.length === n && v.every(x => typeof x === 'number')) ? v : null;

    const items = Array.isArray(parsed) ? parsed : [parsed];
    items.forEach((item, i) => {
      const col = PALETTE[i % PALETTE.length];

      // ── Bounding box: `bbox_2d`, `bbox`, or bare [x1,y1,x2,y2] ──
      const bbox = fixedLen(item?.bbox_2d, 4) || fixedLen(item?.bbox, 4) || numericTuple(item, 4);
      if (bbox) {
        let [x1, y1, x2, y2] = bbox.map(Number);
        // All values in [0,1] are treated as normalized coordinates.
        if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) { x1 *= W; y1 *= H; x2 *= W; y2 *= H; }
        if (x2 < x1) [x1, x2] = [x2, x1];
        if (y2 < y1) [y1, y2] = [y2, y1];
        const bw = x2 - x1;
        const bh = y2 - y1;
        const lbl = (item?.label ?? `obj ${i+1}`).toString();
        gCtx.fillStyle = hexToRgba(col, 0.20);
        gCtx.fillRect(x1, y1, bw, bh);
        gCtx.strokeStyle = col;
        gCtx.lineWidth = lw;
        gCtx.strokeRect(x1, y1, bw, bh);
        // Label pill above the box, clamped to the top edge of the canvas.
        const tw = gCtx.measureText(lbl).width;
        const ph = fs * 1.45;
        const pw = tw + 12;
        const lx = x1;
        const ly = Math.max(0, y1 - ph);
        gCtx.fillStyle = col;
        roundRect(gCtx, lx, ly, pw, ph, 4);
        gCtx.fill();
        gCtx.fillStyle = '#fff';
        gCtx.fillText(lbl, lx + 6, ly + ph * 0.76);
        return;
      }

      // ── Point: `point_2d`, `point`, or bare [x,y] ──
      const pt = fixedLen(item?.point_2d, 2) || fixedLen(item?.point, 2) || numericTuple(item, 2);
      if (pt) {
        let [x, y] = pt.map(Number);
        if (x <= 1 && y <= 1) { x *= W; y *= H; }
        const r = Math.max(8, W / 60);
        const lbl = (item?.label ?? `pt ${i+1}`).toString();
        // Soft halo, solid dot, white ring, then the label text.
        gCtx.beginPath();
        gCtx.arc(x, y, r * 1.8, 0, Math.PI * 2);
        gCtx.fillStyle = hexToRgba(col, 0.18);
        gCtx.fill();
        gCtx.beginPath();
        gCtx.arc(x, y, r, 0, Math.PI * 2);
        gCtx.fillStyle = col;
        gCtx.fill();
        gCtx.strokeStyle = '#fff';
        gCtx.lineWidth = Math.max(1.5, lw);
        gCtx.stroke();
        gCtx.fillStyle = '#fff';
        gCtx.fillText(lbl, x + r + 5, y + fs * 0.4);
      }
    });

    // Re-arm the SAVE button now that an overlay exists.
    updateDownloadBtn();
  };
  img.src = imgSrc;
}
1301
 
 
1310
  copyBtn.classList.remove('copied');
1311
  copyBtn.innerHTML = `
1312
  <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1313
+ stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
 
1314
  <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1315
  <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1316
  </svg> COPY`;
1317
  }
 
1318
// Copy the streamed result text; fall back to execCommand('copy')
// where the async Clipboard API is unavailable or rejected.
copyBtn.onclick = () => {
  const txt = outputBox.innerText || '';
  if (!txt || txt === 'Results will stream here...') return;
  const legacyCopy = () => {
    const ta = document.createElement('textarea');
    ta.value = txt;
    ta.style.position = 'fixed';
    ta.style.opacity = '0';
    document.body.appendChild(ta);
    ta.select();
    document.execCommand('copy');
    document.body.removeChild(ta);
  };
  navigator.clipboard.writeText(txt).then(() => {
    // Flash the "COPIED" checkmark, then reset after 2 s.
    copyBtn.classList.add('copied');
    copyBtn.innerHTML = `
      <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
        stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
        <polyline points="20 6 9 17 4 12"/>
      </svg> COPIED`;
    clearTimeout(copyTimer);
    copyTimer = setTimeout(resetCopyBtn, 2000);
  }).catch(legacyCopy);
};
1335
 
 
1348
  const promptStr = promptInput.value.trim();
1349
  if (!promptStr) { alert('Please enter a prompt directive.'); return; }
1350
 
1351
+ runBtn.disabled=true; btnLoader.style.display='inline-block';
1352
+ outputBox.innerText=''; outputBox.style.color='';
1353
+ groundPlaceholder.style.display='flex';
1354
+ gCtx.clearRect(0,0,groundCanvas.width,groundCanvas.height);
1355
+ downloadBtn.style.display='none';
 
1356
  dotTask.classList.add('active');
1357
+ dotOut.classList.remove('active'); dotGnd.classList.remove('active');
1358
+ allWires.forEach(id=>document.getElementById(id)?.classList.add('active'));
 
1359
  resetCopyBtn();
1360
 
1361
+ const formData=new FormData();
1362
  formData.append('image', currentFile);
1363
  formData.append('category', categorySelect.value);
1364
  formData.append('prompt', promptStr);
1365
  formData.append('model_id', modelSelect.value);
1366
 
1367
+ let fullText='';
1368
+ const imgObjectURL=URL.createObjectURL(currentFile);
1369
 
1370
  try {
1371
+ const response=await fetch('/api/run',{method:'POST',body:formData});
1372
+ if (!response.ok) { const err=await response.json(); throw new Error(err.error||'Execution failed.'); }
 
 
 
 
 
 
 
1373
 
1374
+ const reader=response.body.getReader(), decoder=new TextDecoder('utf-8');
1375
+ let buffer='';
1376
  while (true) {
1377
+ const {value,done}=await reader.read(); if(done)break;
1378
+ buffer+=decoder.decode(value,{stream:true});
1379
+ const lines=buffer.split('\\n\\n'); buffer=lines.pop();
 
 
1380
  for (const line of lines) {
1381
  if (!line.startsWith('data: ')) continue;
1382
+ const payload=line.replace('data: ','');
1383
+ if (payload==='[DONE]') break;
1384
  try {
1385
+ const data=JSON.parse(payload);
1386
+ if (data.chunk) { fullText+=data.chunk; outputBox.innerText=fullText; outputBox.scrollTop=outputBox.scrollHeight; }
 
 
 
 
1387
  } catch(_) {}
1388
  }
1389
  }
1390
 
1391
  dotOut.classList.add('active');
1392
 
1393
+ // Grounding overlay for Point / Detect
1394
+ const cat=categorySelect.value;
1395
+ if ((cat==='Point'||cat==='Detect') && fullText.trim()) {
1396
+ const parsed=extractGroundingJSON(fullText);
1397
+ if (parsed!==null) {
1398
  dotGnd.classList.add('active');
1399
  drawGrounding(imgObjectURL, fullText);
1400
  } else {
 
1402
  }
1403
  }
1404
 
1405
+ } catch(err) {
1406
+ outputBox.innerText=`[Error] ${err.message}`; outputBox.style.color='#ff6b6b';
 
1407
  } finally {
1408
+ runBtn.disabled=false; btnLoader.style.display='none';
 
1409
  dotTask.classList.remove('active');
1410
+ allWires.forEach(id=>document.getElementById(id)?.classList.remove('active'));
1411
  }
1412
  };
1413
  </script>
 
1416
  """
1417
 
1418
if __name__ == "__main__":
    # Start the web app; show_error surfaces server-side tracebacks in the UI.
    app.launch(show_error=True)