prithivMLmods commited on
Commit
bca4e3d
·
verified ·
1 Parent(s): 1c706f5

update app [final] ✅

Browse files
Files changed (1) hide show
  1. app.py +333 -503
app.py CHANGED
@@ -36,17 +36,51 @@ DTYPE = (
36
  else torch.float16
37
  )
38
 
 
 
39
  QWEN_4B_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-4B-Unredacted-MAX"
40
  QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
41
  QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
42
- QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
43
- QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"
44
  LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
45
  GEMMA4_E2B_NAME = "google/gemma-4-E2B-it"
46
  LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
47
  QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
48
  QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
51
  print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
52
  try:
@@ -86,40 +120,6 @@ except Exception as e:
86
  qwen_2b_model = None
87
  qwen_2b_processor = None
88
 
89
- # ── Qwen3-VL-2B-Instruct ────────────────────────────────
90
- print(f"Loading Qwen3-VL-2B model: {QWEN_VL_2B_MODEL_NAME} on {DEVICE}...")
91
- try:
92
- qwen_vl_2b_model = Qwen3VLForConditionalGeneration.from_pretrained(
93
- QWEN_VL_2B_MODEL_NAME,
94
- trust_remote_code=True,
95
- torch_dtype=torch.bfloat16,
96
- ).to(DEVICE).eval()
97
- qwen_vl_2b_processor = AutoProcessor.from_pretrained(
98
- QWEN_VL_2B_MODEL_NAME, trust_remote_code=True
99
- )
100
- print("Qwen3-VL-2B model loaded successfully.")
101
- except Exception as e:
102
- print(f"Warning: Qwen3-VL-2B model loading failed. Error: {e}")
103
- qwen_vl_2b_model = None
104
- qwen_vl_2b_processor = None
105
-
106
- # ── Qwen3-VL-4B-Instruct ────────────────────────────────
107
- print(f"Loading Qwen3-VL-4B model: {QWEN_VL_4B_MODEL_NAME} on {DEVICE}...")
108
- try:
109
- qwen_vl_4b_model = Qwen3VLForConditionalGeneration.from_pretrained(
110
- QWEN_VL_4B_MODEL_NAME,
111
- trust_remote_code=True,
112
- torch_dtype=torch.bfloat16,
113
- ).to(DEVICE).eval()
114
- qwen_vl_4b_processor = AutoProcessor.from_pretrained(
115
- QWEN_VL_4B_MODEL_NAME, trust_remote_code=True
116
- )
117
- print("Qwen3-VL-4B model loaded successfully.")
118
- except Exception as e:
119
- print(f"Warning: Qwen3-VL-4B model loading failed. Error: {e}")
120
- qwen_vl_4b_model = None
121
- qwen_vl_4b_processor = None
122
-
123
  # ── LFM2.5-VL-450M ──────────────────────────────────────
124
  print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
125
  try:
@@ -215,7 +215,7 @@ def safe_parse_json(text: str):
215
  # --- Inference Generator (Streaming) ---
216
  @spaces.GPU(duration=120)
217
  def generate_inference_stream(
218
- image: Image.Image, category: str, prompt: str, model_id: str = "qwen_4b_unredacted"
219
  ):
220
  if category == "Query":
221
  full_prompt = prompt
@@ -228,30 +228,30 @@ def generate_inference_stream(
228
  else:
229
  full_prompt = prompt
230
 
231
- # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
232
- if model_id == "qwen_4b_unredacted":
233
- if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
234
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
235
  yield "data: [DONE]\n\n"
236
  return
237
  messages = [{"role": "user", "content": [
238
  {"type": "image", "image": image},
239
  {"type": "text", "text": full_prompt},
240
  ]}]
241
- text_input = qwen_4b_unredacted_processor.apply_chat_template(
242
  messages, tokenize=False, add_generation_prompt=True
243
  )
244
- inputs = qwen_4b_unredacted_processor(
245
  text=[text_input], images=[image], return_tensors="pt", padding=True
246
- ).to(qwen_4b_unredacted_model.device)
247
  streamer = TextIteratorStreamer(
248
- qwen_4b_unredacted_processor.tokenizer,
249
  skip_prompt=True, skip_special_tokens=True, timeout=120,
250
  )
251
  thread = threading.Thread(
252
- target=qwen_4b_unredacted_model.generate,
253
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
254
- use_cache=True, temperature=1.5, min_p=0.1),
255
  )
256
  thread.start()
257
  for tok in streamer:
@@ -259,30 +259,30 @@ def generate_inference_stream(
259
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
260
  thread.join()
261
 
262
- # ── Qwen3.5-4B ─────────────────────────────────────
263
- elif model_id == "qwen_4b":
264
- if qwen_4b_model is None or qwen_4b_processor is None:
265
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
266
  yield "data: [DONE]\n\n"
267
  return
268
  messages = [{"role": "user", "content": [
269
  {"type": "image", "image": image},
270
  {"type": "text", "text": full_prompt},
271
  ]}]
272
- text_input = qwen_4b_processor.apply_chat_template(
273
  messages, tokenize=False, add_generation_prompt=True
274
  )
275
- inputs = qwen_4b_processor(
276
  text=[text_input], images=[image], return_tensors="pt", padding=True
277
- ).to(qwen_4b_model.device)
278
  streamer = TextIteratorStreamer(
279
- qwen_4b_processor.tokenizer,
280
  skip_prompt=True, skip_special_tokens=True, timeout=120,
281
  )
282
  thread = threading.Thread(
283
- target=qwen_4b_model.generate,
284
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
285
- use_cache=True, temperature=1.5, min_p=0.1),
286
  )
287
  thread.start()
288
  for tok in streamer:
@@ -290,28 +290,28 @@ def generate_inference_stream(
290
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
291
  thread.join()
292
 
293
- # ── Qwen3.5-2B ──────────────────────────────────────
294
- elif model_id == "qwen_2b":
295
- if qwen_2b_model is None or qwen_2b_processor is None:
296
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B model not loaded.'})}\n\n"
297
  yield "data: [DONE]\n\n"
298
  return
299
  messages = [{"role": "user", "content": [
300
  {"type": "image", "image": image},
301
  {"type": "text", "text": full_prompt},
302
  ]}]
303
- text_input = qwen_2b_processor.apply_chat_template(
304
  messages, tokenize=False, add_generation_prompt=True
305
  )
306
- inputs = qwen_2b_processor(
307
  text=[text_input], images=[image], return_tensors="pt", padding=True
308
- ).to(qwen_2b_model.device)
309
  streamer = TextIteratorStreamer(
310
- qwen_2b_processor.tokenizer,
311
  skip_prompt=True, skip_special_tokens=True, timeout=120,
312
  )
313
  thread = threading.Thread(
314
- target=qwen_2b_model.generate,
315
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
316
  use_cache=True, temperature=1.5, min_p=0.1),
317
  )
@@ -321,30 +321,30 @@ def generate_inference_stream(
321
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
322
  thread.join()
323
 
324
- # ── Qwen3-VL-2B ─────────────────────────────────────
325
- elif model_id == "qwen_vl_2b":
326
- if qwen_vl_2b_model is None or qwen_vl_2b_processor is None:
327
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-2B model not loaded.'})}\n\n"
328
  yield "data: [DONE]\n\n"
329
  return
330
  messages = [{"role": "user", "content": [
331
  {"type": "image", "image": image},
332
  {"type": "text", "text": full_prompt},
333
  ]}]
334
- text_input = qwen_vl_2b_processor.apply_chat_template(
335
  messages, tokenize=False, add_generation_prompt=True
336
  )
337
- inputs = qwen_vl_2b_processor(
338
  text=[text_input], images=[image], return_tensors="pt", padding=True
339
- ).to(qwen_vl_2b_model.device)
340
  streamer = TextIteratorStreamer(
341
- qwen_vl_2b_processor.tokenizer,
342
  skip_prompt=True, skip_special_tokens=True, timeout=120,
343
  )
344
  thread = threading.Thread(
345
- target=qwen_vl_2b_model.generate,
346
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
347
- use_cache=True, temperature=1.0, do_sample=True),
348
  )
349
  thread.start()
350
  for tok in streamer:
@@ -352,30 +352,30 @@ def generate_inference_stream(
352
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
353
  thread.join()
354
 
355
- # ── Qwen3-VL-4B ─────────────────────────────────────
356
- elif model_id == "qwen_vl_4b":
357
- if qwen_vl_4b_model is None or qwen_vl_4b_processor is None:
358
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-4B model not loaded.'})}\n\n"
359
  yield "data: [DONE]\n\n"
360
  return
361
  messages = [{"role": "user", "content": [
362
  {"type": "image", "image": image},
363
  {"type": "text", "text": full_prompt},
364
  ]}]
365
- text_input = qwen_vl_4b_processor.apply_chat_template(
366
  messages, tokenize=False, add_generation_prompt=True
367
  )
368
- inputs = qwen_vl_4b_processor(
369
  text=[text_input], images=[image], return_tensors="pt", padding=True
370
- ).to(qwen_vl_4b_model.device)
371
  streamer = TextIteratorStreamer(
372
- qwen_vl_4b_processor.tokenizer,
373
  skip_prompt=True, skip_special_tokens=True, timeout=120,
374
  )
375
  thread = threading.Thread(
376
- target=qwen_vl_4b_model.generate,
377
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
378
- use_cache=True, temperature=1.0, do_sample=True),
379
  )
380
  thread.start()
381
  for tok in streamer:
@@ -546,7 +546,7 @@ async def run_inference(
546
  image: UploadFile = File(...),
547
  category: str = Form(...),
548
  prompt: str = Form(...),
549
- model_id: str = Form("qwen_4b_unredacted"),
550
  ):
551
  try:
552
  img_bytes = await image.read()
@@ -589,10 +589,8 @@ async def homepage(request: Request):
589
  }
590
  * { box-sizing: border-box; margin: 0; padding: 0; }
591
  html, body {
592
- min-height: 100%;
593
- background: var(--bg);
594
- color: var(--text);
595
- font-family: 'JetBrains Mono', monospace;
596
  }
597
  body {
598
  background-image:
@@ -605,13 +603,11 @@ async def homepage(request: Request):
605
  }
606
  /* ── Top Bar ── */
607
  .top-bar {
608
- position: sticky; top: 0; left: 0; right: 0;
609
- height: 42px;
610
  background: rgba(13,13,15,0.95);
611
  border-bottom: 1px solid var(--node-border);
612
  display: flex; align-items: center; padding: 0 20px;
613
- gap: 12px; z-index: 1000;
614
- backdrop-filter: blur(12px);
615
  }
616
  .top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
617
  .top-bar .sep { color: var(--node-border); }
@@ -625,146 +621,98 @@ async def homepage(request: Request):
625
  }
626
  /* ── Canvas ── */
627
  #canvas {
628
- position: relative;
629
- width: 1360px;
630
- min-height: calc(100vh - 42px);
631
- height: 900px;
632
- margin: 0 auto;
633
  }
634
  svg.wires {
635
  position: absolute; top: 0; left: 0;
636
  width: 100%; height: 100%;
637
- pointer-events: none; z-index: 2;
638
- overflow: visible;
639
- }
640
- path.wire {
641
- fill: none; stroke: var(--wire); stroke-width: 2.5;
642
- stroke-linecap: round;
643
  }
 
644
  path.wire.active {
645
  stroke: var(--wire-active); stroke-width: 3;
646
- stroke-dasharray: 8 4;
647
- animation: flow 0.6s linear infinite;
648
  }
649
  @keyframes flow { to { stroke-dashoffset: -24; } }
650
  /* ── Nodes ── */
651
  .node {
652
  position: absolute; width: 295px;
653
- background: var(--node-bg);
654
- border: 1px solid var(--node-border);
655
- border-radius: 9px;
656
- box-shadow: 0 8px 28px rgba(0,0,0,0.5);
657
- z-index: 10; display: flex; flex-direction: column;
658
- transition: box-shadow 0.2s;
659
- }
660
- .node:hover {
661
- box-shadow: 0 8px 28px rgba(0,0,0,0.5),
662
- 0 0 0 1px rgba(124,106,247,0.3);
663
  }
 
664
  .node.fixed-height { height: 340px; }
665
  .node-header {
666
- background: var(--node-header);
667
- padding: 7px 12px;
668
- border-bottom: 1px solid var(--node-border);
669
- border-radius: 9px 9px 0 0;
670
- font-size: 11px; font-weight: 700;
671
- cursor: grab;
672
  display: flex; justify-content: space-between; align-items: center;
673
  flex-shrink: 0; user-select: none;
674
  }
675
  .node-header:active { cursor: grabbing; }
676
  .node-header .id {
677
  font-size: 10px; color: var(--muted);
678
- background: rgba(255,255,255,0.04);
679
- padding: 2px 7px; border-radius: 4px;
680
- }
681
- .node-body {
682
- padding: 10px;
683
- display: flex; flex-direction: column; gap: 8px;
684
- flex: 1; overflow: hidden;
685
  }
 
686
  /* ── Ports ── */
687
  .port {
688
  position: absolute; width: 11px; height: 11px;
689
- background: var(--node-bg);
690
- border: 2px solid var(--port);
691
  border-radius: 50%; z-index: 30;
692
  }
693
  .port.out { right: -6px; }
694
  .port.in { left: -6px; }
695
  /* ── Labels ── */
696
  label {
697
- font-size: 10px; color: var(--muted);
698
- font-weight: 600; display: block; margin-bottom: 3px;
699
- letter-spacing: 0.07em; text-transform: uppercase;
700
  }
701
  input[type="file"] { display: none; }
702
  /* ── Upload Zone ── */
703
  .file-upload {
704
- border: 1.5px dashed var(--node-border);
705
- border-radius: 7px; padding: 12px 10px;
706
- text-align: center; cursor: pointer;
707
- font-size: 11px; color: var(--muted);
708
- transition: border-color 0.2s, background 0.2s;
709
- background: rgba(255,255,255,0.01);
710
  display: flex; flex-direction: column; align-items: center; gap: 5px;
711
  }
712
- .file-upload:hover {
713
- border-color: var(--accent);
714
- background: rgba(124,106,247,0.04);
715
- }
716
  .file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
717
  .file-upload:hover svg { opacity: 0.9; }
718
  /* ── Preview wrapper ── */
719
  .preview-wrap {
720
- display: none; position: relative;
721
- border-radius: 7px; overflow: hidden;
722
- border: 1px solid var(--node-border); background: #000;
723
  }
724
  .preview-wrap.visible { display: block; }
725
  .img-preview { width: 100%; height: 170px; object-fit: contain; display: block; }
726
  /* ── Clear button ── */
727
  .clear-btn {
728
- position: absolute; top: 6px; right: 6px;
729
- width: 24px; height: 24px; border-radius: 50%;
730
- background: rgba(13,13,15,0.80);
731
- border: 1px solid var(--node-border);
732
- color: var(--accent3); cursor: pointer;
733
  display: flex; align-items: center; justify-content: center;
734
  transition: background 0.18s, border-color 0.18s, transform 0.12s;
735
  z-index: 20; backdrop-filter: blur(6px);
736
  }
737
- .clear-btn:hover {
738
- background: rgba(255,107,107,0.18);
739
- border-color: var(--accent3); transform: scale(1.08);
740
- }
741
  .clear-btn:active { transform: scale(0.95); }
742
  .clear-btn svg { pointer-events: none; }
743
  /* ── Filename chip ── */
744
  .img-chip {
745
  display: none; align-items: center; gap: 6px;
746
- background: rgba(124,106,247,0.08);
747
- border: 1px solid rgba(124,106,247,0.22);
748
- border-radius: 5px; padding: 4px 8px;
749
- font-size: 9px; color: var(--muted); overflow: hidden;
750
  }
751
  .img-chip.visible { display: flex; }
752
- .img-chip .chip-dot {
753
- width: 5px; height: 5px; border-radius: 50%;
754
- background: var(--accent2); flex-shrink: 0;
755
- box-shadow: 0 0 4px var(--accent2);
756
- }
757
- .img-chip .chip-name {
758
- overflow: hidden; text-overflow: ellipsis;
759
- white-space: nowrap; flex: 1;
760
- color: var(--text); font-size: 9px;
761
- }
762
  .img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 9px; }
763
  select, textarea {
764
- width: 100%; background: rgba(0,0,0,0.3);
765
- border: 1px solid var(--node-border);
766
- color: var(--text); padding: 7px 9px;
767
- border-radius: 5px; outline: none;
768
  font-size: 11px; font-family: 'JetBrains Mono', monospace;
769
  resize: none; transition: border-color 0.2s;
770
  }
@@ -773,99 +721,77 @@ async def homepage(request: Request):
773
  button.run-btn {
774
  background: linear-gradient(135deg, var(--accent), #9b59b6);
775
  color: #fff; border: none; padding: 8px; border-radius: 6px;
776
- font-weight: 700; font-size: 11px;
777
- font-family: 'JetBrains Mono', monospace; cursor: pointer;
778
- transition: opacity 0.2s, transform 0.1s;
779
  display: flex; justify-content: center; align-items: center; gap: 8px;
780
  letter-spacing: 0.04em; flex-shrink: 0;
781
  }
782
- button.run-btn:hover { opacity: 0.9; }
783
- button.run-btn:active { transform: scale(0.98); }
784
  button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
785
  /* ── Output node ── */
786
- .output-node-body {
787
- padding: 10px; display: flex; flex-direction: column;
788
- gap: 6px; flex: 1; overflow: hidden;
789
- }
790
- .output-header-row {
791
- display: flex; align-items: center;
792
- justify-content: space-between; flex-shrink: 0;
793
- }
794
- /* ── Copy button ── */
795
- .copy-btn {
796
  display: flex; align-items: center; gap: 5px;
797
- background: rgba(124,106,247,0.10);
798
- border: 1px solid rgba(124,106,247,0.25);
799
  border-radius: 5px; padding: 3px 8px;
800
- font-size: 9px; font-weight: 700;
801
- font-family: 'JetBrains Mono', monospace;
802
- color: var(--accent); cursor: pointer;
803
- letter-spacing: 0.05em;
804
- transition: background 0.18s, border-color 0.18s, transform 0.1s;
805
- flex-shrink: 0;
806
  }
807
- .copy-btn:hover { background: rgba(124,106,247,0.22); border-color: var(--accent); }
808
- .copy-btn:active { transform: scale(0.95); }
809
- .copy-btn.copied {
810
- background: rgba(78,205,196,0.15);
811
- border-color: var(--accent2); color: var(--accent2);
812
  }
813
- .copy-btn svg { pointer-events: none; flex-shrink: 0; }
 
 
814
  .output-box {
815
- background: rgba(0,0,0,0.4);
816
- border: 1px solid var(--node-border);
817
- border-radius: 5px; padding: 10px;
818
- flex: 1; overflow-y: auto;
819
- font-size: 11px; line-height: 1.6;
820
- color: #c8c8e0; white-space: pre-wrap;
821
- user-select: text;
822
- font-family: 'JetBrains Mono', monospace; min-height: 0;
823
  }
824
- /* ── Grounding ── */
 
 
825
  .ground-canvas-wrap {
826
- position: relative; flex: 1;
827
- border: 1px solid var(--node-border);
828
- border-radius: 5px; overflow: hidden;
829
- background: #000; min-height: 0;
830
  }
831
  .ground-canvas-wrap canvas { width: 100%; height: 100%; object-fit: contain; display: block; }
832
  .ground-placeholder {
833
- position: absolute; inset: 0;
834
- display: flex; align-items: center; justify-content: center;
835
- font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
836
  }
837
  .loader {
838
- width: 11px; height: 11px;
839
- border: 2px solid rgba(255,255,255,0.3);
840
  border-top-color: #fff; border-radius: 50%;
841
  animation: spin 0.7s linear infinite; display: none;
842
  }
843
  @keyframes spin { to { transform: rotate(360deg); } }
844
- .status-dot {
845
- width: 6px; height: 6px; border-radius: 50%;
846
- background: var(--muted); display: inline-block; margin-right: 6px;
847
- }
848
  .status-dot.active { background: var(--accent2); box-shadow: 0 0 5px var(--accent2); }
849
  /* ── Model badges ── */
850
  .model-badge {
851
- display: inline-block; padding: 2px 7px;
852
- border-radius: 4px; font-size: 9px; font-weight: 700;
853
- letter-spacing: 0.06em; text-transform: uppercase;
854
  }
 
 
855
  .model-badge.q4bunred { background: rgba(255,80,80,0.18); color: #ff5050; border: 1px solid rgba(255,80,80,0.40); }
856
  .model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
857
  .model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
858
- .model-badge.qvl2b { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
859
- .model-badge.qvl4b { background: rgba(255,100,80,0.15); color: #ff6450; border: 1px solid rgba(255,100,80,0.35); }
860
  .model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
861
  .model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
862
  .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
863
  .model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
864
  .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
865
- .model-info-box {
866
- border-radius: 6px; padding: 9px;
867
- font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0;
868
- }
869
  .canvas-footer { height: 36px; }
870
  </style>
871
  </head>
@@ -897,8 +823,7 @@ async def homepage(request: Request):
897
  <label>Upload Image</label>
898
  <div class="file-upload" id="dropZone">
899
  <svg width="30" height="30" viewBox="0 0 24 24" fill="none"
900
- stroke="#7c6af7" stroke-width="1.5"
901
- stroke-linecap="round" stroke-linejoin="round">
902
  <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
903
  <circle cx="8.5" cy="8.5" r="1.5"/>
904
  <polyline points="21 15 16 10 5 21"/>
@@ -910,8 +835,7 @@ async def homepage(request: Request):
910
  <img id="imgPreview" class="img-preview" />
911
  <button class="clear-btn" id="clearBtn" title="Remove image">
912
  <svg width="12" height="12" viewBox="0 0 24 24" fill="none"
913
- stroke="currentColor" stroke-width="2.5"
914
- stroke-linecap="round" stroke-linejoin="round">
915
  <line x1="18" y1="6" x2="6" y2="18"/>
916
  <line x1="6" y1="6" x2="18" y2="18"/>
917
  </svg>
@@ -937,11 +861,11 @@ async def homepage(request: Request):
937
  <div>
938
  <label>Active Model</label>
939
  <select id="modelSelect">
 
 
940
  <option value="qwen_4b_unredacted">Qwen3.5-4B-Unredacted-MAX</option>
941
  <option value="qwen_4b">Qwen3.5-4B</option>
942
  <option value="qwen_2b">Qwen3.5-2B</option>
943
- <option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
944
- <option value="qwen_vl_4b">Qwen3-VL-4B-Instruct</option>
945
  <option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
946
  <option value="gemma4_e2b">Gemma4-E2B-it (Google)</option>
947
  <option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
@@ -950,10 +874,10 @@ async def homepage(request: Request):
950
  </select>
951
  </div>
952
  <div id="modelInfoBox" class="model-info-box"
953
- style="background:rgba(255,80,80,0.07);border:1px solid rgba(255,80,80,0.3);">
954
- <span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
955
- Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
956
- with extended instruction-following &amp; unrestricted reasoning.
957
  </div>
958
  <div style="flex:1;"></div>
959
  </div>
@@ -1000,10 +924,9 @@ async def homepage(request: Request):
1000
  <div class="output-node-body">
1001
  <div class="output-header-row">
1002
  <label style="margin-bottom:0;">Streamed Result</label>
1003
- <button class="copy-btn" id="copyBtn" title="Copy result to clipboard">
1004
  <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1005
- stroke="currentColor" stroke-width="2.2"
1006
- stroke-linecap="round" stroke-linejoin="round">
1007
  <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1008
  <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1009
  </svg>
@@ -1021,8 +944,19 @@ async def homepage(request: Request):
1021
  <span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
1022
  <span class="id">ID: 05</span>
1023
  </div>
1024
- <div class="node-body">
1025
- <label>Point / Detect Overlay</label>
 
 
 
 
 
 
 
 
 
 
 
1026
  <div class="ground-canvas-wrap">
1027
  <canvas id="groundCanvas"></canvas>
1028
  <div class="ground-placeholder" id="groundPlaceholder">
@@ -1042,9 +976,8 @@ async def homepage(request: Request):
1042
  const canvasEl = document.getElementById('canvas');
1043
  function portCenter(id) {
1044
  const el = document.getElementById(id);
1045
- if (!el) return { x:0, y:0 };
1046
- const er = el.getBoundingClientRect();
1047
- const cr = canvasEl.getBoundingClientRect();
1048
  return { x: er.left + er.width/2 - cr.left, y: er.top + er.height/2 - cr.top };
1049
  }
1050
  function bezier(p1, p2) {
@@ -1071,17 +1004,16 @@ document.querySelectorAll('.node').forEach(node => {
1071
  const header = node.querySelector('.node-header');
1072
  let drag = false, sx, sy, il, it;
1073
  header.addEventListener('mousedown', e => {
1074
- drag = true; sx = e.clientX; sy = e.clientY;
1075
- il = parseInt(node.style.left)||0; it = parseInt(node.style.top)||0;
1076
- node.style.zIndex = 100; e.preventDefault();
1077
  });
1078
  document.addEventListener('mousemove', e => {
1079
  if (!drag) return;
1080
- node.style.left = `${il + e.clientX - sx}px`;
1081
- node.style.top = `${it + e.clientY - sy}px`;
1082
  updateWires();
1083
  });
1084
- document.addEventListener('mouseup', () => { if (drag) { drag=false; node.style.zIndex=10; } });
1085
  });
1086
  window.addEventListener('resize', updateWires);
1087
  window.addEventListener('scroll', updateWires);
@@ -1103,30 +1035,22 @@ const chipSize = document.getElementById('chipSize');
1103
  const dotImg = document.getElementById('dot-img');
1104
 
1105
  function formatBytes(b) {
1106
- if (b < 1024) return b + ' B';
1107
- if (b < 1048576) return (b/1024).toFixed(1) + ' KB';
1108
- return (b/1048576).toFixed(1) + ' MB';
1109
  }
1110
  function handleFile(file) {
1111
- if (!file || !file.type.startsWith('image/')) return;
1112
- currentFile = file;
1113
- imgPreview.src = URL.createObjectURL(file);
1114
- previewWrap.classList.add('visible');
1115
- dropZone.style.display = 'none';
1116
- chipName.textContent = file.name;
1117
- chipSize.textContent = formatBytes(file.size);
1118
- imgChip.classList.add('visible');
1119
- dotImg.classList.add('active');
1120
  requestAnimationFrame(updateWires);
1121
  }
1122
  function clearImage() {
1123
- currentFile = null; imgPreview.src = '';
1124
- previewWrap.classList.remove('visible');
1125
- dropZone.style.display = '';
1126
- imgChip.classList.remove('visible');
1127
- chipName.textContent = '—'; chipSize.textContent = '';
1128
- fileInput.value = ''; dotImg.classList.remove('active');
1129
- requestAnimationFrame(updateWires);
1130
  }
1131
  dropZone.onclick = () => fileInput.click();
1132
  fileInput.onchange = e => handleFile(e.target.files[0]);
@@ -1147,6 +1071,18 @@ const dotModel = document.getElementById('dot-model');
1147
  dotModel.classList.add('active');
1148
 
1149
  const MODEL_INFO = {
 
 
 
 
 
 
 
 
 
 
 
 
1150
  qwen_4b_unredacted: {
1151
  html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
1152
  Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
@@ -1165,18 +1101,6 @@ const MODEL_INFO = {
1165
  Lightweight &amp; fast — ideal for quick Query, Caption, Point &amp; Detect tasks.`,
1166
  bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
1167
  },
1168
- qwen_vl_2b: {
1169
- html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
1170
- Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
1171
- Strong spatial grounding, OCR &amp; instruction-following.`,
1172
- bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.25)',
1173
- },
1174
- qwen_vl_4b: {
1175
- html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
1176
- Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
1177
- Superior spatial grounding, richer OCR &amp; stronger multi-step reasoning.`,
1178
- bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
1179
- },
1180
  lfm_450: {
1181
  html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
1182
  LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
@@ -1227,99 +1151,52 @@ const PLACEHOLDERS = {
1227
  Point: 'e.g., The gun held by the person.',
1228
  Detect: 'e.g., The headlight of the car.',
1229
  };
1230
- categorySelect.onchange = e => { promptInput.placeholder = PLACEHOLDERS[e.target.value] || ''; };
1231
 
1232
  // ══════════════════════════════════════════════
1233
  // ROBUST JSON EXTRACTOR
1234
- // Strategy:
1235
- // 1. Strip ALL <think>…</think> blocks (greedy,
1236
- // handles the tag appearing after the JSON too)
1237
- // 2. Strip markdown fences
1238
- // 3. Find the LAST occurrence of a JSON array [ ]
1239
- // or object { } — models typically emit the
1240
- // clean JSON block after their reasoning prose
1241
- // 4. Use a bracket-depth walker to extract it
1242
- // precisely without cutting off nested objects
1243
  // ══════════════════════════════════════════════
1244
  function extractGroundingJSON(raw) {
1245
- // Step 1 kill ALL <think> </think> sections
1246
- // Use greedy .* with DOTALL flag emulation via [\s\S]
1247
- // Run multiple passes in case of nested/malformed tags
1248
- let text = raw;
1249
- let prev = null;
1250
  while (prev !== text) {
1251
  prev = text;
1252
  text = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
1253
  }
 
 
 
1254
 
1255
- // Step 2 — strip markdown code fences ```json … ```
1256
- text = text.replace(/```(?:json)?[\\s\\S]*?```/gi, function(m) {
1257
- // Keep the inner content, just remove the fences
1258
- return m.replace(/```(?:json)?/gi, '').replace(/```/g, '');
1259
- });
1260
-
1261
- // Step 3 — strip any remaining lone fence markers
1262
- text = text.replace(/```/g, '');
1263
- text = text.trim();
1264
-
1265
- // Helper: walk from startIdx and extract a balanced
1266
- // bracket expression (open/close must match).
1267
  function extractBalanced(str, startIdx, openCh, closeCh) {
1268
- let depth = 0, inStr = false, esc = false;
1269
- for (let i = startIdx; i < str.length; i++) {
1270
- const c = str[i];
1271
- if (esc) { esc = false; continue; }
1272
- if (c === '\\\\') { esc = true; continue; }
1273
- if (c === '"') { inStr = !inStr; continue; }
1274
- if (inStr) continue;
1275
- if (c === openCh) depth++;
1276
- if (c === closeCh) {
1277
  depth--;
1278
- if (depth === 0) {
1279
- try { return JSON.parse(str.slice(startIdx, i + 1)); }
1280
- catch(_) { return null; }
1281
  }
1282
  }
1283
  }
1284
  return null;
1285
  }
1286
 
1287
- // Step 4 find the LAST JSON array in the text
1288
- // (models often emit prose first, JSON last)
1289
- let lastArrIdx = -1;
1290
- for (let i = text.length - 1; i >= 0; i--) {
1291
- if (text[i] === '[') { lastArrIdx = i; break; }
1292
- }
1293
- if (lastArrIdx !== -1) {
1294
- const result = extractBalanced(text, lastArrIdx, '[', ']');
1295
- if (result !== null) return result;
1296
- }
1297
-
1298
- // Step 5 — find the LAST JSON object in the text
1299
- let lastObjIdx = -1;
1300
- for (let i = text.length - 1; i >= 0; i--) {
1301
- if (text[i] === '{') { lastObjIdx = i; break; }
1302
  }
1303
- if (lastObjIdx !== -1) {
1304
- const result = extractBalanced(text, lastObjIdx, '{', '}');
1305
- if (result !== null) return result;
1306
  }
1307
-
1308
- // Step 6 — try FIRST array (fallback)
1309
- const firstArr = text.indexOf('[');
1310
- if (firstArr !== -1) {
1311
- const result = extractBalanced(text, firstArr, '[', ']');
1312
- if (result !== null) return result;
1313
- }
1314
-
1315
- // Step 7 — try FIRST object (fallback)
1316
- const firstObj = text.indexOf('{');
1317
- if (firstObj !== -1) {
1318
- const result = extractBalanced(text, firstObj, '{', '}');
1319
- if (result !== null) return result;
1320
- }
1321
-
1322
- // Step 8 — last resort full parse
1323
  try { return JSON.parse(text); } catch(_) {}
1324
  return null;
1325
  }
@@ -1330,122 +1207,95 @@ function extractGroundingJSON(raw) {
1330
  const groundCanvas = document.getElementById('groundCanvas');
1331
  const groundPlaceholder = document.getElementById('groundPlaceholder');
1332
  const gCtx = groundCanvas.getContext('2d');
 
1333
 
1334
  const PALETTE = ['#4ecdc4','#7c6af7','#ff6b6b','#ffd93d','#6bcb77','#ff922b','#cc5de8','#339af0'];
1335
 
1336
  function hexToRgba(hex, alpha) {
1337
- const r = parseInt(hex.slice(1,3),16);
1338
- const g = parseInt(hex.slice(3,5),16);
1339
- const b = parseInt(hex.slice(5,7),16);
1340
  return `rgba(${r},${g},${b},${alpha})`;
1341
  }
1342
  function roundRect(ctx, x, y, w, h, r) {
1343
- ctx.beginPath();
1344
- ctx.moveTo(x+r,y);
1345
  ctx.lineTo(x+w-r,y); ctx.quadraticCurveTo(x+w,y,x+w,y+r);
1346
  ctx.lineTo(x+w,y+h-r); ctx.quadraticCurveTo(x+w,y+h,x+w-r,y+h);
1347
  ctx.lineTo(x+r,y+h); ctx.quadraticCurveTo(x,y+h,x,y+h-r);
1348
- ctx.lineTo(x,y+r); ctx.quadraticCurveTo(x,y,x+r,y);
1349
- ctx.closePath();
 
 
 
 
 
 
 
 
1350
  }
1351
 
1352
// Draw the model's grounding output on top of the source image.
// `rawText` is the raw streamed model text; extractGroundingJSON pulls the
// JSON payload out of it. Supported item shapes (as handled below):
//   {bbox_2d:[x1,y1,x2,y2], label} / {bbox:[...]} / bare [x1,y1,x2,y2]  → box
//   {point_2d:[x,y], label}       / {point:[...]} / bare [x,y]          → point
// Coordinates wholly within 0–1 are treated as normalised and scaled to the
// image size (NOTE(review): a pixel-space box inside the top-left 1×1 px
// would be misread as normalised — presumably acceptable in practice).
function drawGrounding(imgSrc, rawText) {
  const parsed = extractGroundingJSON(rawText);
  if (!parsed) {
    console.warn('Grounding: could not extract JSON:', rawText.slice(0, 200));
    return;
  }

  const img = new Image();
  img.onload = () => {
    // Size the canvas to the image's native resolution so overlay
    // coordinates map 1:1 onto image pixels.
    const W = img.naturalWidth, H = img.naturalHeight;
    groundCanvas.width = W;
    groundCanvas.height = H;
    gCtx.drawImage(img, 0, 0);
    groundPlaceholder.style.display = 'none';

    // Line width and font scale with image width so overlays stay legible.
    const lw = Math.max(2, W/200);
    const fs = Math.max(12, W/40);
    gCtx.lineWidth = lw;
    gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;

    // Accept either a single object or an array of items.
    const items = Array.isArray(parsed) ? parsed : [parsed];

    items.forEach((item, i) => {
      const col = PALETTE[i % PALETTE.length];

      // ── Detect: bounding box ─────────────────────
      let bbox = null;
      if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4)
        bbox = item.bbox_2d;
      else if (Array.isArray(item?.bbox) && item.bbox.length === 4)
        bbox = item.bbox;
      else if (Array.isArray(item) && item.length === 4
               && item.every(n => typeof n === 'number'))
        bbox = item;

      if (bbox) {
        let [x1,y1,x2,y2] = bbox.map(Number);
        // Handle normalised 0-1 coords
        if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
          x1*=W; y1*=H; x2*=W; y2*=H;
        }
        // Swap if inverted
        if (x2 < x1) [x1,x2] = [x2,x1];
        if (y2 < y1) [y1,y2] = [y2,y1];

        const bw = x2-x1, bh = y2-y1;
        const lbl = (item?.label ?? `obj ${i+1}`).toString();

        // Translucent fill plus solid outline in the item's palette colour.
        gCtx.fillStyle = hexToRgba(col, 0.20);
        gCtx.fillRect(x1,y1,bw,bh);
        gCtx.strokeStyle = col;
        gCtx.lineWidth = lw;
        gCtx.strokeRect(x1,y1,bw,bh);

        // Label pill above the box (clamped so it never leaves the canvas).
        const tw = gCtx.measureText(lbl).width;
        const ph = fs*1.45, pw = tw+12;
        const lx = x1, ly = Math.max(0, y1-ph);
        gCtx.fillStyle = col;
        roundRect(gCtx,lx,ly,pw,ph,4); gCtx.fill();
        gCtx.fillStyle = '#fff';
        gCtx.fillText(lbl, lx+6, ly+ph*0.76);
        return;  // an item is either a box or a point, never both
      }

      // ── Point: 2-D coordinate ─────────────────────
      let pt = null;
      if (Array.isArray(item?.point_2d) && item.point_2d.length === 2)
        pt = item.point_2d;
      else if (Array.isArray(item?.point) && item.point.length === 2)
        pt = item.point;
      else if (Array.isArray(item) && item.length === 2
               && item.every(n => typeof n === 'number'))
        pt = item;

      if (pt) {
        let [x,y] = pt.map(Number);
        if (x <= 1 && y <= 1) { x*=W; y*=H; }
        const r = Math.max(8, W/60);
        const lbl = (item?.label ?? `pt ${i+1}`).toString();

        // Soft halo behind the marker.
        gCtx.beginPath();
        gCtx.arc(x, y, r*1.8, 0, Math.PI*2);
        gCtx.fillStyle = hexToRgba(col, 0.18); gCtx.fill();

        // Solid dot with a white ring.
        gCtx.beginPath();
        gCtx.arc(x, y, r, 0, Math.PI*2);
        gCtx.fillStyle = col; gCtx.fill();
        gCtx.strokeStyle = '#fff';
        gCtx.lineWidth = Math.max(1.5, lw);
        gCtx.stroke();

        gCtx.fillStyle = '#fff';
        gCtx.fillText(lbl, x+r+5, y+fs*0.4);
      }
    });
  };
  img.onerror = () => console.error('Grounding: failed to load image for overlay.');
  img.src = imgSrc;
}
1451
 
@@ -1460,31 +1310,26 @@ function resetCopyBtn() {
1460
  copyBtn.classList.remove('copied');
1461
  copyBtn.innerHTML = `
1462
  <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1463
- stroke="currentColor" stroke-width="2.2"
1464
- stroke-linecap="round" stroke-linejoin="round">
1465
  <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1466
  <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1467
  </svg> COPY`;
1468
  }
1469
-
1470
// Copy the streamed result text to the clipboard. On success the button
// flips to a "COPIED" check for 2 s (timer shared via outer `copyTimer`);
// if the async Clipboard API is unavailable/rejected, fall back to the
// legacy hidden-textarea + execCommand path.
copyBtn.onclick = () => {
  const txt = outputBox.innerText || '';
  // Nothing to copy while the output box still shows its placeholder.
  if (!txt || txt === 'Results will stream here...') return;
  navigator.clipboard.writeText(txt).then(() => {
    copyBtn.classList.add('copied');
    copyBtn.innerHTML = `
      <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
           stroke="currentColor" stroke-width="2.5"
           stroke-linecap="round" stroke-linejoin="round">
        <polyline points="20 6 9 17 4 12"/>
      </svg> COPIED`;
    // Restart the revert timer so rapid clicks don't revert early.
    clearTimeout(copyTimer);
    copyTimer = setTimeout(resetCopyBtn, 2000);
  }).catch(() => {
    // Fallback: execCommand('copy') is deprecated but still widely supported.
    const ta = document.createElement('textarea');
    ta.value = txt; ta.style.position='fixed'; ta.style.opacity='0';
    document.body.appendChild(ta); ta.select();
    document.execCommand('copy'); document.body.removeChild(ta);
  });
};
1490
 
@@ -1503,66 +1348,53 @@ runBtn.onclick = async () => {
1503
  const promptStr = promptInput.value.trim();
1504
  if (!promptStr) { alert('Please enter a prompt directive.'); return; }
1505
 
1506
- runBtn.disabled = true;
1507
- btnLoader.style.display = 'inline-block';
1508
- outputBox.innerText = '';
1509
- outputBox.style.color = '';
1510
- groundPlaceholder.style.display = 'flex';
1511
- gCtx.clearRect(0, 0, groundCanvas.width, groundCanvas.height);
1512
  dotTask.classList.add('active');
1513
- dotOut.classList.remove('active');
1514
- dotGnd.classList.remove('active');
1515
- allWires.forEach(id => document.getElementById(id)?.classList.add('active'));
1516
  resetCopyBtn();
1517
 
1518
- const formData = new FormData();
1519
  formData.append('image', currentFile);
1520
  formData.append('category', categorySelect.value);
1521
  formData.append('prompt', promptStr);
1522
  formData.append('model_id', modelSelect.value);
1523
 
1524
- let fullText = '';
1525
- let imgObjectURL = URL.createObjectURL(currentFile);
1526
 
1527
  try {
1528
- const response = await fetch('/api/run', { method:'POST', body:formData });
1529
- if (!response.ok) {
1530
- const err = await response.json();
1531
- throw new Error(err.error || 'Execution failed.');
1532
- }
1533
-
1534
- const reader = response.body.getReader();
1535
- const decoder = new TextDecoder('utf-8');
1536
- let buffer = '';
1537
 
 
 
1538
  while (true) {
1539
- const { value, done } = await reader.read();
1540
- if (done) break;
1541
- buffer += decoder.decode(value, { stream:true });
1542
- const lines = buffer.split('\\n\\n');
1543
- buffer = lines.pop();
1544
  for (const line of lines) {
1545
  if (!line.startsWith('data: ')) continue;
1546
- const payload = line.replace('data: ','');
1547
- if (payload === '[DONE]') break;
1548
  try {
1549
- const data = JSON.parse(payload);
1550
- if (data.chunk) {
1551
- fullText += data.chunk;
1552
- outputBox.innerText = fullText;
1553
- outputBox.scrollTop = outputBox.scrollHeight;
1554
- }
1555
  } catch(_) {}
1556
  }
1557
  }
1558
 
1559
  dotOut.classList.add('active');
1560
 
1561
- // ── Grounding overlay (Point / Detect) ──────────
1562
- const cat = categorySelect.value;
1563
- if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
1564
- const parsed = extractGroundingJSON(fullText);
1565
- if (parsed !== null) {
1566
  dotGnd.classList.add('active');
1567
  drawGrounding(imgObjectURL, fullText);
1568
  } else {
@@ -1570,14 +1402,12 @@ runBtn.onclick = async () => {
1570
  }
1571
  }
1572
 
1573
- } catch (err) {
1574
- outputBox.innerText = `[Error] ${err.message}`;
1575
- outputBox.style.color = '#ff6b6b';
1576
  } finally {
1577
- runBtn.disabled = false;
1578
- btnLoader.style.display = 'none';
1579
  dotTask.classList.remove('active');
1580
- allWires.forEach(id => document.getElementById(id)?.classList.remove('active'));
1581
  }
1582
  };
1583
  </script>
@@ -1586,4 +1416,4 @@ runBtn.onclick = async () => {
1586
  """
1587
 
1588
# Launch the Gradio app only when executed as a script (not on import);
# ssr_mode=False keeps rendering fully client-side.
if __name__ == "__main__":
    app.launch(show_error=True, ssr_mode=False)
 
36
  else torch.float16
37
  )
38
 
39
# Hugging Face repo IDs for every selectable backend model.
QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"  # vision-language, 2B
QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"  # vision-language, 4B
QWEN_4B_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-4B-Unredacted-MAX"
QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
GEMMA4_E2B_NAME = "google/gemma-4-E2B-it"
LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
49
 
50
def _load_qwen_vl(model_name: str, label: str):
    """Best-effort load of a Qwen3-VL checkpoint and its processor.

    Prints progress, and returns ``(model, processor)`` on success or
    ``(None, None)`` on failure so the app can still start and report the
    model as unavailable at inference time (matching the other loaders).
    """
    print(f"Loading {label} model: {model_name} on {DEVICE}...")
    try:
        model = Qwen3VLForConditionalGeneration.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
        ).to(DEVICE).eval()
        processor = AutoProcessor.from_pretrained(
            model_name, trust_remote_code=True
        )
        print(f"{label} model loaded successfully.")
        return model, processor
    except Exception as e:  # broad by design: keep the app bootable
        print(f"Warning: {label} model loading failed. Error: {e}")
        return None, None


# ── Qwen3-VL-2B-Instruct ────────────────────────────────
qwen_vl_2b_model, qwen_vl_2b_processor = _load_qwen_vl(
    QWEN_VL_2B_MODEL_NAME, "Qwen3-VL-2B"
)

# ── Qwen3-VL-4B-Instruct ────────────────────────────────
qwen_vl_4b_model, qwen_vl_4b_processor = _load_qwen_vl(
    QWEN_VL_4B_MODEL_NAME, "Qwen3-VL-4B"
)
83
+
84
  # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
85
  print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
86
  try:
 
120
  qwen_2b_model = None
121
  qwen_2b_processor = None
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  # ── LFM2.5-VL-450M ──────────────────────────────────────
124
  print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
125
  try:
 
215
  # --- Inference Generator (Streaming) ---
216
  @spaces.GPU(duration=120)
217
  def generate_inference_stream(
218
+ image: Image.Image, category: str, prompt: str, model_id: str = "qwen_vl_2b"
219
  ):
220
  if category == "Query":
221
  full_prompt = prompt
 
228
  else:
229
  full_prompt = prompt
230
 
231
+ # ── Qwen3-VL-2B ──────────────────────────────────────
232
+ if model_id == "qwen_vl_2b":
233
+ if qwen_vl_2b_model is None or qwen_vl_2b_processor is None:
234
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-2B model not loaded.'})}\n\n"
235
  yield "data: [DONE]\n\n"
236
  return
237
  messages = [{"role": "user", "content": [
238
  {"type": "image", "image": image},
239
  {"type": "text", "text": full_prompt},
240
  ]}]
241
+ text_input = qwen_vl_2b_processor.apply_chat_template(
242
  messages, tokenize=False, add_generation_prompt=True
243
  )
244
+ inputs = qwen_vl_2b_processor(
245
  text=[text_input], images=[image], return_tensors="pt", padding=True
246
+ ).to(qwen_vl_2b_model.device)
247
  streamer = TextIteratorStreamer(
248
+ qwen_vl_2b_processor.tokenizer,
249
  skip_prompt=True, skip_special_tokens=True, timeout=120,
250
  )
251
  thread = threading.Thread(
252
+ target=qwen_vl_2b_model.generate,
253
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
254
+ use_cache=True, temperature=1.0, do_sample=True),
255
  )
256
  thread.start()
257
  for tok in streamer:
 
259
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
260
  thread.join()
261
 
262
+ # ── Qwen3-VL-4B ─────────────────────────────────────
263
+ elif model_id == "qwen_vl_4b":
264
+ if qwen_vl_4b_model is None or qwen_vl_4b_processor is None:
265
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-4B model not loaded.'})}\n\n"
266
  yield "data: [DONE]\n\n"
267
  return
268
  messages = [{"role": "user", "content": [
269
  {"type": "image", "image": image},
270
  {"type": "text", "text": full_prompt},
271
  ]}]
272
+ text_input = qwen_vl_4b_processor.apply_chat_template(
273
  messages, tokenize=False, add_generation_prompt=True
274
  )
275
+ inputs = qwen_vl_4b_processor(
276
  text=[text_input], images=[image], return_tensors="pt", padding=True
277
+ ).to(qwen_vl_4b_model.device)
278
  streamer = TextIteratorStreamer(
279
+ qwen_vl_4b_processor.tokenizer,
280
  skip_prompt=True, skip_special_tokens=True, timeout=120,
281
  )
282
  thread = threading.Thread(
283
+ target=qwen_vl_4b_model.generate,
284
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
285
+ use_cache=True, temperature=1.0, do_sample=True),
286
  )
287
  thread.start()
288
  for tok in streamer:
 
290
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
291
  thread.join()
292
 
293
+ # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
294
+ elif model_id == "qwen_4b_unredacted":
295
+ if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
296
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
297
  yield "data: [DONE]\n\n"
298
  return
299
  messages = [{"role": "user", "content": [
300
  {"type": "image", "image": image},
301
  {"type": "text", "text": full_prompt},
302
  ]}]
303
+ text_input = qwen_4b_unredacted_processor.apply_chat_template(
304
  messages, tokenize=False, add_generation_prompt=True
305
  )
306
+ inputs = qwen_4b_unredacted_processor(
307
  text=[text_input], images=[image], return_tensors="pt", padding=True
308
+ ).to(qwen_4b_unredacted_model.device)
309
  streamer = TextIteratorStreamer(
310
+ qwen_4b_unredacted_processor.tokenizer,
311
  skip_prompt=True, skip_special_tokens=True, timeout=120,
312
  )
313
  thread = threading.Thread(
314
+ target=qwen_4b_unredacted_model.generate,
315
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
316
  use_cache=True, temperature=1.5, min_p=0.1),
317
  )
 
321
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
322
  thread.join()
323
 
324
+ # ── Qwen3.5-4B ─────────────────────────────────────
325
+ elif model_id == "qwen_4b":
326
+ if qwen_4b_model is None or qwen_4b_processor is None:
327
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
328
  yield "data: [DONE]\n\n"
329
  return
330
  messages = [{"role": "user", "content": [
331
  {"type": "image", "image": image},
332
  {"type": "text", "text": full_prompt},
333
  ]}]
334
+ text_input = qwen_4b_processor.apply_chat_template(
335
  messages, tokenize=False, add_generation_prompt=True
336
  )
337
+ inputs = qwen_4b_processor(
338
  text=[text_input], images=[image], return_tensors="pt", padding=True
339
+ ).to(qwen_4b_model.device)
340
  streamer = TextIteratorStreamer(
341
+ qwen_4b_processor.tokenizer,
342
  skip_prompt=True, skip_special_tokens=True, timeout=120,
343
  )
344
  thread = threading.Thread(
345
+ target=qwen_4b_model.generate,
346
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
347
+ use_cache=True, temperature=1.5, min_p=0.1),
348
  )
349
  thread.start()
350
  for tok in streamer:
 
352
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
353
  thread.join()
354
 
355
+ # ── Qwen3.5-2B ─────────────────────────────────────
356
+ elif model_id == "qwen_2b":
357
+ if qwen_2b_model is None or qwen_2b_processor is None:
358
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B model not loaded.'})}\n\n"
359
  yield "data: [DONE]\n\n"
360
  return
361
  messages = [{"role": "user", "content": [
362
  {"type": "image", "image": image},
363
  {"type": "text", "text": full_prompt},
364
  ]}]
365
+ text_input = qwen_2b_processor.apply_chat_template(
366
  messages, tokenize=False, add_generation_prompt=True
367
  )
368
+ inputs = qwen_2b_processor(
369
  text=[text_input], images=[image], return_tensors="pt", padding=True
370
+ ).to(qwen_2b_model.device)
371
  streamer = TextIteratorStreamer(
372
+ qwen_2b_processor.tokenizer,
373
  skip_prompt=True, skip_special_tokens=True, timeout=120,
374
  )
375
  thread = threading.Thread(
376
+ target=qwen_2b_model.generate,
377
  kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
378
+ use_cache=True, temperature=1.5, min_p=0.1),
379
  )
380
  thread.start()
381
  for tok in streamer:
 
546
  image: UploadFile = File(...),
547
  category: str = Form(...),
548
  prompt: str = Form(...),
549
+ model_id: str = Form("qwen_vl_2b"),
550
  ):
551
  try:
552
  img_bytes = await image.read()
 
589
  }
590
  * { box-sizing: border-box; margin: 0; padding: 0; }
591
  html, body {
592
+ min-height: 100%; background: var(--bg);
593
+ color: var(--text); font-family: 'JetBrains Mono', monospace;
 
 
594
  }
595
  body {
596
  background-image:
 
603
  }
604
  /* ── Top Bar ── */
605
  .top-bar {
606
+ position: sticky; top: 0; left: 0; right: 0; height: 42px;
 
607
  background: rgba(13,13,15,0.95);
608
  border-bottom: 1px solid var(--node-border);
609
  display: flex; align-items: center; padding: 0 20px;
610
+ gap: 12px; z-index: 1000; backdrop-filter: blur(12px);
 
611
  }
612
  .top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
613
  .top-bar .sep { color: var(--node-border); }
 
621
  }
622
  /* ── Canvas ── */
623
  #canvas {
624
+ position: relative; width: 1360px;
625
+ min-height: calc(100vh - 42px); height: 900px; margin: 0 auto;
 
 
 
626
  }
627
  svg.wires {
628
  position: absolute; top: 0; left: 0;
629
  width: 100%; height: 100%;
630
+ pointer-events: none; z-index: 2; overflow: visible;
 
 
 
 
 
631
  }
632
+ path.wire { fill: none; stroke: var(--wire); stroke-width: 2.5; stroke-linecap: round; }
633
  path.wire.active {
634
  stroke: var(--wire-active); stroke-width: 3;
635
+ stroke-dasharray: 8 4; animation: flow 0.6s linear infinite;
 
636
  }
637
  @keyframes flow { to { stroke-dashoffset: -24; } }
638
  /* ── Nodes ── */
639
  .node {
640
  position: absolute; width: 295px;
641
+ background: var(--node-bg); border: 1px solid var(--node-border);
642
+ border-radius: 9px; box-shadow: 0 8px 28px rgba(0,0,0,0.5);
643
+ z-index: 10; display: flex; flex-direction: column; transition: box-shadow 0.2s;
 
 
 
 
 
 
 
644
  }
645
+ .node:hover { box-shadow: 0 8px 28px rgba(0,0,0,0.5), 0 0 0 1px rgba(124,106,247,0.3); }
646
  .node.fixed-height { height: 340px; }
647
  .node-header {
648
+ background: var(--node-header); padding: 7px 12px;
649
+ border-bottom: 1px solid var(--node-border); border-radius: 9px 9px 0 0;
650
+ font-size: 11px; font-weight: 700; cursor: grab;
 
 
 
651
  display: flex; justify-content: space-between; align-items: center;
652
  flex-shrink: 0; user-select: none;
653
  }
654
  .node-header:active { cursor: grabbing; }
655
  .node-header .id {
656
  font-size: 10px; color: var(--muted);
657
+ background: rgba(255,255,255,0.04); padding: 2px 7px; border-radius: 4px;
 
 
 
 
 
 
658
  }
659
+ .node-body { padding: 10px; display: flex; flex-direction: column; gap: 8px; flex: 1; overflow: hidden; }
660
  /* ── Ports ── */
661
  .port {
662
  position: absolute; width: 11px; height: 11px;
663
+ background: var(--node-bg); border: 2px solid var(--port);
 
664
  border-radius: 50%; z-index: 30;
665
  }
666
  .port.out { right: -6px; }
667
  .port.in { left: -6px; }
668
  /* ── Labels ── */
669
  label {
670
+ font-size: 10px; color: var(--muted); font-weight: 600;
671
+ display: block; margin-bottom: 3px; letter-spacing: 0.07em; text-transform: uppercase;
 
672
  }
673
  input[type="file"] { display: none; }
674
  /* ── Upload Zone ── */
675
  .file-upload {
676
+ border: 1.5px dashed var(--node-border); border-radius: 7px; padding: 12px 10px;
677
+ text-align: center; cursor: pointer; font-size: 11px; color: var(--muted);
678
+ transition: border-color 0.2s, background 0.2s; background: rgba(255,255,255,0.01);
 
 
 
679
  display: flex; flex-direction: column; align-items: center; gap: 5px;
680
  }
681
+ .file-upload:hover { border-color: var(--accent); background: rgba(124,106,247,0.04); }
 
 
 
682
  .file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
683
  .file-upload:hover svg { opacity: 0.9; }
684
  /* ── Preview wrapper ── */
685
  .preview-wrap {
686
+ display: none; position: relative; border-radius: 7px;
687
+ overflow: hidden; border: 1px solid var(--node-border); background: #000;
 
688
  }
689
  .preview-wrap.visible { display: block; }
690
  .img-preview { width: 100%; height: 170px; object-fit: contain; display: block; }
691
  /* ── Clear button ── */
692
  .clear-btn {
693
+ position: absolute; top: 6px; right: 6px; width: 24px; height: 24px;
694
+ border-radius: 50%; background: rgba(13,13,15,0.80);
695
+ border: 1px solid var(--node-border); color: var(--accent3); cursor: pointer;
 
 
696
  display: flex; align-items: center; justify-content: center;
697
  transition: background 0.18s, border-color 0.18s, transform 0.12s;
698
  z-index: 20; backdrop-filter: blur(6px);
699
  }
700
+ .clear-btn:hover { background: rgba(255,107,107,0.18); border-color: var(--accent3); transform: scale(1.08); }
 
 
 
701
  .clear-btn:active { transform: scale(0.95); }
702
  .clear-btn svg { pointer-events: none; }
703
  /* ── Filename chip ── */
704
  .img-chip {
705
  display: none; align-items: center; gap: 6px;
706
+ background: rgba(124,106,247,0.08); border: 1px solid rgba(124,106,247,0.22);
707
+ border-radius: 5px; padding: 4px 8px; font-size: 9px; color: var(--muted); overflow: hidden;
 
 
708
  }
709
  .img-chip.visible { display: flex; }
710
+ .img-chip .chip-dot { width: 5px; height: 5px; border-radius: 50%; background: var(--accent2); flex-shrink: 0; box-shadow: 0 0 4px var(--accent2); }
711
+ .img-chip .chip-name { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; flex: 1; color: var(--text); font-size: 9px; }
 
 
 
 
 
 
 
 
712
  .img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 9px; }
713
  select, textarea {
714
+ width: 100%; background: rgba(0,0,0,0.3); border: 1px solid var(--node-border);
715
+ color: var(--text); padding: 7px 9px; border-radius: 5px; outline: none;
 
 
716
  font-size: 11px; font-family: 'JetBrains Mono', monospace;
717
  resize: none; transition: border-color 0.2s;
718
  }
 
721
  button.run-btn {
722
  background: linear-gradient(135deg, var(--accent), #9b59b6);
723
  color: #fff; border: none; padding: 8px; border-radius: 6px;
724
+ font-weight: 700; font-size: 11px; font-family: 'JetBrains Mono', monospace;
725
+ cursor: pointer; transition: opacity 0.2s, transform 0.1s;
 
726
  display: flex; justify-content: center; align-items: center; gap: 8px;
727
  letter-spacing: 0.04em; flex-shrink: 0;
728
  }
729
+ button.run-btn:hover { opacity: 0.9; }
730
+ button.run-btn:active { transform: scale(0.98); }
731
  button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
732
  /* ── Output node ── */
733
+ .output-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
734
+ .output-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
735
+ /* ── Icon buttons (copy / download) ── */
736
+ .icon-btn {
 
 
 
 
 
 
737
  display: flex; align-items: center; gap: 5px;
738
+ background: rgba(124,106,247,0.10); border: 1px solid rgba(124,106,247,0.25);
 
739
  border-radius: 5px; padding: 3px 8px;
740
+ font-size: 9px; font-weight: 700; font-family: 'JetBrains Mono', monospace;
741
+ color: var(--accent); cursor: pointer; letter-spacing: 0.05em;
742
+ transition: background 0.18s, border-color 0.18s, transform 0.1s; flex-shrink: 0;
743
+ text-decoration: none;
 
 
744
  }
745
+ .icon-btn:hover { background: rgba(124,106,247,0.22); border-color: var(--accent); }
746
+ .icon-btn:active { transform: scale(0.95); }
747
+ .icon-btn.teal {
748
+ background: rgba(78,205,196,0.10); border-color: rgba(78,205,196,0.25); color: var(--accent2);
 
749
  }
750
+ .icon-btn.teal:hover { background: rgba(78,205,196,0.22); border-color: var(--accent2); }
751
+ .icon-btn.copied { background: rgba(78,205,196,0.15); border-color: var(--accent2); color: var(--accent2); }
752
+ .icon-btn svg { pointer-events: none; flex-shrink: 0; }
753
  .output-box {
754
+ background: rgba(0,0,0,0.4); border: 1px solid var(--node-border);
755
+ border-radius: 5px; padding: 10px; flex: 1; overflow-y: auto;
756
+ font-size: 11px; line-height: 1.6; color: #c8c8e0; white-space: pre-wrap;
757
+ user-select: text; font-family: 'JetBrains Mono', monospace; min-height: 0;
 
 
 
 
758
  }
759
+ /* ── Grounding node ── */
760
+ .ground-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
761
+ .ground-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
762
  .ground-canvas-wrap {
763
+ position: relative; flex: 1; border: 1px solid var(--node-border);
764
+ border-radius: 5px; overflow: hidden; background: #000; min-height: 0;
 
 
765
  }
766
  .ground-canvas-wrap canvas { width: 100%; height: 100%; object-fit: contain; display: block; }
767
  .ground-placeholder {
768
+ position: absolute; inset: 0; display: flex; align-items: center;
769
+ justify-content: center; font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
 
770
  }
771
  .loader {
772
+ width: 11px; height: 11px; border: 2px solid rgba(255,255,255,0.3);
 
773
  border-top-color: #fff; border-radius: 50%;
774
  animation: spin 0.7s linear infinite; display: none;
775
  }
776
  @keyframes spin { to { transform: rotate(360deg); } }
777
+ .status-dot { width: 6px; height: 6px; border-radius: 50%; background: var(--muted); display: inline-block; margin-right: 6px; }
 
 
 
778
  .status-dot.active { background: var(--accent2); box-shadow: 0 0 5px var(--accent2); }
779
  /* ── Model badges ── */
780
  .model-badge {
781
+ display: inline-block; padding: 2px 7px; border-radius: 4px;
782
+ font-size: 9px; font-weight: 700; letter-spacing: 0.06em; text-transform: uppercase;
 
783
  }
784
+ .model-badge.qvl2b { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
785
+ .model-badge.qvl4b { background: rgba(255,100,80,0.15); color: #ff6450; border: 1px solid rgba(255,100,80,0.35); }
786
  .model-badge.q4bunred { background: rgba(255,80,80,0.18); color: #ff5050; border: 1px solid rgba(255,80,80,0.40); }
787
  .model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
788
  .model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
 
 
789
  .model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
790
  .model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
791
  .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
792
  .model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
793
  .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
794
+ .model-info-box { border-radius: 6px; padding: 9px; font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0; }
 
 
 
795
  .canvas-footer { height: 36px; }
796
  </style>
797
  </head>
 
823
  <label>Upload Image</label>
824
  <div class="file-upload" id="dropZone">
825
  <svg width="30" height="30" viewBox="0 0 24 24" fill="none"
826
+ stroke="#7c6af7" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
 
827
  <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
828
  <circle cx="8.5" cy="8.5" r="1.5"/>
829
  <polyline points="21 15 16 10 5 21"/>
 
835
  <img id="imgPreview" class="img-preview" />
836
  <button class="clear-btn" id="clearBtn" title="Remove image">
837
  <svg width="12" height="12" viewBox="0 0 24 24" fill="none"
838
+ stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
 
839
  <line x1="18" y1="6" x2="6" y2="18"/>
840
  <line x1="6" y1="6" x2="18" y2="18"/>
841
  </svg>
 
861
  <div>
862
  <label>Active Model</label>
863
  <select id="modelSelect">
864
+ <option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
865
+ <option value="qwen_vl_4b">Qwen3-VL-4B-Instruct</option>
866
  <option value="qwen_4b_unredacted">Qwen3.5-4B-Unredacted-MAX</option>
867
  <option value="qwen_4b">Qwen3.5-4B</option>
868
  <option value="qwen_2b">Qwen3.5-2B</option>
 
 
869
  <option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
870
  <option value="gemma4_e2b">Gemma4-E2B-it (Google)</option>
871
  <option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
 
874
  </select>
875
  </div>
876
  <div id="modelInfoBox" class="model-info-box"
877
+ style="background:rgba(255,150,50,0.07);border:1px solid rgba(255,150,50,0.3);">
878
+ <span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
879
+ Qwen3-VL-2B-Instruct dedicated vision-language model by Alibaba Cloud.
880
+ Strong spatial grounding, OCR &amp; instruction-following.
881
  </div>
882
  <div style="flex:1;"></div>
883
  </div>
 
924
  <div class="output-node-body">
925
  <div class="output-header-row">
926
  <label style="margin-bottom:0;">Streamed Result</label>
927
+ <button class="icon-btn" id="copyBtn" title="Copy result to clipboard">
928
  <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
929
+ stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
 
930
  <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
931
  <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
932
  </svg>
 
944
  <span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
945
  <span class="id">ID: 05</span>
946
  </div>
947
+ <div class="ground-node-body">
948
+ <div class="ground-header-row">
949
+ <label style="margin-bottom:0;">Point / Detect Overlay</label>
950
+ <a class="icon-btn teal" id="downloadBtn" title="Download overlay image" style="display:none;">
951
+ <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
952
+ stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
953
+ <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
954
+ <polyline points="7 10 12 15 17 10"/>
955
+ <line x1="12" y1="15" x2="12" y2="3"/>
956
+ </svg>
957
+ SAVE
958
+ </a>
959
+ </div>
960
  <div class="ground-canvas-wrap">
961
  <canvas id="groundCanvas"></canvas>
962
  <div class="ground-placeholder" id="groundPlaceholder">
 
976
const canvasEl = document.getElementById('canvas');
// Centre of a port element, expressed in canvas-local coordinates, used as
// a wire endpoint. Returns the origin for a missing element so wire drawing
// degrades gracefully instead of throwing.
function portCenter(id) {
  const el = document.getElementById(id);
  if (!el) return {x:0,y:0};
  const er = el.getBoundingClientRect(), cr = canvasEl.getBoundingClientRect();
  return { x: er.left + er.width/2 - cr.left, y: er.top + er.height/2 - cr.top };
}
983
  function bezier(p1, p2) {
 
1004
  const header = node.querySelector('.node-header');
1005
  let drag = false, sx, sy, il, it;
1006
  header.addEventListener('mousedown', e => {
1007
+ drag=true; sx=e.clientX; sy=e.clientY;
1008
+ il=parseInt(node.style.left)||0; it=parseInt(node.style.top)||0;
1009
+ node.style.zIndex=100; e.preventDefault();
1010
  });
1011
  document.addEventListener('mousemove', e => {
1012
  if (!drag) return;
1013
+ node.style.left=`${il+e.clientX-sx}px`; node.style.top=`${it+e.clientY-sy}px`;
 
1014
  updateWires();
1015
  });
1016
+ document.addEventListener('mouseup', () => { if(drag){drag=false;node.style.zIndex=10;} });
1017
  });
1018
  window.addEventListener('resize', updateWires);
1019
  window.addEventListener('scroll', updateWires);
 
1035
  const dotImg = document.getElementById('dot-img');
1036
 
1037
function formatBytes(b) {
  // Human-readable file size: bytes, then KB / MB with one decimal place.
  const KB = 1024;
  const MB = 1024 * 1024;
  if (b < KB) return `${b} B`;
  if (b < MB) return `${(b / KB).toFixed(1)} KB`;
  return `${(b / MB).toFixed(1)} MB`;
}
1041
  function handleFile(file) {
1042
+ if (!file||!file.type.startsWith('image/')) return;
1043
+ currentFile=file; imgPreview.src=URL.createObjectURL(file);
1044
+ previewWrap.classList.add('visible'); dropZone.style.display='none';
1045
+ chipName.textContent=file.name; chipSize.textContent=formatBytes(file.size);
1046
+ imgChip.classList.add('visible'); dotImg.classList.add('active');
 
 
 
 
1047
  requestAnimationFrame(updateWires);
1048
  }
1049
  function clearImage() {
1050
+ currentFile=null; imgPreview.src=''; previewWrap.classList.remove('visible');
1051
+ dropZone.style.display=''; imgChip.classList.remove('visible');
1052
+ chipName.textContent='—'; chipSize.textContent=''; fileInput.value='';
1053
+ dotImg.classList.remove('active'); requestAnimationFrame(updateWires);
 
 
 
1054
  }
1055
  dropZone.onclick = () => fileInput.click();
1056
  fileInput.onchange = e => handleFile(e.target.files[0]);
 
1071
  dotModel.classList.add('active');
1072
 
1073
  const MODEL_INFO = {
1074
+ qwen_vl_2b: {
1075
+ html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
1076
+ Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
1077
+ Strong spatial grounding, OCR &amp; instruction-following.`,
1078
+ bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.30)',
1079
+ },
1080
+ qwen_vl_4b: {
1081
+ html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
1082
+ Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
1083
+ Superior spatial grounding, richer OCR &amp; stronger multi-step reasoning.`,
1084
+ bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
1085
+ },
1086
  qwen_4b_unredacted: {
1087
  html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
1088
  Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
 
1101
  Lightweight &amp; fast — ideal for quick Query, Caption, Point &amp; Detect tasks.`,
1102
  bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
1103
  },
 
 
 
 
 
 
 
 
 
 
 
 
1104
  lfm_450: {
1105
  html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
1106
  LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
 
1151
  Point: 'e.g., The gun held by the person.',
1152
  Detect: 'e.g., The headlight of the car.',
1153
  };
1154
// Swap the prompt hint to match the selected task category.
categorySelect.onchange = (evt) => {
  const hint = PLACEHOLDERS[evt.target.value];
  promptInput.placeholder = hint || '';
};
1155
 
1156
  // ══════════════════════════════════════════════
1157
  // ROBUST JSON EXTRACTOR
 
 
 
 
 
 
 
 
 
1158
  // ══════════════════════════════════════════════
1159
  function extractGroundingJSON(raw) {
1160
+ // 1. Strip all <think>…</think> blocks (multi-pass)
1161
+ let text = raw, prev = null;
 
 
 
1162
  while (prev !== text) {
1163
  prev = text;
1164
  text = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
1165
  }
1166
+ // 2. Strip markdown fences, keep inner content
1167
+ text = text.replace(/```(?:json)?([\s\S]*?)```/gi, '$1');
1168
+ text = text.replace(/```/g, '').trim();
1169
 
1170
+ // Balanced bracket extractor
 
 
 
 
 
 
 
 
 
 
 
1171
  function extractBalanced(str, startIdx, openCh, closeCh) {
1172
+ let depth=0, inStr=false, esc=false;
1173
+ for (let i=startIdx; i<str.length; i++) {
1174
+ const c=str[i];
1175
+ if (esc) { esc=false; continue; }
1176
+ if (c==='\\\\') { esc=true; continue; }
1177
+ if (c==='"') { inStr=!inStr; continue; }
1178
+ if (inStr) continue;
1179
+ if (c===openCh) depth++;
1180
+ if (c===closeCh) {
1181
  depth--;
1182
+ if (depth===0) {
1183
+ try { return JSON.parse(str.slice(startIdx, i+1)); } catch(_) { return null; }
 
1184
  }
1185
  }
1186
  }
1187
  return null;
1188
  }
1189
 
1190
+ // Search from the END models emit JSON after reasoning prose
1191
+ for (let i=text.length-1; i>=0; i--) {
1192
+ if (text[i]==='[') { const r=extractBalanced(text,i,'[',']'); if(r!==null) return r; }
 
 
 
 
 
 
 
 
 
 
 
 
1193
  }
1194
+ for (let i=text.length-1; i>=0; i--) {
1195
+ if (text[i]==='{') { const r=extractBalanced(text,i,'{','}'); if(r!==null) return r; }
 
1196
  }
1197
+ // Fallback: search from start
1198
+ const fa=text.indexOf('['); if(fa!==-1){const r=extractBalanced(text,fa,'[',']');if(r!==null)return r;}
1199
+ const fo=text.indexOf('{'); if(fo!==-1){const r=extractBalanced(text,fo,'{','}');if(r!==null)return r;}
 
 
 
 
 
 
 
 
 
 
 
 
 
1200
  try { return JSON.parse(text); } catch(_) {}
1201
  return null;
1202
  }
 
1207
  const groundCanvas = document.getElementById('groundCanvas');
1208
  const groundPlaceholder = document.getElementById('groundPlaceholder');
1209
  const gCtx = groundCanvas.getContext('2d');
1210
+ const downloadBtn = document.getElementById('downloadBtn');
1211
 
1212
  const PALETTE = ['#4ecdc4','#7c6af7','#ff6b6b','#ffd93d','#6bcb77','#ff922b','#cc5de8','#339af0'];
1213
 
1214
function hexToRgba(hex, alpha) {
  // '#rrggbb' -> 'rgba(r,g,b,alpha)' CSS color string.
  const channel = (at) => parseInt(hex.substring(at, at + 2), 16);
  const [r, g, b] = [1, 3, 5].map(channel);
  return `rgba(${r},${g},${b},${alpha})`;
}
1218
function roundRect(ctx, x, y, w, h, r) {
  // Trace a rounded-rectangle path clockwise from the top-left corner.
  // Caller is responsible for fill()/stroke().
  const right = x + w;
  const bottom = y + h;
  ctx.beginPath();
  ctx.moveTo(x + r, y);
  // [edge endpoint x/y, corner control x/y, corner endpoint x/y]
  const corners = [
    [right - r, y, right, y, right, y + r],
    [right, bottom - r, right, bottom, right - r, bottom],
    [x + r, bottom, x, bottom, x, bottom - r],
    [x, y + r, x, y, x + r, y],
  ];
  for (const [ex, ey, cx, cy, px, py] of corners) {
    ctx.lineTo(ex, ey);
    ctx.quadraticCurveTo(cx, cy, px, py);
  }
  ctx.closePath();
}
1225
+
1226
function updateDownloadBtn() {
  // Point the SAVE anchor at the current canvas contents, with a
  // timestamped filename, and make the button visible.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
  downloadBtn.download = `grounding_${stamp}.png`;
  downloadBtn.href = groundCanvas.toDataURL('image/png');
  downloadBtn.style.display = 'flex';
}
1234
 
1235
function drawGrounding(imgSrc, rawText) {
  // Render bounding-box / point annotations parsed from `rawText`
  // on top of the source image inside the grounding canvas.
  const parsed = extractGroundingJSON(rawText);
  if (!parsed) { console.warn('Grounding: no JSON found in:', rawText.slice(0,200)); return; }

  const img = new Image();
  img.onerror = () => console.error('Grounding: failed to load image.');
  img.onload = () => {
    const W = img.naturalWidth;
    const H = img.naturalHeight;
    groundCanvas.width = W;
    groundCanvas.height = H;
    gCtx.drawImage(img, 0, 0);
    groundPlaceholder.style.display = 'none';

    // Scale stroke width and font size with the image resolution.
    const lw = Math.max(2, W / 200);
    const fs = Math.max(12, W / 40);
    gCtx.lineWidth = lw;
    gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;

    // Accept either a named n-length array field or a bare numeric tuple.
    const fixedLen = (v, n) => (Array.isArray(v) && v.length === n) ? v : null;
    const numericTuple = (v, n) =>
      (Array.isArray(v) && v.length === n && v.every(x => typeof x === 'number')) ? v : null;

    const items = Array.isArray(parsed) ? parsed : [parsed];
    items.forEach((item, i) => {
      const col = PALETTE[i % PALETTE.length];

      // ── Bounding box: `bbox_2d`, `bbox`, or bare [x1,y1,x2,y2] ──
      const bbox = fixedLen(item?.bbox_2d, 4) || fixedLen(item?.bbox, 4) || numericTuple(item, 4);
      if (bbox) {
        let [x1, y1, x2, y2] = bbox.map(Number);
        // All values in [0,1] are treated as normalized coordinates.
        if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) { x1 *= W; y1 *= H; x2 *= W; y2 *= H; }
        if (x2 < x1) [x1, x2] = [x2, x1];
        if (y2 < y1) [y1, y2] = [y2, y1];
        const bw = x2 - x1;
        const bh = y2 - y1;
        const lbl = (item?.label ?? `obj ${i+1}`).toString();
        gCtx.fillStyle = hexToRgba(col, 0.20);
        gCtx.fillRect(x1, y1, bw, bh);
        gCtx.strokeStyle = col;
        gCtx.lineWidth = lw;
        gCtx.strokeRect(x1, y1, bw, bh);
        // Label pill above the box, clamped to the top edge of the canvas.
        const tw = gCtx.measureText(lbl).width;
        const ph = fs * 1.45;
        const pw = tw + 12;
        const lx = x1;
        const ly = Math.max(0, y1 - ph);
        gCtx.fillStyle = col;
        roundRect(gCtx, lx, ly, pw, ph, 4);
        gCtx.fill();
        gCtx.fillStyle = '#fff';
        gCtx.fillText(lbl, lx + 6, ly + ph * 0.76);
        return;
      }

      // ── Point: `point_2d`, `point`, or bare [x,y] ──
      const pt = fixedLen(item?.point_2d, 2) || fixedLen(item?.point, 2) || numericTuple(item, 2);
      if (pt) {
        let [x, y] = pt.map(Number);
        if (x <= 1 && y <= 1) { x *= W; y *= H; }
        const r = Math.max(8, W / 60);
        const lbl = (item?.label ?? `pt ${i+1}`).toString();
        // Soft halo, solid dot, white ring, then the label text.
        gCtx.beginPath();
        gCtx.arc(x, y, r * 1.8, 0, Math.PI * 2);
        gCtx.fillStyle = hexToRgba(col, 0.18);
        gCtx.fill();
        gCtx.beginPath();
        gCtx.arc(x, y, r, 0, Math.PI * 2);
        gCtx.fillStyle = col;
        gCtx.fill();
        gCtx.strokeStyle = '#fff';
        gCtx.lineWidth = Math.max(1.5, lw);
        gCtx.stroke();
        gCtx.fillStyle = '#fff';
        gCtx.fillText(lbl, x + r + 5, y + fs * 0.4);
      }
    });

    // Re-arm the SAVE button now that an overlay exists.
    updateDownloadBtn();
  };
  img.src = imgSrc;
}
1301
 
 
1310
  copyBtn.classList.remove('copied');
1311
  copyBtn.innerHTML = `
1312
  <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1313
+ stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
 
1314
  <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1315
  <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1316
  </svg> COPY`;
1317
  }
 
1318
// Copy the streamed result text; fall back to execCommand('copy')
// where the async Clipboard API is unavailable or rejected.
copyBtn.onclick = () => {
  const txt = outputBox.innerText || '';
  if (!txt || txt === 'Results will stream here...') return;
  const legacyCopy = () => {
    const ta = document.createElement('textarea');
    ta.value = txt;
    ta.style.position = 'fixed';
    ta.style.opacity = '0';
    document.body.appendChild(ta);
    ta.select();
    document.execCommand('copy');
    document.body.removeChild(ta);
  };
  navigator.clipboard.writeText(txt).then(() => {
    // Flash the "COPIED" checkmark, then reset after 2 s.
    copyBtn.classList.add('copied');
    copyBtn.innerHTML = `
      <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
        stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
        <polyline points="20 6 9 17 4 12"/>
      </svg> COPIED`;
    clearTimeout(copyTimer);
    copyTimer = setTimeout(resetCopyBtn, 2000);
  }).catch(legacyCopy);
};
1335
 
 
1348
  const promptStr = promptInput.value.trim();
1349
  if (!promptStr) { alert('Please enter a prompt directive.'); return; }
1350
 
1351
+ runBtn.disabled=true; btnLoader.style.display='inline-block';
1352
+ outputBox.innerText=''; outputBox.style.color='';
1353
+ groundPlaceholder.style.display='flex';
1354
+ gCtx.clearRect(0,0,groundCanvas.width,groundCanvas.height);
1355
+ downloadBtn.style.display='none';
 
1356
  dotTask.classList.add('active');
1357
+ dotOut.classList.remove('active'); dotGnd.classList.remove('active');
1358
+ allWires.forEach(id=>document.getElementById(id)?.classList.add('active'));
 
1359
  resetCopyBtn();
1360
 
1361
+ const formData=new FormData();
1362
  formData.append('image', currentFile);
1363
  formData.append('category', categorySelect.value);
1364
  formData.append('prompt', promptStr);
1365
  formData.append('model_id', modelSelect.value);
1366
 
1367
+ let fullText='';
1368
+ const imgObjectURL=URL.createObjectURL(currentFile);
1369
 
1370
  try {
1371
+ const response=await fetch('/api/run',{method:'POST',body:formData});
1372
+ if (!response.ok) { const err=await response.json(); throw new Error(err.error||'Execution failed.'); }
 
 
 
 
 
 
 
1373
 
1374
+ const reader=response.body.getReader(), decoder=new TextDecoder('utf-8');
1375
+ let buffer='';
1376
  while (true) {
1377
+ const {value,done}=await reader.read(); if(done)break;
1378
+ buffer+=decoder.decode(value,{stream:true});
1379
+ const lines=buffer.split('\\n\\n'); buffer=lines.pop();
 
 
1380
  for (const line of lines) {
1381
  if (!line.startsWith('data: ')) continue;
1382
+ const payload=line.replace('data: ','');
1383
+ if (payload==='[DONE]') break;
1384
  try {
1385
+ const data=JSON.parse(payload);
1386
+ if (data.chunk) { fullText+=data.chunk; outputBox.innerText=fullText; outputBox.scrollTop=outputBox.scrollHeight; }
 
 
 
 
1387
  } catch(_) {}
1388
  }
1389
  }
1390
 
1391
  dotOut.classList.add('active');
1392
 
1393
+ // Grounding overlay for Point / Detect
1394
+ const cat=categorySelect.value;
1395
+ if ((cat==='Point'||cat==='Detect') && fullText.trim()) {
1396
+ const parsed=extractGroundingJSON(fullText);
1397
+ if (parsed!==null) {
1398
  dotGnd.classList.add('active');
1399
  drawGrounding(imgObjectURL, fullText);
1400
  } else {
 
1402
  }
1403
  }
1404
 
1405
+ } catch(err) {
1406
+ outputBox.innerText=`[Error] ${err.message}`; outputBox.style.color='#ff6b6b';
 
1407
  } finally {
1408
+ runBtn.disabled=false; btnLoader.style.display='none';
 
1409
  dotTask.classList.remove('active');
1410
+ allWires.forEach(id=>document.getElementById(id)?.classList.remove('active'));
1411
  }
1412
  };
1413
  </script>
 
1416
  """
1417
 
1418
if __name__ == "__main__":
    # Start the web app; show_error surfaces server-side tracebacks in the UI.
    app.launch(show_error=True)