Spaces:
Running on Zero
Running on Zero
update app [final] ✅
Browse files
app.py
CHANGED
|
@@ -36,17 +36,51 @@ DTYPE = (
|
|
| 36 |
else torch.float16
|
| 37 |
)
|
| 38 |
|
|
|
|
|
|
|
| 39 |
QWEN_4B_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-4B-Unredacted-MAX"
|
| 40 |
QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
|
| 41 |
QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
|
| 42 |
-
QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
|
| 43 |
-
QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"
|
| 44 |
LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
|
| 45 |
GEMMA4_E2B_NAME = "google/gemma-4-E2B-it"
|
| 46 |
LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
|
| 47 |
QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
|
| 48 |
QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
# ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
|
| 51 |
print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
|
| 52 |
try:
|
|
@@ -86,40 +120,6 @@ except Exception as e:
|
|
| 86 |
qwen_2b_model = None
|
| 87 |
qwen_2b_processor = None
|
| 88 |
|
| 89 |
-
# ── Qwen3-VL-2B-Instruct ────────────────────────────────
|
| 90 |
-
print(f"Loading Qwen3-VL-2B model: {QWEN_VL_2B_MODEL_NAME} on {DEVICE}...")
|
| 91 |
-
try:
|
| 92 |
-
qwen_vl_2b_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 93 |
-
QWEN_VL_2B_MODEL_NAME,
|
| 94 |
-
trust_remote_code=True,
|
| 95 |
-
torch_dtype=torch.bfloat16,
|
| 96 |
-
).to(DEVICE).eval()
|
| 97 |
-
qwen_vl_2b_processor = AutoProcessor.from_pretrained(
|
| 98 |
-
QWEN_VL_2B_MODEL_NAME, trust_remote_code=True
|
| 99 |
-
)
|
| 100 |
-
print("Qwen3-VL-2B model loaded successfully.")
|
| 101 |
-
except Exception as e:
|
| 102 |
-
print(f"Warning: Qwen3-VL-2B model loading failed. Error: {e}")
|
| 103 |
-
qwen_vl_2b_model = None
|
| 104 |
-
qwen_vl_2b_processor = None
|
| 105 |
-
|
| 106 |
-
# ── Qwen3-VL-4B-Instruct ────────────────────────────────
|
| 107 |
-
print(f"Loading Qwen3-VL-4B model: {QWEN_VL_4B_MODEL_NAME} on {DEVICE}...")
|
| 108 |
-
try:
|
| 109 |
-
qwen_vl_4b_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 110 |
-
QWEN_VL_4B_MODEL_NAME,
|
| 111 |
-
trust_remote_code=True,
|
| 112 |
-
torch_dtype=torch.bfloat16,
|
| 113 |
-
).to(DEVICE).eval()
|
| 114 |
-
qwen_vl_4b_processor = AutoProcessor.from_pretrained(
|
| 115 |
-
QWEN_VL_4B_MODEL_NAME, trust_remote_code=True
|
| 116 |
-
)
|
| 117 |
-
print("Qwen3-VL-4B model loaded successfully.")
|
| 118 |
-
except Exception as e:
|
| 119 |
-
print(f"Warning: Qwen3-VL-4B model loading failed. Error: {e}")
|
| 120 |
-
qwen_vl_4b_model = None
|
| 121 |
-
qwen_vl_4b_processor = None
|
| 122 |
-
|
| 123 |
# ── LFM2.5-VL-450M ──────────────────────────────────────
|
| 124 |
print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
|
| 125 |
try:
|
|
@@ -215,7 +215,7 @@ def safe_parse_json(text: str):
|
|
| 215 |
# --- Inference Generator (Streaming) ---
|
| 216 |
@spaces.GPU(duration=120)
|
| 217 |
def generate_inference_stream(
|
| 218 |
-
image: Image.Image, category: str, prompt: str, model_id: str = "
|
| 219 |
):
|
| 220 |
if category == "Query":
|
| 221 |
full_prompt = prompt
|
|
@@ -228,30 +228,30 @@ def generate_inference_stream(
|
|
| 228 |
else:
|
| 229 |
full_prompt = prompt
|
| 230 |
|
| 231 |
-
# ── Qwen3
|
| 232 |
-
if model_id == "
|
| 233 |
-
if
|
| 234 |
-
yield f"data: {json.dumps({'chunk': '[Error] Qwen3
|
| 235 |
yield "data: [DONE]\n\n"
|
| 236 |
return
|
| 237 |
messages = [{"role": "user", "content": [
|
| 238 |
{"type": "image", "image": image},
|
| 239 |
{"type": "text", "text": full_prompt},
|
| 240 |
]}]
|
| 241 |
-
text_input =
|
| 242 |
messages, tokenize=False, add_generation_prompt=True
|
| 243 |
)
|
| 244 |
-
inputs =
|
| 245 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 246 |
-
).to(
|
| 247 |
streamer = TextIteratorStreamer(
|
| 248 |
-
|
| 249 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 250 |
)
|
| 251 |
thread = threading.Thread(
|
| 252 |
-
target=
|
| 253 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 254 |
-
use_cache=True, temperature=1.
|
| 255 |
)
|
| 256 |
thread.start()
|
| 257 |
for tok in streamer:
|
|
@@ -259,30 +259,30 @@ def generate_inference_stream(
|
|
| 259 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 260 |
thread.join()
|
| 261 |
|
| 262 |
-
# ── Qwen3
|
| 263 |
-
elif model_id == "
|
| 264 |
-
if
|
| 265 |
-
yield f"data: {json.dumps({'chunk': '[Error] Qwen3
|
| 266 |
yield "data: [DONE]\n\n"
|
| 267 |
return
|
| 268 |
messages = [{"role": "user", "content": [
|
| 269 |
{"type": "image", "image": image},
|
| 270 |
{"type": "text", "text": full_prompt},
|
| 271 |
]}]
|
| 272 |
-
text_input =
|
| 273 |
messages, tokenize=False, add_generation_prompt=True
|
| 274 |
)
|
| 275 |
-
inputs =
|
| 276 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 277 |
-
).to(
|
| 278 |
streamer = TextIteratorStreamer(
|
| 279 |
-
|
| 280 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 281 |
)
|
| 282 |
thread = threading.Thread(
|
| 283 |
-
target=
|
| 284 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 285 |
-
use_cache=True, temperature=1.
|
| 286 |
)
|
| 287 |
thread.start()
|
| 288 |
for tok in streamer:
|
|
@@ -290,28 +290,28 @@ def generate_inference_stream(
|
|
| 290 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 291 |
thread.join()
|
| 292 |
|
| 293 |
-
# ── Qwen3.5-
|
| 294 |
-
elif model_id == "
|
| 295 |
-
if
|
| 296 |
-
yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-
|
| 297 |
yield "data: [DONE]\n\n"
|
| 298 |
return
|
| 299 |
messages = [{"role": "user", "content": [
|
| 300 |
{"type": "image", "image": image},
|
| 301 |
{"type": "text", "text": full_prompt},
|
| 302 |
]}]
|
| 303 |
-
text_input =
|
| 304 |
messages, tokenize=False, add_generation_prompt=True
|
| 305 |
)
|
| 306 |
-
inputs =
|
| 307 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 308 |
-
).to(
|
| 309 |
streamer = TextIteratorStreamer(
|
| 310 |
-
|
| 311 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 312 |
)
|
| 313 |
thread = threading.Thread(
|
| 314 |
-
target=
|
| 315 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 316 |
use_cache=True, temperature=1.5, min_p=0.1),
|
| 317 |
)
|
|
@@ -321,30 +321,30 @@ def generate_inference_stream(
|
|
| 321 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 322 |
thread.join()
|
| 323 |
|
| 324 |
-
# ── Qwen3-
|
| 325 |
-
elif model_id == "
|
| 326 |
-
if
|
| 327 |
-
yield f"data: {json.dumps({'chunk': '[Error] Qwen3-
|
| 328 |
yield "data: [DONE]\n\n"
|
| 329 |
return
|
| 330 |
messages = [{"role": "user", "content": [
|
| 331 |
{"type": "image", "image": image},
|
| 332 |
{"type": "text", "text": full_prompt},
|
| 333 |
]}]
|
| 334 |
-
text_input =
|
| 335 |
messages, tokenize=False, add_generation_prompt=True
|
| 336 |
)
|
| 337 |
-
inputs =
|
| 338 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 339 |
-
).to(
|
| 340 |
streamer = TextIteratorStreamer(
|
| 341 |
-
|
| 342 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 343 |
)
|
| 344 |
thread = threading.Thread(
|
| 345 |
-
target=
|
| 346 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 347 |
-
use_cache=True, temperature=1.
|
| 348 |
)
|
| 349 |
thread.start()
|
| 350 |
for tok in streamer:
|
|
@@ -352,30 +352,30 @@ def generate_inference_stream(
|
|
| 352 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 353 |
thread.join()
|
| 354 |
|
| 355 |
-
# ── Qwen3-
|
| 356 |
-
elif model_id == "
|
| 357 |
-
if
|
| 358 |
-
yield f"data: {json.dumps({'chunk': '[Error] Qwen3-
|
| 359 |
yield "data: [DONE]\n\n"
|
| 360 |
return
|
| 361 |
messages = [{"role": "user", "content": [
|
| 362 |
{"type": "image", "image": image},
|
| 363 |
{"type": "text", "text": full_prompt},
|
| 364 |
]}]
|
| 365 |
-
text_input =
|
| 366 |
messages, tokenize=False, add_generation_prompt=True
|
| 367 |
)
|
| 368 |
-
inputs =
|
| 369 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 370 |
-
).to(
|
| 371 |
streamer = TextIteratorStreamer(
|
| 372 |
-
|
| 373 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 374 |
)
|
| 375 |
thread = threading.Thread(
|
| 376 |
-
target=
|
| 377 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 378 |
-
use_cache=True, temperature=1.
|
| 379 |
)
|
| 380 |
thread.start()
|
| 381 |
for tok in streamer:
|
|
@@ -546,7 +546,7 @@ async def run_inference(
|
|
| 546 |
image: UploadFile = File(...),
|
| 547 |
category: str = Form(...),
|
| 548 |
prompt: str = Form(...),
|
| 549 |
-
model_id: str = Form("
|
| 550 |
):
|
| 551 |
try:
|
| 552 |
img_bytes = await image.read()
|
|
@@ -589,10 +589,8 @@ async def homepage(request: Request):
|
|
| 589 |
}
|
| 590 |
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 591 |
html, body {
|
| 592 |
-
min-height: 100%;
|
| 593 |
-
|
| 594 |
-
color: var(--text);
|
| 595 |
-
font-family: 'JetBrains Mono', monospace;
|
| 596 |
}
|
| 597 |
body {
|
| 598 |
background-image:
|
|
@@ -605,13 +603,11 @@ async def homepage(request: Request):
|
|
| 605 |
}
|
| 606 |
/* ── Top Bar ── */
|
| 607 |
.top-bar {
|
| 608 |
-
position: sticky; top: 0; left: 0; right: 0;
|
| 609 |
-
height: 42px;
|
| 610 |
background: rgba(13,13,15,0.95);
|
| 611 |
border-bottom: 1px solid var(--node-border);
|
| 612 |
display: flex; align-items: center; padding: 0 20px;
|
| 613 |
-
gap: 12px; z-index: 1000;
|
| 614 |
-
backdrop-filter: blur(12px);
|
| 615 |
}
|
| 616 |
.top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
|
| 617 |
.top-bar .sep { color: var(--node-border); }
|
|
@@ -625,146 +621,98 @@ async def homepage(request: Request):
|
|
| 625 |
}
|
| 626 |
/* ── Canvas ── */
|
| 627 |
#canvas {
|
| 628 |
-
position: relative;
|
| 629 |
-
|
| 630 |
-
min-height: calc(100vh - 42px);
|
| 631 |
-
height: 900px;
|
| 632 |
-
margin: 0 auto;
|
| 633 |
}
|
| 634 |
svg.wires {
|
| 635 |
position: absolute; top: 0; left: 0;
|
| 636 |
width: 100%; height: 100%;
|
| 637 |
-
pointer-events: none; z-index: 2;
|
| 638 |
-
overflow: visible;
|
| 639 |
-
}
|
| 640 |
-
path.wire {
|
| 641 |
-
fill: none; stroke: var(--wire); stroke-width: 2.5;
|
| 642 |
-
stroke-linecap: round;
|
| 643 |
}
|
|
|
|
| 644 |
path.wire.active {
|
| 645 |
stroke: var(--wire-active); stroke-width: 3;
|
| 646 |
-
stroke-dasharray: 8 4;
|
| 647 |
-
animation: flow 0.6s linear infinite;
|
| 648 |
}
|
| 649 |
@keyframes flow { to { stroke-dashoffset: -24; } }
|
| 650 |
/* ── Nodes ── */
|
| 651 |
.node {
|
| 652 |
position: absolute; width: 295px;
|
| 653 |
-
background: var(--node-bg);
|
| 654 |
-
border:
|
| 655 |
-
|
| 656 |
-
box-shadow: 0 8px 28px rgba(0,0,0,0.5);
|
| 657 |
-
z-index: 10; display: flex; flex-direction: column;
|
| 658 |
-
transition: box-shadow 0.2s;
|
| 659 |
-
}
|
| 660 |
-
.node:hover {
|
| 661 |
-
box-shadow: 0 8px 28px rgba(0,0,0,0.5),
|
| 662 |
-
0 0 0 1px rgba(124,106,247,0.3);
|
| 663 |
}
|
|
|
|
| 664 |
.node.fixed-height { height: 340px; }
|
| 665 |
.node-header {
|
| 666 |
-
background: var(--node-header);
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
border-radius: 9px 9px 0 0;
|
| 670 |
-
font-size: 11px; font-weight: 700;
|
| 671 |
-
cursor: grab;
|
| 672 |
display: flex; justify-content: space-between; align-items: center;
|
| 673 |
flex-shrink: 0; user-select: none;
|
| 674 |
}
|
| 675 |
.node-header:active { cursor: grabbing; }
|
| 676 |
.node-header .id {
|
| 677 |
font-size: 10px; color: var(--muted);
|
| 678 |
-
background: rgba(255,255,255,0.04);
|
| 679 |
-
padding: 2px 7px; border-radius: 4px;
|
| 680 |
-
}
|
| 681 |
-
.node-body {
|
| 682 |
-
padding: 10px;
|
| 683 |
-
display: flex; flex-direction: column; gap: 8px;
|
| 684 |
-
flex: 1; overflow: hidden;
|
| 685 |
}
|
|
|
|
| 686 |
/* ── Ports ── */
|
| 687 |
.port {
|
| 688 |
position: absolute; width: 11px; height: 11px;
|
| 689 |
-
background: var(--node-bg);
|
| 690 |
-
border: 2px solid var(--port);
|
| 691 |
border-radius: 50%; z-index: 30;
|
| 692 |
}
|
| 693 |
.port.out { right: -6px; }
|
| 694 |
.port.in { left: -6px; }
|
| 695 |
/* ── Labels ── */
|
| 696 |
label {
|
| 697 |
-
font-size: 10px; color: var(--muted);
|
| 698 |
-
|
| 699 |
-
letter-spacing: 0.07em; text-transform: uppercase;
|
| 700 |
}
|
| 701 |
input[type="file"] { display: none; }
|
| 702 |
/* ── Upload Zone ── */
|
| 703 |
.file-upload {
|
| 704 |
-
border: 1.5px dashed var(--node-border);
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
font-size: 11px; color: var(--muted);
|
| 708 |
-
transition: border-color 0.2s, background 0.2s;
|
| 709 |
-
background: rgba(255,255,255,0.01);
|
| 710 |
display: flex; flex-direction: column; align-items: center; gap: 5px;
|
| 711 |
}
|
| 712 |
-
.file-upload:hover {
|
| 713 |
-
border-color: var(--accent);
|
| 714 |
-
background: rgba(124,106,247,0.04);
|
| 715 |
-
}
|
| 716 |
.file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
|
| 717 |
.file-upload:hover svg { opacity: 0.9; }
|
| 718 |
/* ── Preview wrapper ── */
|
| 719 |
.preview-wrap {
|
| 720 |
-
display: none; position: relative;
|
| 721 |
-
border
|
| 722 |
-
border: 1px solid var(--node-border); background: #000;
|
| 723 |
}
|
| 724 |
.preview-wrap.visible { display: block; }
|
| 725 |
.img-preview { width: 100%; height: 170px; object-fit: contain; display: block; }
|
| 726 |
/* ── Clear button ── */
|
| 727 |
.clear-btn {
|
| 728 |
-
position: absolute; top: 6px; right: 6px;
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
border: 1px solid var(--node-border);
|
| 732 |
-
color: var(--accent3); cursor: pointer;
|
| 733 |
display: flex; align-items: center; justify-content: center;
|
| 734 |
transition: background 0.18s, border-color 0.18s, transform 0.12s;
|
| 735 |
z-index: 20; backdrop-filter: blur(6px);
|
| 736 |
}
|
| 737 |
-
.clear-btn:hover {
|
| 738 |
-
background: rgba(255,107,107,0.18);
|
| 739 |
-
border-color: var(--accent3); transform: scale(1.08);
|
| 740 |
-
}
|
| 741 |
.clear-btn:active { transform: scale(0.95); }
|
| 742 |
.clear-btn svg { pointer-events: none; }
|
| 743 |
/* ── Filename chip ── */
|
| 744 |
.img-chip {
|
| 745 |
display: none; align-items: center; gap: 6px;
|
| 746 |
-
background: rgba(124,106,247,0.08);
|
| 747 |
-
border:
|
| 748 |
-
border-radius: 5px; padding: 4px 8px;
|
| 749 |
-
font-size: 9px; color: var(--muted); overflow: hidden;
|
| 750 |
}
|
| 751 |
.img-chip.visible { display: flex; }
|
| 752 |
-
.img-chip .chip-dot {
|
| 753 |
-
|
| 754 |
-
background: var(--accent2); flex-shrink: 0;
|
| 755 |
-
box-shadow: 0 0 4px var(--accent2);
|
| 756 |
-
}
|
| 757 |
-
.img-chip .chip-name {
|
| 758 |
-
overflow: hidden; text-overflow: ellipsis;
|
| 759 |
-
white-space: nowrap; flex: 1;
|
| 760 |
-
color: var(--text); font-size: 9px;
|
| 761 |
-
}
|
| 762 |
.img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 9px; }
|
| 763 |
select, textarea {
|
| 764 |
-
width: 100%; background: rgba(0,0,0,0.3);
|
| 765 |
-
|
| 766 |
-
color: var(--text); padding: 7px 9px;
|
| 767 |
-
border-radius: 5px; outline: none;
|
| 768 |
font-size: 11px; font-family: 'JetBrains Mono', monospace;
|
| 769 |
resize: none; transition: border-color 0.2s;
|
| 770 |
}
|
|
@@ -773,99 +721,77 @@ async def homepage(request: Request):
|
|
| 773 |
button.run-btn {
|
| 774 |
background: linear-gradient(135deg, var(--accent), #9b59b6);
|
| 775 |
color: #fff; border: none; padding: 8px; border-radius: 6px;
|
| 776 |
-
font-weight: 700; font-size: 11px;
|
| 777 |
-
|
| 778 |
-
transition: opacity 0.2s, transform 0.1s;
|
| 779 |
display: flex; justify-content: center; align-items: center; gap: 8px;
|
| 780 |
letter-spacing: 0.04em; flex-shrink: 0;
|
| 781 |
}
|
| 782 |
-
button.run-btn:hover
|
| 783 |
-
button.run-btn:active
|
| 784 |
button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
|
| 785 |
/* ── Output node ── */
|
| 786 |
-
.output-node-body {
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
.output-header-row {
|
| 791 |
-
display: flex; align-items: center;
|
| 792 |
-
justify-content: space-between; flex-shrink: 0;
|
| 793 |
-
}
|
| 794 |
-
/* ── Copy button ── */
|
| 795 |
-
.copy-btn {
|
| 796 |
display: flex; align-items: center; gap: 5px;
|
| 797 |
-
background: rgba(124,106,247,0.10);
|
| 798 |
-
border: 1px solid rgba(124,106,247,0.25);
|
| 799 |
border-radius: 5px; padding: 3px 8px;
|
| 800 |
-
font-size: 9px; font-weight: 700;
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
transition: background 0.18s, border-color 0.18s, transform 0.1s;
|
| 805 |
-
flex-shrink: 0;
|
| 806 |
}
|
| 807 |
-
.
|
| 808 |
-
.
|
| 809 |
-
.
|
| 810 |
-
background: rgba(78,205,196,0.
|
| 811 |
-
border-color: var(--accent2); color: var(--accent2);
|
| 812 |
}
|
| 813 |
-
.
|
|
|
|
|
|
|
| 814 |
.output-box {
|
| 815 |
-
background: rgba(0,0,0,0.4);
|
| 816 |
-
border:
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
font-size: 11px; line-height: 1.6;
|
| 820 |
-
color: #c8c8e0; white-space: pre-wrap;
|
| 821 |
-
user-select: text;
|
| 822 |
-
font-family: 'JetBrains Mono', monospace; min-height: 0;
|
| 823 |
}
|
| 824 |
-
/* ── Grounding ── */
|
|
|
|
|
|
|
| 825 |
.ground-canvas-wrap {
|
| 826 |
-
position: relative; flex: 1;
|
| 827 |
-
border:
|
| 828 |
-
border-radius: 5px; overflow: hidden;
|
| 829 |
-
background: #000; min-height: 0;
|
| 830 |
}
|
| 831 |
.ground-canvas-wrap canvas { width: 100%; height: 100%; object-fit: contain; display: block; }
|
| 832 |
.ground-placeholder {
|
| 833 |
-
position: absolute; inset: 0;
|
| 834 |
-
|
| 835 |
-
font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
|
| 836 |
}
|
| 837 |
.loader {
|
| 838 |
-
width: 11px; height: 11px;
|
| 839 |
-
border: 2px solid rgba(255,255,255,0.3);
|
| 840 |
border-top-color: #fff; border-radius: 50%;
|
| 841 |
animation: spin 0.7s linear infinite; display: none;
|
| 842 |
}
|
| 843 |
@keyframes spin { to { transform: rotate(360deg); } }
|
| 844 |
-
.status-dot {
|
| 845 |
-
width: 6px; height: 6px; border-radius: 50%;
|
| 846 |
-
background: var(--muted); display: inline-block; margin-right: 6px;
|
| 847 |
-
}
|
| 848 |
.status-dot.active { background: var(--accent2); box-shadow: 0 0 5px var(--accent2); }
|
| 849 |
/* ── Model badges ── */
|
| 850 |
.model-badge {
|
| 851 |
-
display: inline-block; padding: 2px 7px;
|
| 852 |
-
|
| 853 |
-
letter-spacing: 0.06em; text-transform: uppercase;
|
| 854 |
}
|
|
|
|
|
|
|
| 855 |
.model-badge.q4bunred { background: rgba(255,80,80,0.18); color: #ff5050; border: 1px solid rgba(255,80,80,0.40); }
|
| 856 |
.model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
|
| 857 |
.model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
|
| 858 |
-
.model-badge.qvl2b { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
|
| 859 |
-
.model-badge.qvl4b { background: rgba(255,100,80,0.15); color: #ff6450; border: 1px solid rgba(255,100,80,0.35); }
|
| 860 |
.model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
|
| 861 |
.model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
|
| 862 |
.model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
|
| 863 |
.model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
|
| 864 |
.model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
|
| 865 |
-
.model-info-box {
|
| 866 |
-
border-radius: 6px; padding: 9px;
|
| 867 |
-
font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0;
|
| 868 |
-
}
|
| 869 |
.canvas-footer { height: 36px; }
|
| 870 |
</style>
|
| 871 |
</head>
|
|
@@ -897,8 +823,7 @@ async def homepage(request: Request):
|
|
| 897 |
<label>Upload Image</label>
|
| 898 |
<div class="file-upload" id="dropZone">
|
| 899 |
<svg width="30" height="30" viewBox="0 0 24 24" fill="none"
|
| 900 |
-
stroke="#7c6af7" stroke-width="1.5"
|
| 901 |
-
stroke-linecap="round" stroke-linejoin="round">
|
| 902 |
<rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
|
| 903 |
<circle cx="8.5" cy="8.5" r="1.5"/>
|
| 904 |
<polyline points="21 15 16 10 5 21"/>
|
|
@@ -910,8 +835,7 @@ async def homepage(request: Request):
|
|
| 910 |
<img id="imgPreview" class="img-preview" />
|
| 911 |
<button class="clear-btn" id="clearBtn" title="Remove image">
|
| 912 |
<svg width="12" height="12" viewBox="0 0 24 24" fill="none"
|
| 913 |
-
stroke="currentColor" stroke-width="2.5"
|
| 914 |
-
stroke-linecap="round" stroke-linejoin="round">
|
| 915 |
<line x1="18" y1="6" x2="6" y2="18"/>
|
| 916 |
<line x1="6" y1="6" x2="18" y2="18"/>
|
| 917 |
</svg>
|
|
@@ -937,11 +861,11 @@ async def homepage(request: Request):
|
|
| 937 |
<div>
|
| 938 |
<label>Active Model</label>
|
| 939 |
<select id="modelSelect">
|
|
|
|
|
|
|
| 940 |
<option value="qwen_4b_unredacted">Qwen3.5-4B-Unredacted-MAX</option>
|
| 941 |
<option value="qwen_4b">Qwen3.5-4B</option>
|
| 942 |
<option value="qwen_2b">Qwen3.5-2B</option>
|
| 943 |
-
<option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
|
| 944 |
-
<option value="qwen_vl_4b">Qwen3-VL-4B-Instruct</option>
|
| 945 |
<option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
|
| 946 |
<option value="gemma4_e2b">Gemma4-E2B-it (Google)</option>
|
| 947 |
<option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
|
|
@@ -950,10 +874,10 @@ async def homepage(request: Request):
|
|
| 950 |
</select>
|
| 951 |
</div>
|
| 952 |
<div id="modelInfoBox" class="model-info-box"
|
| 953 |
-
style="background:rgba(255,
|
| 954 |
-
<span class="model-badge
|
| 955 |
-
Qwen3
|
| 956 |
-
|
| 957 |
</div>
|
| 958 |
<div style="flex:1;"></div>
|
| 959 |
</div>
|
|
@@ -1000,10 +924,9 @@ async def homepage(request: Request):
|
|
| 1000 |
<div class="output-node-body">
|
| 1001 |
<div class="output-header-row">
|
| 1002 |
<label style="margin-bottom:0;">Streamed Result</label>
|
| 1003 |
-
<button class="
|
| 1004 |
<svg width="11" height="11" viewBox="0 0 24 24" fill="none"
|
| 1005 |
-
stroke="currentColor" stroke-width="2.2"
|
| 1006 |
-
stroke-linecap="round" stroke-linejoin="round">
|
| 1007 |
<rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
|
| 1008 |
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
|
| 1009 |
</svg>
|
|
@@ -1021,8 +944,19 @@ async def homepage(request: Request):
|
|
| 1021 |
<span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
|
| 1022 |
<span class="id">ID: 05</span>
|
| 1023 |
</div>
|
| 1024 |
-
<div class="node-body">
|
| 1025 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1026 |
<div class="ground-canvas-wrap">
|
| 1027 |
<canvas id="groundCanvas"></canvas>
|
| 1028 |
<div class="ground-placeholder" id="groundPlaceholder">
|
|
@@ -1042,9 +976,8 @@ async def homepage(request: Request):
|
|
| 1042 |
const canvasEl = document.getElementById('canvas');
|
| 1043 |
function portCenter(id) {
|
| 1044 |
const el = document.getElementById(id);
|
| 1045 |
-
if (!el) return {
|
| 1046 |
-
const er = el.getBoundingClientRect();
|
| 1047 |
-
const cr = canvasEl.getBoundingClientRect();
|
| 1048 |
return { x: er.left + er.width/2 - cr.left, y: er.top + er.height/2 - cr.top };
|
| 1049 |
}
|
| 1050 |
function bezier(p1, p2) {
|
|
@@ -1071,17 +1004,16 @@ document.querySelectorAll('.node').forEach(node => {
|
|
| 1071 |
const header = node.querySelector('.node-header');
|
| 1072 |
let drag = false, sx, sy, il, it;
|
| 1073 |
header.addEventListener('mousedown', e => {
|
| 1074 |
-
drag
|
| 1075 |
-
il
|
| 1076 |
-
node.style.zIndex
|
| 1077 |
});
|
| 1078 |
document.addEventListener('mousemove', e => {
|
| 1079 |
if (!drag) return;
|
| 1080 |
-
node.style.left
|
| 1081 |
-
node.style.top = `${it + e.clientY - sy}px`;
|
| 1082 |
updateWires();
|
| 1083 |
});
|
| 1084 |
-
document.addEventListener('mouseup', () => { if
|
| 1085 |
});
|
| 1086 |
window.addEventListener('resize', updateWires);
|
| 1087 |
window.addEventListener('scroll', updateWires);
|
|
@@ -1103,30 +1035,22 @@ const chipSize = document.getElementById('chipSize');
|
|
| 1103 |
const dotImg = document.getElementById('dot-img');
|
| 1104 |
|
| 1105 |
function formatBytes(b) {
|
| 1106 |
-
if (b
|
| 1107 |
-
|
| 1108 |
-
return (b/1048576).toFixed(1) + ' MB';
|
| 1109 |
}
|
| 1110 |
function handleFile(file) {
|
| 1111 |
-
if (!file
|
| 1112 |
-
currentFile
|
| 1113 |
-
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
chipName.textContent = file.name;
|
| 1117 |
-
chipSize.textContent = formatBytes(file.size);
|
| 1118 |
-
imgChip.classList.add('visible');
|
| 1119 |
-
dotImg.classList.add('active');
|
| 1120 |
requestAnimationFrame(updateWires);
|
| 1121 |
}
|
| 1122 |
function clearImage() {
|
| 1123 |
-
currentFile
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
-
chipName.textContent = '—'; chipSize.textContent = '';
|
| 1128 |
-
fileInput.value = ''; dotImg.classList.remove('active');
|
| 1129 |
-
requestAnimationFrame(updateWires);
|
| 1130 |
}
|
| 1131 |
dropZone.onclick = () => fileInput.click();
|
| 1132 |
fileInput.onchange = e => handleFile(e.target.files[0]);
|
|
@@ -1147,6 +1071,18 @@ const dotModel = document.getElementById('dot-model');
|
|
| 1147 |
dotModel.classList.add('active');
|
| 1148 |
|
| 1149 |
const MODEL_INFO = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1150 |
qwen_4b_unredacted: {
|
| 1151 |
html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
|
| 1152 |
Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
|
|
@@ -1165,18 +1101,6 @@ const MODEL_INFO = {
|
|
| 1165 |
Lightweight & fast — ideal for quick Query, Caption, Point & Detect tasks.`,
|
| 1166 |
bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
|
| 1167 |
},
|
| 1168 |
-
qwen_vl_2b: {
|
| 1169 |
-
html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
|
| 1170 |
-
Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
|
| 1171 |
-
Strong spatial grounding, OCR & instruction-following.`,
|
| 1172 |
-
bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.25)',
|
| 1173 |
-
},
|
| 1174 |
-
qwen_vl_4b: {
|
| 1175 |
-
html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
|
| 1176 |
-
Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
|
| 1177 |
-
Superior spatial grounding, richer OCR & stronger multi-step reasoning.`,
|
| 1178 |
-
bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
|
| 1179 |
-
},
|
| 1180 |
lfm_450: {
|
| 1181 |
html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
|
| 1182 |
LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
|
|
@@ -1227,99 +1151,52 @@ const PLACEHOLDERS = {
|
|
| 1227 |
Point: 'e.g., The gun held by the person.',
|
| 1228 |
Detect: 'e.g., The headlight of the car.',
|
| 1229 |
};
|
| 1230 |
-
categorySelect.onchange = e => { promptInput.placeholder = PLACEHOLDERS[e.target.value]
|
| 1231 |
|
| 1232 |
// ══════════════════════════════════════════════
|
| 1233 |
// ROBUST JSON EXTRACTOR
|
| 1234 |
-
// Strategy:
|
| 1235 |
-
// 1. Strip ALL <think>…</think> blocks (greedy,
|
| 1236 |
-
// handles the tag appearing after the JSON too)
|
| 1237 |
-
// 2. Strip markdown fences
|
| 1238 |
-
// 3. Find the LAST occurrence of a JSON array [ ]
|
| 1239 |
-
// or object { } — models typically emit the
|
| 1240 |
-
// clean JSON block after their reasoning prose
|
| 1241 |
-
// 4. Use a bracket-depth walker to extract it
|
| 1242 |
-
// precisely without cutting off nested objects
|
| 1243 |
// ══════════════════════════════════════════════
|
| 1244 |
function extractGroundingJSON(raw) {
|
| 1245 |
-
//
|
| 1246 |
-
|
| 1247 |
-
// Run multiple passes in case of nested/malformed tags
|
| 1248 |
-
let text = raw;
|
| 1249 |
-
let prev = null;
|
| 1250 |
while (prev !== text) {
|
| 1251 |
prev = text;
|
| 1252 |
text = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
|
| 1253 |
}
|
|
|
|
|
|
|
|
|
|
| 1254 |
|
| 1255 |
-
//
|
| 1256 |
-
text = text.replace(/```(?:json)?[\\s\\S]*?```/gi, function(m) {
|
| 1257 |
-
// Keep the inner content, just remove the fences
|
| 1258 |
-
return m.replace(/```(?:json)?/gi, '').replace(/```/g, '');
|
| 1259 |
-
});
|
| 1260 |
-
|
| 1261 |
-
// Step 3 — strip any remaining lone fence markers
|
| 1262 |
-
text = text.replace(/```/g, '');
|
| 1263 |
-
text = text.trim();
|
| 1264 |
-
|
| 1265 |
-
// Helper: walk from startIdx and extract a balanced
|
| 1266 |
-
// bracket expression (open/close must match).
|
| 1267 |
function extractBalanced(str, startIdx, openCh, closeCh) {
|
| 1268 |
-
let depth
|
| 1269 |
-
for (let i
|
| 1270 |
-
const c
|
| 1271 |
-
if (esc)
|
| 1272 |
-
if (c
|
| 1273 |
-
if (c
|
| 1274 |
-
if (inStr)
|
| 1275 |
-
if (c
|
| 1276 |
-
if (c
|
| 1277 |
depth--;
|
| 1278 |
-
if (depth
|
| 1279 |
-
try { return JSON.parse(str.slice(startIdx, i
|
| 1280 |
-
catch(_) { return null; }
|
| 1281 |
}
|
| 1282 |
}
|
| 1283 |
}
|
| 1284 |
return null;
|
| 1285 |
}
|
| 1286 |
|
| 1287 |
-
//
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
for (let i = text.length - 1; i >= 0; i--) {
|
| 1291 |
-
if (text[i] === '[') { lastArrIdx = i; break; }
|
| 1292 |
-
}
|
| 1293 |
-
if (lastArrIdx !== -1) {
|
| 1294 |
-
const result = extractBalanced(text, lastArrIdx, '[', ']');
|
| 1295 |
-
if (result !== null) return result;
|
| 1296 |
-
}
|
| 1297 |
-
|
| 1298 |
-
// Step 5 — find the LAST JSON object in the text
|
| 1299 |
-
let lastObjIdx = -1;
|
| 1300 |
-
for (let i = text.length - 1; i >= 0; i--) {
|
| 1301 |
-
if (text[i] === '{') { lastObjIdx = i; break; }
|
| 1302 |
}
|
| 1303 |
-
|
| 1304 |
-
|
| 1305 |
-
if (result !== null) return result;
|
| 1306 |
}
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
const
|
| 1310 |
-
if (firstArr !== -1) {
|
| 1311 |
-
const result = extractBalanced(text, firstArr, '[', ']');
|
| 1312 |
-
if (result !== null) return result;
|
| 1313 |
-
}
|
| 1314 |
-
|
| 1315 |
-
// Step 7 — try FIRST object (fallback)
|
| 1316 |
-
const firstObj = text.indexOf('{');
|
| 1317 |
-
if (firstObj !== -1) {
|
| 1318 |
-
const result = extractBalanced(text, firstObj, '{', '}');
|
| 1319 |
-
if (result !== null) return result;
|
| 1320 |
-
}
|
| 1321 |
-
|
| 1322 |
-
// Step 8 — last resort full parse
|
| 1323 |
try { return JSON.parse(text); } catch(_) {}
|
| 1324 |
return null;
|
| 1325 |
}
|
|
@@ -1330,122 +1207,95 @@ function extractGroundingJSON(raw) {
|
|
| 1330 |
const groundCanvas = document.getElementById('groundCanvas');
|
| 1331 |
const groundPlaceholder = document.getElementById('groundPlaceholder');
|
| 1332 |
const gCtx = groundCanvas.getContext('2d');
|
|
|
|
| 1333 |
|
| 1334 |
const PALETTE = ['#4ecdc4','#7c6af7','#ff6b6b','#ffd93d','#6bcb77','#ff922b','#cc5de8','#339af0'];
|
| 1335 |
|
| 1336 |
function hexToRgba(hex, alpha) {
|
| 1337 |
-
const r
|
| 1338 |
-
const g = parseInt(hex.slice(3,5),16);
|
| 1339 |
-
const b = parseInt(hex.slice(5,7),16);
|
| 1340 |
return `rgba(${r},${g},${b},${alpha})`;
|
| 1341 |
}
|
| 1342 |
function roundRect(ctx, x, y, w, h, r) {
|
| 1343 |
-
ctx.beginPath();
|
| 1344 |
-
ctx.moveTo(x+r,y);
|
| 1345 |
ctx.lineTo(x+w-r,y); ctx.quadraticCurveTo(x+w,y,x+w,y+r);
|
| 1346 |
ctx.lineTo(x+w,y+h-r); ctx.quadraticCurveTo(x+w,y+h,x+w-r,y+h);
|
| 1347 |
ctx.lineTo(x+r,y+h); ctx.quadraticCurveTo(x,y+h,x,y+h-r);
|
| 1348 |
-
ctx.lineTo(x,y+r); ctx.quadraticCurveTo(x,y,x+r,y);
|
| 1349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1350 |
}
|
| 1351 |
|
| 1352 |
function drawGrounding(imgSrc, rawText) {
|
| 1353 |
const parsed = extractGroundingJSON(rawText);
|
| 1354 |
-
if (!parsed) {
|
| 1355 |
-
console.warn('Grounding: could not extract JSON:', rawText.slice(0, 200));
|
| 1356 |
-
return;
|
| 1357 |
-
}
|
| 1358 |
|
| 1359 |
const img = new Image();
|
| 1360 |
img.onload = () => {
|
| 1361 |
-
const W
|
| 1362 |
-
groundCanvas.width
|
| 1363 |
-
groundCanvas.height = H;
|
| 1364 |
gCtx.drawImage(img, 0, 0);
|
| 1365 |
-
groundPlaceholder.style.display
|
| 1366 |
|
| 1367 |
-
const lw
|
| 1368 |
-
|
| 1369 |
-
gCtx.
|
| 1370 |
-
gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
|
| 1371 |
|
| 1372 |
const items = Array.isArray(parsed) ? parsed : [parsed];
|
| 1373 |
-
|
| 1374 |
items.forEach((item, i) => {
|
| 1375 |
const col = PALETTE[i % PALETTE.length];
|
| 1376 |
|
| 1377 |
-
// ──
|
| 1378 |
let bbox = null;
|
| 1379 |
-
if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length
|
| 1380 |
-
|
| 1381 |
-
else if (Array.isArray(item
|
| 1382 |
-
bbox = item.bbox;
|
| 1383 |
-
else if (Array.isArray(item) && item.length === 4
|
| 1384 |
-
&& item.every(n => typeof n === 'number'))
|
| 1385 |
-
bbox = item;
|
| 1386 |
|
| 1387 |
if (bbox) {
|
| 1388 |
-
let [x1,y1,x2,y2]
|
| 1389 |
-
|
| 1390 |
-
if (
|
| 1391 |
-
|
| 1392 |
-
}
|
| 1393 |
-
|
| 1394 |
-
|
| 1395 |
-
|
| 1396 |
-
|
| 1397 |
-
|
| 1398 |
-
|
| 1399 |
-
|
| 1400 |
-
gCtx.fillStyle = hexToRgba(col, 0.20);
|
| 1401 |
-
gCtx.fillRect(x1,y1,bw,bh);
|
| 1402 |
-
gCtx.strokeStyle = col;
|
| 1403 |
-
gCtx.lineWidth = lw;
|
| 1404 |
-
gCtx.strokeRect(x1,y1,bw,bh);
|
| 1405 |
-
|
| 1406 |
-
const tw = gCtx.measureText(lbl).width;
|
| 1407 |
-
const ph = fs*1.45, pw = tw+12;
|
| 1408 |
-
const lx = x1, ly = Math.max(0, y1-ph);
|
| 1409 |
-
gCtx.fillStyle = col;
|
| 1410 |
-
roundRect(gCtx,lx,ly,pw,ph,4); gCtx.fill();
|
| 1411 |
-
gCtx.fillStyle = '#fff';
|
| 1412 |
-
gCtx.fillText(lbl, lx+6, ly+ph*0.76);
|
| 1413 |
return;
|
| 1414 |
}
|
| 1415 |
|
| 1416 |
-
// ── Point
|
| 1417 |
let pt = null;
|
| 1418 |
-
if (Array.isArray(item?.point_2d) && item.point_2d.length
|
| 1419 |
-
|
| 1420 |
-
else if (Array.isArray(item
|
| 1421 |
-
pt = item.point;
|
| 1422 |
-
else if (Array.isArray(item) && item.length === 2
|
| 1423 |
-
&& item.every(n => typeof n === 'number'))
|
| 1424 |
-
pt = item;
|
| 1425 |
|
| 1426 |
if (pt) {
|
| 1427 |
-
let [x,y]
|
| 1428 |
-
if (x
|
| 1429 |
-
const r
|
| 1430 |
-
const lbl
|
| 1431 |
-
|
| 1432 |
-
gCtx.
|
| 1433 |
-
gCtx.arc(x,
|
| 1434 |
-
gCtx.fillStyle
|
| 1435 |
-
|
| 1436 |
-
gCtx.
|
| 1437 |
-
gCtx.arc(x, y, r, 0, Math.PI*2);
|
| 1438 |
-
gCtx.fillStyle = col; gCtx.fill();
|
| 1439 |
-
gCtx.strokeStyle = '#fff';
|
| 1440 |
-
gCtx.lineWidth = Math.max(1.5, lw);
|
| 1441 |
-
gCtx.stroke();
|
| 1442 |
-
|
| 1443 |
-
gCtx.fillStyle = '#fff';
|
| 1444 |
-
gCtx.fillText(lbl, x+r+5, y+fs*0.4);
|
| 1445 |
}
|
| 1446 |
});
|
|
|
|
|
|
|
|
|
|
| 1447 |
};
|
| 1448 |
-
img.onerror = () => console.error('Grounding: failed to load image
|
| 1449 |
img.src = imgSrc;
|
| 1450 |
}
|
| 1451 |
|
|
@@ -1460,31 +1310,26 @@ function resetCopyBtn() {
|
|
| 1460 |
copyBtn.classList.remove('copied');
|
| 1461 |
copyBtn.innerHTML = `
|
| 1462 |
<svg width="11" height="11" viewBox="0 0 24 24" fill="none"
|
| 1463 |
-
stroke="currentColor" stroke-width="2.2"
|
| 1464 |
-
stroke-linecap="round" stroke-linejoin="round">
|
| 1465 |
<rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
|
| 1466 |
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
|
| 1467 |
</svg> COPY`;
|
| 1468 |
}
|
| 1469 |
-
|
| 1470 |
copyBtn.onclick = () => {
|
| 1471 |
-
const txt = outputBox.innerText
|
| 1472 |
-
if (!txt
|
| 1473 |
navigator.clipboard.writeText(txt).then(() => {
|
| 1474 |
copyBtn.classList.add('copied');
|
| 1475 |
copyBtn.innerHTML = `
|
| 1476 |
<svg width="11" height="11" viewBox="0 0 24 24" fill="none"
|
| 1477 |
-
stroke="currentColor" stroke-width="2.5"
|
| 1478 |
-
stroke-linecap="round" stroke-linejoin="round">
|
| 1479 |
<polyline points="20 6 9 17 4 12"/>
|
| 1480 |
</svg> COPIED`;
|
| 1481 |
-
clearTimeout(copyTimer);
|
| 1482 |
-
copyTimer = setTimeout(resetCopyBtn, 2000);
|
| 1483 |
}).catch(() => {
|
| 1484 |
-
const ta
|
| 1485 |
-
ta.
|
| 1486 |
-
document.body.appendChild(ta); ta.select();
|
| 1487 |
-
document.execCommand('copy'); document.body.removeChild(ta);
|
| 1488 |
});
|
| 1489 |
};
|
| 1490 |
|
|
@@ -1503,66 +1348,53 @@ runBtn.onclick = async () => {
|
|
| 1503 |
const promptStr = promptInput.value.trim();
|
| 1504 |
if (!promptStr) { alert('Please enter a prompt directive.'); return; }
|
| 1505 |
|
| 1506 |
-
runBtn.disabled
|
| 1507 |
-
|
| 1508 |
-
|
| 1509 |
-
|
| 1510 |
-
|
| 1511 |
-
gCtx.clearRect(0, 0, groundCanvas.width, groundCanvas.height);
|
| 1512 |
dotTask.classList.add('active');
|
| 1513 |
-
dotOut.classList.remove('active');
|
| 1514 |
-
|
| 1515 |
-
allWires.forEach(id => document.getElementById(id)?.classList.add('active'));
|
| 1516 |
resetCopyBtn();
|
| 1517 |
|
| 1518 |
-
const formData
|
| 1519 |
formData.append('image', currentFile);
|
| 1520 |
formData.append('category', categorySelect.value);
|
| 1521 |
formData.append('prompt', promptStr);
|
| 1522 |
formData.append('model_id', modelSelect.value);
|
| 1523 |
|
| 1524 |
-
let fullText
|
| 1525 |
-
|
| 1526 |
|
| 1527 |
try {
|
| 1528 |
-
const response
|
| 1529 |
-
if (!response.ok) {
|
| 1530 |
-
const err = await response.json();
|
| 1531 |
-
throw new Error(err.error || 'Execution failed.');
|
| 1532 |
-
}
|
| 1533 |
-
|
| 1534 |
-
const reader = response.body.getReader();
|
| 1535 |
-
const decoder = new TextDecoder('utf-8');
|
| 1536 |
-
let buffer = '';
|
| 1537 |
|
|
|
|
|
|
|
| 1538 |
while (true) {
|
| 1539 |
-
const {
|
| 1540 |
-
|
| 1541 |
-
|
| 1542 |
-
const lines = buffer.split('\\n\\n');
|
| 1543 |
-
buffer = lines.pop();
|
| 1544 |
for (const line of lines) {
|
| 1545 |
if (!line.startsWith('data: ')) continue;
|
| 1546 |
-
const payload
|
| 1547 |
-
if (payload
|
| 1548 |
try {
|
| 1549 |
-
const data
|
| 1550 |
-
if (data.chunk) {
|
| 1551 |
-
fullText += data.chunk;
|
| 1552 |
-
outputBox.innerText = fullText;
|
| 1553 |
-
outputBox.scrollTop = outputBox.scrollHeight;
|
| 1554 |
-
}
|
| 1555 |
} catch(_) {}
|
| 1556 |
}
|
| 1557 |
}
|
| 1558 |
|
| 1559 |
dotOut.classList.add('active');
|
| 1560 |
|
| 1561 |
-
//
|
| 1562 |
-
const cat
|
| 1563 |
-
if ((cat
|
| 1564 |
-
const parsed
|
| 1565 |
-
if (parsed
|
| 1566 |
dotGnd.classList.add('active');
|
| 1567 |
drawGrounding(imgObjectURL, fullText);
|
| 1568 |
} else {
|
|
@@ -1570,14 +1402,12 @@ runBtn.onclick = async () => {
|
|
| 1570 |
}
|
| 1571 |
}
|
| 1572 |
|
| 1573 |
-
} catch
|
| 1574 |
-
outputBox.innerText
|
| 1575 |
-
outputBox.style.color = '#ff6b6b';
|
| 1576 |
} finally {
|
| 1577 |
-
runBtn.disabled
|
| 1578 |
-
btnLoader.style.display = 'none';
|
| 1579 |
dotTask.classList.remove('active');
|
| 1580 |
-
allWires.forEach(id
|
| 1581 |
}
|
| 1582 |
};
|
| 1583 |
</script>
|
|
@@ -1586,4 +1416,4 @@ runBtn.onclick = async () => {
|
|
| 1586 |
"""
|
| 1587 |
|
| 1588 |
if __name__ == "__main__":
|
| 1589 |
-
app.launch(show_error=True
|
|
|
|
| 36 |
else torch.float16
|
| 37 |
)
|
| 38 |
|
| 39 |
+
QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
|
| 40 |
+
QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"
|
| 41 |
QWEN_4B_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-4B-Unredacted-MAX"
|
| 42 |
QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
|
| 43 |
QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
|
|
|
|
|
|
|
| 44 |
LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
|
| 45 |
GEMMA4_E2B_NAME = "google/gemma-4-E2B-it"
|
| 46 |
LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
|
| 47 |
QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
|
| 48 |
QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
|
| 49 |
|
| 50 |
+
# ── Qwen3-VL-2B-Instruct ────────────────────────────────
|
| 51 |
+
print(f"Loading Qwen3-VL-2B model: {QWEN_VL_2B_MODEL_NAME} on {DEVICE}...")
|
| 52 |
+
try:
|
| 53 |
+
qwen_vl_2b_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 54 |
+
QWEN_VL_2B_MODEL_NAME,
|
| 55 |
+
trust_remote_code=True,
|
| 56 |
+
torch_dtype=torch.bfloat16,
|
| 57 |
+
).to(DEVICE).eval()
|
| 58 |
+
qwen_vl_2b_processor = AutoProcessor.from_pretrained(
|
| 59 |
+
QWEN_VL_2B_MODEL_NAME, trust_remote_code=True
|
| 60 |
+
)
|
| 61 |
+
print("Qwen3-VL-2B model loaded successfully.")
|
| 62 |
+
except Exception as e:
|
| 63 |
+
print(f"Warning: Qwen3-VL-2B model loading failed. Error: {e}")
|
| 64 |
+
qwen_vl_2b_model = None
|
| 65 |
+
qwen_vl_2b_processor = None
|
| 66 |
+
|
| 67 |
+
# ── Qwen3-VL-4B-Instruct ────────────────────────────────
|
| 68 |
+
print(f"Loading Qwen3-VL-4B model: {QWEN_VL_4B_MODEL_NAME} on {DEVICE}...")
|
| 69 |
+
try:
|
| 70 |
+
qwen_vl_4b_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 71 |
+
QWEN_VL_4B_MODEL_NAME,
|
| 72 |
+
trust_remote_code=True,
|
| 73 |
+
torch_dtype=torch.bfloat16,
|
| 74 |
+
).to(DEVICE).eval()
|
| 75 |
+
qwen_vl_4b_processor = AutoProcessor.from_pretrained(
|
| 76 |
+
QWEN_VL_4B_MODEL_NAME, trust_remote_code=True
|
| 77 |
+
)
|
| 78 |
+
print("Qwen3-VL-4B model loaded successfully.")
|
| 79 |
+
except Exception as e:
|
| 80 |
+
print(f"Warning: Qwen3-VL-4B model loading failed. Error: {e}")
|
| 81 |
+
qwen_vl_4b_model = None
|
| 82 |
+
qwen_vl_4b_processor = None
|
| 83 |
+
|
| 84 |
# ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
|
| 85 |
print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
|
| 86 |
try:
|
|
|
|
| 120 |
qwen_2b_model = None
|
| 121 |
qwen_2b_processor = None
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
# ── LFM2.5-VL-450M ──────────────────────────────────────
|
| 124 |
print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
|
| 125 |
try:
|
|
|
|
| 215 |
# --- Inference Generator (Streaming) ---
|
| 216 |
@spaces.GPU(duration=120)
|
| 217 |
def generate_inference_stream(
|
| 218 |
+
image: Image.Image, category: str, prompt: str, model_id: str = "qwen_vl_2b"
|
| 219 |
):
|
| 220 |
if category == "Query":
|
| 221 |
full_prompt = prompt
|
|
|
|
| 228 |
else:
|
| 229 |
full_prompt = prompt
|
| 230 |
|
| 231 |
+
# ── Qwen3-VL-2B ─────────────���───────────────────────
|
| 232 |
+
if model_id == "qwen_vl_2b":
|
| 233 |
+
if qwen_vl_2b_model is None or qwen_vl_2b_processor is None:
|
| 234 |
+
yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-2B model not loaded.'})}\n\n"
|
| 235 |
yield "data: [DONE]\n\n"
|
| 236 |
return
|
| 237 |
messages = [{"role": "user", "content": [
|
| 238 |
{"type": "image", "image": image},
|
| 239 |
{"type": "text", "text": full_prompt},
|
| 240 |
]}]
|
| 241 |
+
text_input = qwen_vl_2b_processor.apply_chat_template(
|
| 242 |
messages, tokenize=False, add_generation_prompt=True
|
| 243 |
)
|
| 244 |
+
inputs = qwen_vl_2b_processor(
|
| 245 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 246 |
+
).to(qwen_vl_2b_model.device)
|
| 247 |
streamer = TextIteratorStreamer(
|
| 248 |
+
qwen_vl_2b_processor.tokenizer,
|
| 249 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 250 |
)
|
| 251 |
thread = threading.Thread(
|
| 252 |
+
target=qwen_vl_2b_model.generate,
|
| 253 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 254 |
+
use_cache=True, temperature=1.0, do_sample=True),
|
| 255 |
)
|
| 256 |
thread.start()
|
| 257 |
for tok in streamer:
|
|
|
|
| 259 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 260 |
thread.join()
|
| 261 |
|
| 262 |
+
# ── Qwen3-VL-4B ─────────────────────────────────────
|
| 263 |
+
elif model_id == "qwen_vl_4b":
|
| 264 |
+
if qwen_vl_4b_model is None or qwen_vl_4b_processor is None:
|
| 265 |
+
yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-4B model not loaded.'})}\n\n"
|
| 266 |
yield "data: [DONE]\n\n"
|
| 267 |
return
|
| 268 |
messages = [{"role": "user", "content": [
|
| 269 |
{"type": "image", "image": image},
|
| 270 |
{"type": "text", "text": full_prompt},
|
| 271 |
]}]
|
| 272 |
+
text_input = qwen_vl_4b_processor.apply_chat_template(
|
| 273 |
messages, tokenize=False, add_generation_prompt=True
|
| 274 |
)
|
| 275 |
+
inputs = qwen_vl_4b_processor(
|
| 276 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 277 |
+
).to(qwen_vl_4b_model.device)
|
| 278 |
streamer = TextIteratorStreamer(
|
| 279 |
+
qwen_vl_4b_processor.tokenizer,
|
| 280 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 281 |
)
|
| 282 |
thread = threading.Thread(
|
| 283 |
+
target=qwen_vl_4b_model.generate,
|
| 284 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 285 |
+
use_cache=True, temperature=1.0, do_sample=True),
|
| 286 |
)
|
| 287 |
thread.start()
|
| 288 |
for tok in streamer:
|
|
|
|
| 290 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 291 |
thread.join()
|
| 292 |
|
| 293 |
+
# ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
|
| 294 |
+
elif model_id == "qwen_4b_unredacted":
|
| 295 |
+
if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
|
| 296 |
+
yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
|
| 297 |
yield "data: [DONE]\n\n"
|
| 298 |
return
|
| 299 |
messages = [{"role": "user", "content": [
|
| 300 |
{"type": "image", "image": image},
|
| 301 |
{"type": "text", "text": full_prompt},
|
| 302 |
]}]
|
| 303 |
+
text_input = qwen_4b_unredacted_processor.apply_chat_template(
|
| 304 |
messages, tokenize=False, add_generation_prompt=True
|
| 305 |
)
|
| 306 |
+
inputs = qwen_4b_unredacted_processor(
|
| 307 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 308 |
+
).to(qwen_4b_unredacted_model.device)
|
| 309 |
streamer = TextIteratorStreamer(
|
| 310 |
+
qwen_4b_unredacted_processor.tokenizer,
|
| 311 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 312 |
)
|
| 313 |
thread = threading.Thread(
|
| 314 |
+
target=qwen_4b_unredacted_model.generate,
|
| 315 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 316 |
use_cache=True, temperature=1.5, min_p=0.1),
|
| 317 |
)
|
|
|
|
| 321 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 322 |
thread.join()
|
| 323 |
|
| 324 |
+
# ── Qwen3.5-4B ──────────────────────────────────────
|
| 325 |
+
elif model_id == "qwen_4b":
|
| 326 |
+
if qwen_4b_model is None or qwen_4b_processor is None:
|
| 327 |
+
yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
|
| 328 |
yield "data: [DONE]\n\n"
|
| 329 |
return
|
| 330 |
messages = [{"role": "user", "content": [
|
| 331 |
{"type": "image", "image": image},
|
| 332 |
{"type": "text", "text": full_prompt},
|
| 333 |
]}]
|
| 334 |
+
text_input = qwen_4b_processor.apply_chat_template(
|
| 335 |
messages, tokenize=False, add_generation_prompt=True
|
| 336 |
)
|
| 337 |
+
inputs = qwen_4b_processor(
|
| 338 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 339 |
+
).to(qwen_4b_model.device)
|
| 340 |
streamer = TextIteratorStreamer(
|
| 341 |
+
qwen_4b_processor.tokenizer,
|
| 342 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 343 |
)
|
| 344 |
thread = threading.Thread(
|
| 345 |
+
target=qwen_4b_model.generate,
|
| 346 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 347 |
+
use_cache=True, temperature=1.5, min_p=0.1),
|
| 348 |
)
|
| 349 |
thread.start()
|
| 350 |
for tok in streamer:
|
|
|
|
| 352 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 353 |
thread.join()
|
| 354 |
|
| 355 |
+
# ── Qwen3.5-2B ──────────────────────────────────────
|
| 356 |
+
elif model_id == "qwen_2b":
|
| 357 |
+
if qwen_2b_model is None or qwen_2b_processor is None:
|
| 358 |
+
yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B model not loaded.'})}\n\n"
|
| 359 |
yield "data: [DONE]\n\n"
|
| 360 |
return
|
| 361 |
messages = [{"role": "user", "content": [
|
| 362 |
{"type": "image", "image": image},
|
| 363 |
{"type": "text", "text": full_prompt},
|
| 364 |
]}]
|
| 365 |
+
text_input = qwen_2b_processor.apply_chat_template(
|
| 366 |
messages, tokenize=False, add_generation_prompt=True
|
| 367 |
)
|
| 368 |
+
inputs = qwen_2b_processor(
|
| 369 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 370 |
+
).to(qwen_2b_model.device)
|
| 371 |
streamer = TextIteratorStreamer(
|
| 372 |
+
qwen_2b_processor.tokenizer,
|
| 373 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 374 |
)
|
| 375 |
thread = threading.Thread(
|
| 376 |
+
target=qwen_2b_model.generate,
|
| 377 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 378 |
+
use_cache=True, temperature=1.5, min_p=0.1),
|
| 379 |
)
|
| 380 |
thread.start()
|
| 381 |
for tok in streamer:
|
|
|
|
| 546 |
image: UploadFile = File(...),
|
| 547 |
category: str = Form(...),
|
| 548 |
prompt: str = Form(...),
|
| 549 |
+
model_id: str = Form("qwen_vl_2b"),
|
| 550 |
):
|
| 551 |
try:
|
| 552 |
img_bytes = await image.read()
|
|
|
|
| 589 |
}
|
| 590 |
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 591 |
html, body {
|
| 592 |
+
min-height: 100%; background: var(--bg);
|
| 593 |
+
color: var(--text); font-family: 'JetBrains Mono', monospace;
|
|
|
|
|
|
|
| 594 |
}
|
| 595 |
body {
|
| 596 |
background-image:
|
|
|
|
| 603 |
}
|
| 604 |
/* ── Top Bar ── */
|
| 605 |
.top-bar {
|
| 606 |
+
position: sticky; top: 0; left: 0; right: 0; height: 42px;
|
|
|
|
| 607 |
background: rgba(13,13,15,0.95);
|
| 608 |
border-bottom: 1px solid var(--node-border);
|
| 609 |
display: flex; align-items: center; padding: 0 20px;
|
| 610 |
+
gap: 12px; z-index: 1000; backdrop-filter: blur(12px);
|
|
|
|
| 611 |
}
|
| 612 |
.top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
|
| 613 |
.top-bar .sep { color: var(--node-border); }
|
|
|
|
| 621 |
}
|
| 622 |
/* ── Canvas ── */
|
| 623 |
#canvas {
|
| 624 |
+
position: relative; width: 1360px;
|
| 625 |
+
min-height: calc(100vh - 42px); height: 900px; margin: 0 auto;
|
|
|
|
|
|
|
|
|
|
| 626 |
}
|
| 627 |
svg.wires {
|
| 628 |
position: absolute; top: 0; left: 0;
|
| 629 |
width: 100%; height: 100%;
|
| 630 |
+
pointer-events: none; z-index: 2; overflow: visible;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
}
|
| 632 |
+
path.wire { fill: none; stroke: var(--wire); stroke-width: 2.5; stroke-linecap: round; }
|
| 633 |
path.wire.active {
|
| 634 |
stroke: var(--wire-active); stroke-width: 3;
|
| 635 |
+
stroke-dasharray: 8 4; animation: flow 0.6s linear infinite;
|
|
|
|
| 636 |
}
|
| 637 |
@keyframes flow { to { stroke-dashoffset: -24; } }
|
| 638 |
/* ── Nodes ── */
|
| 639 |
.node {
|
| 640 |
position: absolute; width: 295px;
|
| 641 |
+
background: var(--node-bg); border: 1px solid var(--node-border);
|
| 642 |
+
border-radius: 9px; box-shadow: 0 8px 28px rgba(0,0,0,0.5);
|
| 643 |
+
z-index: 10; display: flex; flex-direction: column; transition: box-shadow 0.2s;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
}
|
| 645 |
+
.node:hover { box-shadow: 0 8px 28px rgba(0,0,0,0.5), 0 0 0 1px rgba(124,106,247,0.3); }
|
| 646 |
.node.fixed-height { height: 340px; }
|
| 647 |
.node-header {
|
| 648 |
+
background: var(--node-header); padding: 7px 12px;
|
| 649 |
+
border-bottom: 1px solid var(--node-border); border-radius: 9px 9px 0 0;
|
| 650 |
+
font-size: 11px; font-weight: 700; cursor: grab;
|
|
|
|
|
|
|
|
|
|
| 651 |
display: flex; justify-content: space-between; align-items: center;
|
| 652 |
flex-shrink: 0; user-select: none;
|
| 653 |
}
|
| 654 |
.node-header:active { cursor: grabbing; }
|
| 655 |
.node-header .id {
|
| 656 |
font-size: 10px; color: var(--muted);
|
| 657 |
+
background: rgba(255,255,255,0.04); padding: 2px 7px; border-radius: 4px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 658 |
}
|
| 659 |
+
.node-body { padding: 10px; display: flex; flex-direction: column; gap: 8px; flex: 1; overflow: hidden; }
|
| 660 |
/* ── Ports ── */
|
| 661 |
.port {
|
| 662 |
position: absolute; width: 11px; height: 11px;
|
| 663 |
+
background: var(--node-bg); border: 2px solid var(--port);
|
|
|
|
| 664 |
border-radius: 50%; z-index: 30;
|
| 665 |
}
|
| 666 |
.port.out { right: -6px; }
|
| 667 |
.port.in { left: -6px; }
|
| 668 |
/* ── Labels ── */
|
| 669 |
label {
|
| 670 |
+
font-size: 10px; color: var(--muted); font-weight: 600;
|
| 671 |
+
display: block; margin-bottom: 3px; letter-spacing: 0.07em; text-transform: uppercase;
|
|
|
|
| 672 |
}
|
| 673 |
input[type="file"] { display: none; }
|
| 674 |
/* ── Upload Zone ── */
|
| 675 |
.file-upload {
|
| 676 |
+
border: 1.5px dashed var(--node-border); border-radius: 7px; padding: 12px 10px;
|
| 677 |
+
text-align: center; cursor: pointer; font-size: 11px; color: var(--muted);
|
| 678 |
+
transition: border-color 0.2s, background 0.2s; background: rgba(255,255,255,0.01);
|
|
|
|
|
|
|
|
|
|
| 679 |
display: flex; flex-direction: column; align-items: center; gap: 5px;
|
| 680 |
}
|
| 681 |
+
.file-upload:hover { border-color: var(--accent); background: rgba(124,106,247,0.04); }
|
|
|
|
|
|
|
|
|
|
| 682 |
.file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
|
| 683 |
.file-upload:hover svg { opacity: 0.9; }
|
| 684 |
/* ── Preview wrapper ── */
|
| 685 |
.preview-wrap {
|
| 686 |
+
display: none; position: relative; border-radius: 7px;
|
| 687 |
+
overflow: hidden; border: 1px solid var(--node-border); background: #000;
|
|
|
|
| 688 |
}
|
| 689 |
.preview-wrap.visible { display: block; }
|
| 690 |
.img-preview { width: 100%; height: 170px; object-fit: contain; display: block; }
|
| 691 |
/* ── Clear button ── */
|
| 692 |
.clear-btn {
|
| 693 |
+
position: absolute; top: 6px; right: 6px; width: 24px; height: 24px;
|
| 694 |
+
border-radius: 50%; background: rgba(13,13,15,0.80);
|
| 695 |
+
border: 1px solid var(--node-border); color: var(--accent3); cursor: pointer;
|
|
|
|
|
|
|
| 696 |
display: flex; align-items: center; justify-content: center;
|
| 697 |
transition: background 0.18s, border-color 0.18s, transform 0.12s;
|
| 698 |
z-index: 20; backdrop-filter: blur(6px);
|
| 699 |
}
|
| 700 |
+
.clear-btn:hover { background: rgba(255,107,107,0.18); border-color: var(--accent3); transform: scale(1.08); }
|
|
|
|
|
|
|
|
|
|
| 701 |
.clear-btn:active { transform: scale(0.95); }
|
| 702 |
.clear-btn svg { pointer-events: none; }
|
| 703 |
/* ── Filename chip ── */
|
| 704 |
.img-chip {
|
| 705 |
display: none; align-items: center; gap: 6px;
|
| 706 |
+
background: rgba(124,106,247,0.08); border: 1px solid rgba(124,106,247,0.22);
|
| 707 |
+
border-radius: 5px; padding: 4px 8px; font-size: 9px; color: var(--muted); overflow: hidden;
|
|
|
|
|
|
|
| 708 |
}
|
| 709 |
.img-chip.visible { display: flex; }
|
| 710 |
+
.img-chip .chip-dot { width: 5px; height: 5px; border-radius: 50%; background: var(--accent2); flex-shrink: 0; box-shadow: 0 0 4px var(--accent2); }
|
| 711 |
+
.img-chip .chip-name { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; flex: 1; color: var(--text); font-size: 9px; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 712 |
.img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 9px; }
|
| 713 |
select, textarea {
|
| 714 |
+
width: 100%; background: rgba(0,0,0,0.3); border: 1px solid var(--node-border);
|
| 715 |
+
color: var(--text); padding: 7px 9px; border-radius: 5px; outline: none;
|
|
|
|
|
|
|
| 716 |
font-size: 11px; font-family: 'JetBrains Mono', monospace;
|
| 717 |
resize: none; transition: border-color 0.2s;
|
| 718 |
}
|
|
|
|
| 721 |
button.run-btn {
|
| 722 |
background: linear-gradient(135deg, var(--accent), #9b59b6);
|
| 723 |
color: #fff; border: none; padding: 8px; border-radius: 6px;
|
| 724 |
+
font-weight: 700; font-size: 11px; font-family: 'JetBrains Mono', monospace;
|
| 725 |
+
cursor: pointer; transition: opacity 0.2s, transform 0.1s;
|
|
|
|
| 726 |
display: flex; justify-content: center; align-items: center; gap: 8px;
|
| 727 |
letter-spacing: 0.04em; flex-shrink: 0;
|
| 728 |
}
|
| 729 |
+
button.run-btn:hover { opacity: 0.9; }
|
| 730 |
+
button.run-btn:active { transform: scale(0.98); }
|
| 731 |
button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
|
| 732 |
/* ── Output node ── */
|
| 733 |
+
.output-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
|
| 734 |
+
.output-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
|
| 735 |
+
/* ── Icon buttons (copy / download) ── */
|
| 736 |
+
.icon-btn {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 737 |
display: flex; align-items: center; gap: 5px;
|
| 738 |
+
background: rgba(124,106,247,0.10); border: 1px solid rgba(124,106,247,0.25);
|
|
|
|
| 739 |
border-radius: 5px; padding: 3px 8px;
|
| 740 |
+
font-size: 9px; font-weight: 700; font-family: 'JetBrains Mono', monospace;
|
| 741 |
+
color: var(--accent); cursor: pointer; letter-spacing: 0.05em;
|
| 742 |
+
transition: background 0.18s, border-color 0.18s, transform 0.1s; flex-shrink: 0;
|
| 743 |
+
text-decoration: none;
|
|
|
|
|
|
|
| 744 |
}
|
| 745 |
+
.icon-btn:hover { background: rgba(124,106,247,0.22); border-color: var(--accent); }
|
| 746 |
+
.icon-btn:active { transform: scale(0.95); }
|
| 747 |
+
.icon-btn.teal {
|
| 748 |
+
background: rgba(78,205,196,0.10); border-color: rgba(78,205,196,0.25); color: var(--accent2);
|
|
|
|
| 749 |
}
|
| 750 |
+
.icon-btn.teal:hover { background: rgba(78,205,196,0.22); border-color: var(--accent2); }
|
| 751 |
+
.icon-btn.copied { background: rgba(78,205,196,0.15); border-color: var(--accent2); color: var(--accent2); }
|
| 752 |
+
.icon-btn svg { pointer-events: none; flex-shrink: 0; }
|
| 753 |
.output-box {
|
| 754 |
+
background: rgba(0,0,0,0.4); border: 1px solid var(--node-border);
|
| 755 |
+
border-radius: 5px; padding: 10px; flex: 1; overflow-y: auto;
|
| 756 |
+
font-size: 11px; line-height: 1.6; color: #c8c8e0; white-space: pre-wrap;
|
| 757 |
+
user-select: text; font-family: 'JetBrains Mono', monospace; min-height: 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
}
|
| 759 |
+
/* ── Grounding node ── */
|
| 760 |
+
.ground-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
|
| 761 |
+
.ground-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
|
| 762 |
.ground-canvas-wrap {
|
| 763 |
+
position: relative; flex: 1; border: 1px solid var(--node-border);
|
| 764 |
+
border-radius: 5px; overflow: hidden; background: #000; min-height: 0;
|
|
|
|
|
|
|
| 765 |
}
|
| 766 |
.ground-canvas-wrap canvas { width: 100%; height: 100%; object-fit: contain; display: block; }
|
| 767 |
.ground-placeholder {
|
| 768 |
+
position: absolute; inset: 0; display: flex; align-items: center;
|
| 769 |
+
justify-content: center; font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
|
|
|
|
| 770 |
}
|
| 771 |
.loader {
|
| 772 |
+
width: 11px; height: 11px; border: 2px solid rgba(255,255,255,0.3);
|
|
|
|
| 773 |
border-top-color: #fff; border-radius: 50%;
|
| 774 |
animation: spin 0.7s linear infinite; display: none;
|
| 775 |
}
|
| 776 |
@keyframes spin { to { transform: rotate(360deg); } }
|
| 777 |
+
.status-dot { width: 6px; height: 6px; border-radius: 50%; background: var(--muted); display: inline-block; margin-right: 6px; }
|
|
|
|
|
|
|
|
|
|
| 778 |
.status-dot.active { background: var(--accent2); box-shadow: 0 0 5px var(--accent2); }
|
| 779 |
/* ── Model badges ── */
|
| 780 |
.model-badge {
|
| 781 |
+
display: inline-block; padding: 2px 7px; border-radius: 4px;
|
| 782 |
+
font-size: 9px; font-weight: 700; letter-spacing: 0.06em; text-transform: uppercase;
|
|
|
|
| 783 |
}
|
| 784 |
+
.model-badge.qvl2b { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
|
| 785 |
+
.model-badge.qvl4b { background: rgba(255,100,80,0.15); color: #ff6450; border: 1px solid rgba(255,100,80,0.35); }
|
| 786 |
.model-badge.q4bunred { background: rgba(255,80,80,0.18); color: #ff5050; border: 1px solid rgba(255,80,80,0.40); }
|
| 787 |
.model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
|
| 788 |
.model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
|
|
|
|
|
|
|
| 789 |
.model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
|
| 790 |
.model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
|
| 791 |
.model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
|
| 792 |
.model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
|
| 793 |
.model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
|
| 794 |
+
.model-info-box { border-radius: 6px; padding: 9px; font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0; }
|
|
|
|
|
|
|
|
|
|
| 795 |
.canvas-footer { height: 36px; }
|
| 796 |
</style>
|
| 797 |
</head>
|
|
|
|
| 823 |
<label>Upload Image</label>
|
| 824 |
<div class="file-upload" id="dropZone">
|
| 825 |
<svg width="30" height="30" viewBox="0 0 24 24" fill="none"
|
| 826 |
+
stroke="#7c6af7" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
|
|
|
|
| 827 |
<rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
|
| 828 |
<circle cx="8.5" cy="8.5" r="1.5"/>
|
| 829 |
<polyline points="21 15 16 10 5 21"/>
|
|
|
|
| 835 |
<img id="imgPreview" class="img-preview" />
|
| 836 |
<button class="clear-btn" id="clearBtn" title="Remove image">
|
| 837 |
<svg width="12" height="12" viewBox="0 0 24 24" fill="none"
|
| 838 |
+
stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
|
|
|
|
| 839 |
<line x1="18" y1="6" x2="6" y2="18"/>
|
| 840 |
<line x1="6" y1="6" x2="18" y2="18"/>
|
| 841 |
</svg>
|
|
|
|
| 861 |
<div>
|
| 862 |
<label>Active Model</label>
|
| 863 |
<select id="modelSelect">
|
| 864 |
+
<option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
|
| 865 |
+
<option value="qwen_vl_4b">Qwen3-VL-4B-Instruct</option>
|
| 866 |
<option value="qwen_4b_unredacted">Qwen3.5-4B-Unredacted-MAX</option>
|
| 867 |
<option value="qwen_4b">Qwen3.5-4B</option>
|
| 868 |
<option value="qwen_2b">Qwen3.5-2B</option>
|
|
|
|
|
|
|
| 869 |
<option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
|
| 870 |
<option value="gemma4_e2b">Gemma4-E2B-it (Google)</option>
|
| 871 |
<option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
|
|
|
|
| 874 |
</select>
|
| 875 |
</div>
|
| 876 |
<div id="modelInfoBox" class="model-info-box"
|
| 877 |
+
style="background:rgba(255,150,50,0.07);border:1px solid rgba(255,150,50,0.3);">
|
| 878 |
+
<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
|
| 879 |
+
Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
|
| 880 |
+
Strong spatial grounding, OCR & instruction-following.
|
| 881 |
</div>
|
| 882 |
<div style="flex:1;"></div>
|
| 883 |
</div>
|
|
|
|
| 924 |
<div class="output-node-body">
|
| 925 |
<div class="output-header-row">
|
| 926 |
<label style="margin-bottom:0;">Streamed Result</label>
|
| 927 |
+
<button class="icon-btn" id="copyBtn" title="Copy result to clipboard">
|
| 928 |
<svg width="11" height="11" viewBox="0 0 24 24" fill="none"
|
| 929 |
+
stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
|
|
|
|
| 930 |
<rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
|
| 931 |
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
|
| 932 |
</svg>
|
|
|
|
| 944 |
<span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
|
| 945 |
<span class="id">ID: 05</span>
|
| 946 |
</div>
|
| 947 |
+
<div class="ground-node-body">
|
| 948 |
+
<div class="ground-header-row">
|
| 949 |
+
<label style="margin-bottom:0;">Point / Detect Overlay</label>
|
| 950 |
+
<a class="icon-btn teal" id="downloadBtn" title="Download overlay image" style="display:none;">
|
| 951 |
+
<svg width="11" height="11" viewBox="0 0 24 24" fill="none"
|
| 952 |
+
stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
|
| 953 |
+
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
| 954 |
+
<polyline points="7 10 12 15 17 10"/>
|
| 955 |
+
<line x1="12" y1="15" x2="12" y2="3"/>
|
| 956 |
+
</svg>
|
| 957 |
+
SAVE
|
| 958 |
+
</a>
|
| 959 |
+
</div>
|
| 960 |
<div class="ground-canvas-wrap">
|
| 961 |
<canvas id="groundCanvas"></canvas>
|
| 962 |
<div class="ground-placeholder" id="groundPlaceholder">
|
|
|
|
| 976 |
const canvasEl = document.getElementById('canvas');
|
| 977 |
function portCenter(id) {
|
| 978 |
const el = document.getElementById(id);
|
| 979 |
+
if (!el) return {x:0,y:0};
|
| 980 |
+
const er = el.getBoundingClientRect(), cr = canvasEl.getBoundingClientRect();
|
|
|
|
| 981 |
return { x: er.left + er.width/2 - cr.left, y: er.top + er.height/2 - cr.top };
|
| 982 |
}
|
| 983 |
function bezier(p1, p2) {
|
|
|
|
| 1004 |
const header = node.querySelector('.node-header');
|
| 1005 |
let drag = false, sx, sy, il, it;
|
| 1006 |
header.addEventListener('mousedown', e => {
|
| 1007 |
+
drag=true; sx=e.clientX; sy=e.clientY;
|
| 1008 |
+
il=parseInt(node.style.left)||0; it=parseInt(node.style.top)||0;
|
| 1009 |
+
node.style.zIndex=100; e.preventDefault();
|
| 1010 |
});
|
| 1011 |
document.addEventListener('mousemove', e => {
|
| 1012 |
if (!drag) return;
|
| 1013 |
+
node.style.left=`${il+e.clientX-sx}px`; node.style.top=`${it+e.clientY-sy}px`;
|
|
|
|
| 1014 |
updateWires();
|
| 1015 |
});
|
| 1016 |
+
document.addEventListener('mouseup', () => { if(drag){drag=false;node.style.zIndex=10;} });
|
| 1017 |
});
|
| 1018 |
window.addEventListener('resize', updateWires);
|
| 1019 |
window.addEventListener('scroll', updateWires);
|
|
|
|
| 1035 |
const dotImg = document.getElementById('dot-img');
|
| 1036 |
|
| 1037 |
function formatBytes(b) {
|
| 1038 |
+
if (b<1024) return b+' B'; if (b<1048576) return (b/1024).toFixed(1)+' KB';
|
| 1039 |
+
return (b/1048576).toFixed(1)+' MB';
|
|
|
|
| 1040 |
}
|
| 1041 |
function handleFile(file) {
|
| 1042 |
+
if (!file||!file.type.startsWith('image/')) return;
|
| 1043 |
+
currentFile=file; imgPreview.src=URL.createObjectURL(file);
|
| 1044 |
+
previewWrap.classList.add('visible'); dropZone.style.display='none';
|
| 1045 |
+
chipName.textContent=file.name; chipSize.textContent=formatBytes(file.size);
|
| 1046 |
+
imgChip.classList.add('visible'); dotImg.classList.add('active');
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1047 |
requestAnimationFrame(updateWires);
|
| 1048 |
}
|
| 1049 |
function clearImage() {
|
| 1050 |
+
currentFile=null; imgPreview.src=''; previewWrap.classList.remove('visible');
|
| 1051 |
+
dropZone.style.display=''; imgChip.classList.remove('visible');
|
| 1052 |
+
chipName.textContent='—'; chipSize.textContent=''; fileInput.value='';
|
| 1053 |
+
dotImg.classList.remove('active'); requestAnimationFrame(updateWires);
|
|
|
|
|
|
|
|
|
|
| 1054 |
}
|
| 1055 |
dropZone.onclick = () => fileInput.click();
|
| 1056 |
fileInput.onchange = e => handleFile(e.target.files[0]);
|
|
|
|
| 1071 |
dotModel.classList.add('active');
|
| 1072 |
|
| 1073 |
const MODEL_INFO = {
|
| 1074 |
+
qwen_vl_2b: {
|
| 1075 |
+
html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
|
| 1076 |
+
Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
|
| 1077 |
+
Strong spatial grounding, OCR & instruction-following.`,
|
| 1078 |
+
bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.30)',
|
| 1079 |
+
},
|
| 1080 |
+
qwen_vl_4b: {
|
| 1081 |
+
html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
|
| 1082 |
+
Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
|
| 1083 |
+
Superior spatial grounding, richer OCR & stronger multi-step reasoning.`,
|
| 1084 |
+
bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
|
| 1085 |
+
},
|
| 1086 |
qwen_4b_unredacted: {
|
| 1087 |
html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
|
| 1088 |
Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
|
|
|
|
| 1101 |
Lightweight & fast — ideal for quick Query, Caption, Point & Detect tasks.`,
|
| 1102 |
bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
|
| 1103 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1104 |
lfm_450: {
|
| 1105 |
html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
|
| 1106 |
LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
|
|
|
|
| 1151 |
Point: 'e.g., The gun held by the person.',
|
| 1152 |
Detect: 'e.g., The headlight of the car.',
|
| 1153 |
};
|
| 1154 |
+
categorySelect.onchange = e => { promptInput.placeholder = PLACEHOLDERS[e.target.value]||''; };
|
| 1155 |
|
| 1156 |
// ══════════════════════════════════════════════
|
| 1157 |
// ROBUST JSON EXTRACTOR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1158 |
// ══════════════════════════════════════════════
|
| 1159 |
function extractGroundingJSON(raw) {
|
| 1160 |
+
// 1. Strip all <think>…</think> blocks (multi-pass)
|
| 1161 |
+
let text = raw, prev = null;
|
|
|
|
|
|
|
|
|
|
| 1162 |
while (prev !== text) {
|
| 1163 |
prev = text;
|
| 1164 |
text = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
|
| 1165 |
}
|
| 1166 |
+
// 2. Strip markdown fences, keep inner content
|
| 1167 |
+
text = text.replace(/```(?:json)?([\s\S]*?)```/gi, '$1');
|
| 1168 |
+
text = text.replace(/```/g, '').trim();
|
| 1169 |
|
| 1170 |
+
// Balanced bracket extractor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1171 |
function extractBalanced(str, startIdx, openCh, closeCh) {
|
| 1172 |
+
let depth=0, inStr=false, esc=false;
|
| 1173 |
+
for (let i=startIdx; i<str.length; i++) {
|
| 1174 |
+
const c=str[i];
|
| 1175 |
+
if (esc) { esc=false; continue; }
|
| 1176 |
+
if (c==='\\\\') { esc=true; continue; }
|
| 1177 |
+
if (c==='"') { inStr=!inStr; continue; }
|
| 1178 |
+
if (inStr) continue;
|
| 1179 |
+
if (c===openCh) depth++;
|
| 1180 |
+
if (c===closeCh) {
|
| 1181 |
depth--;
|
| 1182 |
+
if (depth===0) {
|
| 1183 |
+
try { return JSON.parse(str.slice(startIdx, i+1)); } catch(_) { return null; }
|
|
|
|
| 1184 |
}
|
| 1185 |
}
|
| 1186 |
}
|
| 1187 |
return null;
|
| 1188 |
}
|
| 1189 |
|
| 1190 |
+
// Search from the END — models emit JSON after reasoning prose
|
| 1191 |
+
for (let i=text.length-1; i>=0; i--) {
|
| 1192 |
+
if (text[i]==='[') { const r=extractBalanced(text,i,'[',']'); if(r!==null) return r; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1193 |
}
|
| 1194 |
+
for (let i=text.length-1; i>=0; i--) {
|
| 1195 |
+
if (text[i]==='{') { const r=extractBalanced(text,i,'{','}'); if(r!==null) return r; }
|
|
|
|
| 1196 |
}
|
| 1197 |
+
// Fallback: search from start
|
| 1198 |
+
const fa=text.indexOf('['); if(fa!==-1){const r=extractBalanced(text,fa,'[',']');if(r!==null)return r;}
|
| 1199 |
+
const fo=text.indexOf('{'); if(fo!==-1){const r=extractBalanced(text,fo,'{','}');if(r!==null)return r;}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1200 |
try { return JSON.parse(text); } catch(_) {}
|
| 1201 |
return null;
|
| 1202 |
}
|
|
|
|
| 1207 |
const groundCanvas = document.getElementById('groundCanvas');
|
| 1208 |
const groundPlaceholder = document.getElementById('groundPlaceholder');
|
| 1209 |
const gCtx = groundCanvas.getContext('2d');
|
| 1210 |
+
const downloadBtn = document.getElementById('downloadBtn');
|
| 1211 |
|
| 1212 |
const PALETTE = ['#4ecdc4','#7c6af7','#ff6b6b','#ffd93d','#6bcb77','#ff922b','#cc5de8','#339af0'];
|
| 1213 |
|
| 1214 |
function hexToRgba(hex, alpha) {
|
| 1215 |
+
const r=parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
|
|
|
|
|
|
|
| 1216 |
return `rgba(${r},${g},${b},${alpha})`;
|
| 1217 |
}
|
| 1218 |
function roundRect(ctx, x, y, w, h, r) {
|
| 1219 |
+
ctx.beginPath(); ctx.moveTo(x+r,y);
|
|
|
|
| 1220 |
ctx.lineTo(x+w-r,y); ctx.quadraticCurveTo(x+w,y,x+w,y+r);
|
| 1221 |
ctx.lineTo(x+w,y+h-r); ctx.quadraticCurveTo(x+w,y+h,x+w-r,y+h);
|
| 1222 |
ctx.lineTo(x+r,y+h); ctx.quadraticCurveTo(x,y+h,x,y+h-r);
|
| 1223 |
+
ctx.lineTo(x,y+r); ctx.quadraticCurveTo(x,y,x+r,y); ctx.closePath();
|
| 1224 |
+
}
|
| 1225 |
+
|
| 1226 |
+
function updateDownloadBtn() {
|
| 1227 |
+
// Build a timestamped filename and update the anchor href
|
| 1228 |
+
const dataURL = groundCanvas.toDataURL('image/png');
|
| 1229 |
+
const ts = new Date().toISOString().replace(/[:.]/g,'-').slice(0,19);
|
| 1230 |
+
downloadBtn.href = dataURL;
|
| 1231 |
+
downloadBtn.download = `grounding_${ts}.png`;
|
| 1232 |
+
downloadBtn.style.display = 'flex';
|
| 1233 |
}
|
| 1234 |
|
| 1235 |
function drawGrounding(imgSrc, rawText) {
|
| 1236 |
const parsed = extractGroundingJSON(rawText);
|
| 1237 |
+
if (!parsed) { console.warn('Grounding: no JSON found in:', rawText.slice(0,200)); return; }
|
|
|
|
|
|
|
|
|
|
| 1238 |
|
| 1239 |
const img = new Image();
|
| 1240 |
img.onload = () => {
|
| 1241 |
+
const W=img.naturalWidth, H=img.naturalHeight;
|
| 1242 |
+
groundCanvas.width=W; groundCanvas.height=H;
|
|
|
|
| 1243 |
gCtx.drawImage(img, 0, 0);
|
| 1244 |
+
groundPlaceholder.style.display='none';
|
| 1245 |
|
| 1246 |
+
const lw=Math.max(2,W/200), fs=Math.max(12,W/40);
|
| 1247 |
+
gCtx.lineWidth=lw;
|
| 1248 |
+
gCtx.font=`bold ${fs}px JetBrains Mono, monospace`;
|
|
|
|
| 1249 |
|
| 1250 |
const items = Array.isArray(parsed) ? parsed : [parsed];
|
|
|
|
| 1251 |
items.forEach((item, i) => {
|
| 1252 |
const col = PALETTE[i % PALETTE.length];
|
| 1253 |
|
| 1254 |
+
// ── Bounding box ─────────────────────────────
|
| 1255 |
let bbox = null;
|
| 1256 |
+
if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length===4) bbox=item.bbox_2d;
|
| 1257 |
+
else if (Array.isArray(item?.bbox) && item.bbox.length===4) bbox=item.bbox;
|
| 1258 |
+
else if (Array.isArray(item) && item.length===4 && item.every(n=>typeof n==='number')) bbox=item;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1259 |
|
| 1260 |
if (bbox) {
|
| 1261 |
+
let [x1,y1,x2,y2]=bbox.map(Number);
|
| 1262 |
+
if (x1<=1&&y1<=1&&x2<=1&&y2<=1) { x1*=W;y1*=H;x2*=W;y2*=H; }
|
| 1263 |
+
if (x2<x1)[x1,x2]=[x2,x1]; if (y2<y1)[y1,y2]=[y2,y1];
|
| 1264 |
+
const bw=x2-x1, bh=y2-y1;
|
| 1265 |
+
const lbl=(item?.label??`obj ${i+1}`).toString();
|
| 1266 |
+
gCtx.fillStyle=hexToRgba(col,0.20); gCtx.fillRect(x1,y1,bw,bh);
|
| 1267 |
+
gCtx.strokeStyle=col; gCtx.lineWidth=lw; gCtx.strokeRect(x1,y1,bw,bh);
|
| 1268 |
+
const tw=gCtx.measureText(lbl).width, ph=fs*1.45, pw=tw+12;
|
| 1269 |
+
const lx=x1, ly=Math.max(0,y1-ph);
|
| 1270 |
+
gCtx.fillStyle=col; roundRect(gCtx,lx,ly,pw,ph,4); gCtx.fill();
|
| 1271 |
+
gCtx.fillStyle='#fff'; gCtx.fillText(lbl,lx+6,ly+ph*0.76);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1272 |
return;
|
| 1273 |
}
|
| 1274 |
|
| 1275 |
+
// ── Point ────────────────────────────────────
|
| 1276 |
let pt = null;
|
| 1277 |
+
if (Array.isArray(item?.point_2d) && item.point_2d.length===2) pt=item.point_2d;
|
| 1278 |
+
else if (Array.isArray(item?.point) && item.point.length===2) pt=item.point;
|
| 1279 |
+
else if (Array.isArray(item) && item.length===2 && item.every(n=>typeof n==='number')) pt=item;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1280 |
|
| 1281 |
if (pt) {
|
| 1282 |
+
let [x,y]=pt.map(Number);
|
| 1283 |
+
if (x<=1&&y<=1){x*=W;y*=H;}
|
| 1284 |
+
const r=Math.max(8,W/60);
|
| 1285 |
+
const lbl=(item?.label??`pt ${i+1}`).toString();
|
| 1286 |
+
gCtx.beginPath(); gCtx.arc(x,y,r*1.8,0,Math.PI*2);
|
| 1287 |
+
gCtx.fillStyle=hexToRgba(col,0.18); gCtx.fill();
|
| 1288 |
+
gCtx.beginPath(); gCtx.arc(x,y,r,0,Math.PI*2);
|
| 1289 |
+
gCtx.fillStyle=col; gCtx.fill();
|
| 1290 |
+
gCtx.strokeStyle='#fff'; gCtx.lineWidth=Math.max(1.5,lw); gCtx.stroke();
|
| 1291 |
+
gCtx.fillStyle='#fff'; gCtx.fillText(lbl,x+r+5,y+fs*0.4);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1292 |
}
|
| 1293 |
});
|
| 1294 |
+
|
| 1295 |
+
// Enable download button after drawing
|
| 1296 |
+
updateDownloadBtn();
|
| 1297 |
};
|
| 1298 |
+
img.onerror = () => console.error('Grounding: failed to load image.');
|
| 1299 |
img.src = imgSrc;
|
| 1300 |
}
|
| 1301 |
|
|
|
|
| 1310 |
copyBtn.classList.remove('copied');
|
| 1311 |
copyBtn.innerHTML = `
|
| 1312 |
<svg width="11" height="11" viewBox="0 0 24 24" fill="none"
|
| 1313 |
+
stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
|
|
|
|
| 1314 |
<rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
|
| 1315 |
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
|
| 1316 |
</svg> COPY`;
|
| 1317 |
}
|
|
|
|
| 1318 |
copyBtn.onclick = () => {
|
| 1319 |
+
const txt = outputBox.innerText||'';
|
| 1320 |
+
if (!txt||txt==='Results will stream here...') return;
|
| 1321 |
navigator.clipboard.writeText(txt).then(() => {
|
| 1322 |
copyBtn.classList.add('copied');
|
| 1323 |
copyBtn.innerHTML = `
|
| 1324 |
<svg width="11" height="11" viewBox="0 0 24 24" fill="none"
|
| 1325 |
+
stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
|
|
|
|
| 1326 |
<polyline points="20 6 9 17 4 12"/>
|
| 1327 |
</svg> COPIED`;
|
| 1328 |
+
clearTimeout(copyTimer); copyTimer=setTimeout(resetCopyBtn,2000);
|
|
|
|
| 1329 |
}).catch(() => {
|
| 1330 |
+
const ta=document.createElement('textarea'); ta.value=txt;
|
| 1331 |
+
ta.style.position='fixed'; ta.style.opacity='0';
|
| 1332 |
+
document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);
|
|
|
|
| 1333 |
});
|
| 1334 |
};
|
| 1335 |
|
|
|
|
| 1348 |
const promptStr = promptInput.value.trim();
|
| 1349 |
if (!promptStr) { alert('Please enter a prompt directive.'); return; }
|
| 1350 |
|
| 1351 |
+
runBtn.disabled=true; btnLoader.style.display='inline-block';
|
| 1352 |
+
outputBox.innerText=''; outputBox.style.color='';
|
| 1353 |
+
groundPlaceholder.style.display='flex';
|
| 1354 |
+
gCtx.clearRect(0,0,groundCanvas.width,groundCanvas.height);
|
| 1355 |
+
downloadBtn.style.display='none';
|
|
|
|
| 1356 |
dotTask.classList.add('active');
|
| 1357 |
+
dotOut.classList.remove('active'); dotGnd.classList.remove('active');
|
| 1358 |
+
allWires.forEach(id=>document.getElementById(id)?.classList.add('active'));
|
|
|
|
| 1359 |
resetCopyBtn();
|
| 1360 |
|
| 1361 |
+
const formData=new FormData();
|
| 1362 |
formData.append('image', currentFile);
|
| 1363 |
formData.append('category', categorySelect.value);
|
| 1364 |
formData.append('prompt', promptStr);
|
| 1365 |
formData.append('model_id', modelSelect.value);
|
| 1366 |
|
| 1367 |
+
let fullText='';
|
| 1368 |
+
const imgObjectURL=URL.createObjectURL(currentFile);
|
| 1369 |
|
| 1370 |
try {
|
| 1371 |
+
const response=await fetch('/api/run',{method:'POST',body:formData});
|
| 1372 |
+
if (!response.ok) { const err=await response.json(); throw new Error(err.error||'Execution failed.'); }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1373 |
|
| 1374 |
+
const reader=response.body.getReader(), decoder=new TextDecoder('utf-8');
|
| 1375 |
+
let buffer='';
|
| 1376 |
while (true) {
|
| 1377 |
+
const {value,done}=await reader.read(); if(done)break;
|
| 1378 |
+
buffer+=decoder.decode(value,{stream:true});
|
| 1379 |
+
const lines=buffer.split('\\n\\n'); buffer=lines.pop();
|
|
|
|
|
|
|
| 1380 |
for (const line of lines) {
|
| 1381 |
if (!line.startsWith('data: ')) continue;
|
| 1382 |
+
const payload=line.replace('data: ','');
|
| 1383 |
+
if (payload==='[DONE]') break;
|
| 1384 |
try {
|
| 1385 |
+
const data=JSON.parse(payload);
|
| 1386 |
+
if (data.chunk) { fullText+=data.chunk; outputBox.innerText=fullText; outputBox.scrollTop=outputBox.scrollHeight; }
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1387 |
} catch(_) {}
|
| 1388 |
}
|
| 1389 |
}
|
| 1390 |
|
| 1391 |
dotOut.classList.add('active');
|
| 1392 |
|
| 1393 |
+
// Grounding overlay for Point / Detect
|
| 1394 |
+
const cat=categorySelect.value;
|
| 1395 |
+
if ((cat==='Point'||cat==='Detect') && fullText.trim()) {
|
| 1396 |
+
const parsed=extractGroundingJSON(fullText);
|
| 1397 |
+
if (parsed!==null) {
|
| 1398 |
dotGnd.classList.add('active');
|
| 1399 |
drawGrounding(imgObjectURL, fullText);
|
| 1400 |
} else {
|
|
|
|
| 1402 |
}
|
| 1403 |
}
|
| 1404 |
|
| 1405 |
+
} catch(err) {
|
| 1406 |
+
outputBox.innerText=`[Error] ${err.message}`; outputBox.style.color='#ff6b6b';
|
|
|
|
| 1407 |
} finally {
|
| 1408 |
+
runBtn.disabled=false; btnLoader.style.display='none';
|
|
|
|
| 1409 |
dotTask.classList.remove('active');
|
| 1410 |
+
allWires.forEach(id=>document.getElementById(id)?.classList.remove('active'));
|
| 1411 |
}
|
| 1412 |
};
|
| 1413 |
</script>
|
|
|
|
| 1416 |
"""
|
| 1417 |
|
| 1418 |
if __name__ == "__main__":
|
| 1419 |
+
app.launch(show_error=True)
|