FitCheck / engine /ui_adapter.py
cn0303's picture
Speed predictions with receipts: bandwidth roofline, real-runs chart, honest provenance
ee8ca43 verified
"""
Adapter: turn a frontend payload into the exact JSON the static/ frontend
renders, using the REAL deterministic engine (not the placeholder).
The frontend speaks one contract (verdicts ``great|tight|no``, an options list,
a gauge, tools, commands). The engine speaks another (``works_now|compromises|
dont_bother`` over ``ModelVerdict`` objects). This module is the seam between
them, so neither side has to know about the other.
Scope: the engine currently models the **LLM** family only (its model classes
are all text models). Vision / image-gen / audio / data goals still fall back to
the input-aware placeholder in ``app.py`` — that boundary is deliberate and
honest, not an oversight. ``is_llm_usecase`` below is the routing switch.
"""
import re
from .advisor import (
advise,
VERDICT_WORKS,
VERDICT_COMPROMISE,
VERDICT_NO,
)
from .catalogue import MODEL_CLASSES
from .explain import speed_hint, ollama_command, llamacpp_command
from .hardware import HardwareSpec
# Bands: engine verdict -> the colour-key the frontend understands.
_VERDICT_UI = {
VERDICT_WORKS: "great",
VERDICT_COMPROMISE: "tight",
VERDICT_NO: "no",
}
# Short caption for each frontend verdict key; advise_for_ui() returns it as
# ``verdict_word`` next to the key itself.
_VERDICT_WORD = {"great": "Runs great", "tight": "Tight, but works", "no": "Won't fit"}
# Gauge breakdown colours (match the placeholder palette in app.py / style.css).
# advise_for_ui() attaches them to the two entries of the gauge "breakdown" list.
_C_MODEL = "#818CF8" # the weights themselves
_C_WORK = "#868E9C" # chat memory + working space
# Goals the engine can answer for real. Everything LLM-shaped maps onto a chat
# context; "translate"/"custom" are still language models, so they route here.
# is_llm_usecase() is a plain membership test against this set.
_LLM_USECASES = {
"chat", "writing", "coding", "agents", "rag", "finetune", "translate", "custom",
}
# The engine's own use-case keys. Frontend ids that aren't 1:1 get mapped.
# Ids with no entry here are handed to the engine unchanged.
_USECASE_ALIAS = {"translate": "chat", "custom": "chat"}
def is_llm_usecase(usecase: str) -> bool:
    """Decide whether a goal is routed to the real engine.

    Returns True when ``usecase`` is one of the ids in ``_LLM_USECASES``;
    every other goal is left to the placeholder.
    """
    handled_by_engine = usecase in _LLM_USECASES
    return handled_by_engine
# --------------------------------------------------------------------------
# Frontend payload -> HardwareSpec
# --------------------------------------------------------------------------
def _num_in(text: str) -> float:
    """Return the first ``<number> GB`` figure found in ``text`` as a float.

    The unit is matched case-insensitively and may be separated from the
    number by whitespace. A falsy ``text`` (``None`` or ``""``) or a string
    with no such figure yields ``0.0``.
    """
    haystack = text if text else ""
    found = re.search(r"(\d+(?:\.\d+)?)\s*GB", haystack, re.I)
    if found is None:
        return 0.0
    return float(found.group(1))
def spec_from_payload(p: dict) -> HardwareSpec:
    """Translate the frontend's gather() payload into a HardwareSpec.

    The spec is assembled here instead of via build_spec(): the payload
    already names the vendor and carries a label that includes the VRAM
    figure, and the Advanced box may supply VRAM directly.

    Keys read from ``p`` (each may be missing or empty): ``computer``,
    ``provider``, ``ram_gb``, ``gpu``, ``vram_gb`` and ``paste``.
    """
    machine = (p.get("computer") or "Windows laptop").lower()
    provider = (p.get("provider") or "none").lower()
    ram = float(p.get("ram_gb") or 16)

    # Apple Silicon: memory is unified, so no separate VRAM is reported.
    is_mac = "mac" in machine or provider == "apple"
    if is_mac:
        # The chip name keeps its tier (Pro/Max/Ultra) for bandwidth lookup.
        chip = p.get("gpu") or "Apple Silicon"
        return HardwareSpec(
            os="macos",
            ram_gb=ram,
            gpu_vendor="apple",
            vram_gb=0.0,
            is_apple_silicon=True,
            gpu_label=f"{chip} (shares your {ram:g} GB of memory)",
            form_factor="mac",
        )

    # Raspberry Pi / mini PC: always described as having no graphics card.
    is_tiny = "raspberry" in machine or "mini" in machine
    if is_tiny:
        return HardwareSpec(
            os="linux",
            ram_gb=ram,
            gpu_vendor="none",
            vram_gb=0.0,
            gpu_label="No dedicated graphics card (tiny computer)",
            form_factor="sbc",
        )

    # VRAM precedence: Advanced override, then the picker label, then a
    # figure guessed from the pasted text.
    vram_source = (
        p.get("vram_gb")
        or _num_in(p.get("gpu", ""))
        or _num_in(p.get("paste", ""))
    )
    vram = float(vram_source or 0)

    if provider in ("nvidia", "amd", "intel"):
        vendor = provider
    else:
        # "none" / "unsure": treat as no fast path and discard any VRAM figure.
        vendor, vram = "none", 0.0

    return HardwareSpec(
        os="linux" if "linux" in machine else "windows",
        ram_gb=ram,
        gpu_vendor=vendor,
        vram_gb=vram,
        is_apple_silicon=False,
        gpu_label=p.get("gpu") or "No dedicated graphics card (built-in graphics only)",
        form_factor="desktop" if "desktop" in machine else "laptop",
    )
# --------------------------------------------------------------------------
# Advice -> frontend JSON
# --------------------------------------------------------------------------
def _where(spec: HardwareSpec, verdict: str) -> str:
    """Phrase saying where the headline model runs, for a frontend verdict key.

    ``"great"`` names the Mac, the graphics card or the computer depending on
    the spec; ``"tight"`` always points at the computer's memory; any other
    verdict gives an empty string.
    """
    if verdict == "tight":
        return "using your computer's memory"
    if verdict != "great":
        return ""
    # Only the "great" case looks at the hardware.
    if spec.is_apple_silicon:
        return "on your Mac"
    return "on your graphics card" if spec.has_fast_path else "on your computer"
def _options_for_ui(adv, spec: HardwareSpec) -> list:
    """Build one options-table row per engine verdict, keeping the engine's order."""
    return [
        {
            "verdict": _VERDICT_UI[v.verdict],
            "model": v.model.plain_name,
            "desc": v.model.good_for,
            "setting": v.quant.plain_name,
            # Models that cannot run show a plain "Too big" instead of a size.
            "memory": "Too big" if v.verdict == VERDICT_NO else f"{v.estimate.total_gb:g} GB",
            "feel": speed_hint(v, spec),
        }
        for v in adv.verdicts
    ]


def _detail_for_ui(h, need_gb, fast, total) -> str:
    """Return the HTML detail paragraph: the pick's breakdown, or the shortfall."""
    if h:
        est = h.estimate
        return (
            f"For this goal, the sweet spot is a <b>{h.model.plain_name}</b> model "
            f"at the <b>{h.quant.plain_name}</b> setting. {h.model.good_for} "
            f"It needs about <b>{need_gb:g} GB</b> "
            f"(model {est.weights_gb:g} GB + chat memory {est.kv_cache_gb:g} GB "
            f"+ working space {est.overhead_gb:g} GB), and you have roughly "
            f"<b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total to work with."
        )
    # Nothing fits even squeezed: be honest, show the shortfall.
    return (
        f"Even the smallest model here needs about <b>{need_gb:g} GB</b>, "
        f"but this machine can offer only about <b>{total:g} GB</b> once the "
        f"operating system has its share. That's not a failure — small "
        f"computers just have small budgets. Adding memory, or a free cloud "
        f"option, would open this up."
    )


def _gauge_for_ui(estimate, need_gb, fast, total) -> dict:
    """Build the memory gauge from the estimate that supplied ``need_gb``."""
    # 5% headroom keeps the fill and the marker off the right-hand edge.
    scale = max(total, need_gb, 1) * 1.05
    model_part = round(estimate.weights_gb, 1)
    # Everything that is not weights (chat memory + working space).
    work_part = round(need_gb - model_part, 1)
    return {
        "need_gb": f"{need_gb:g} GB needed",
        "fast_gb": f"{fast:g} GB",
        "total_gb": f"{total:g} GB",
        "fill_pct": round(need_gb / scale * 100, 1),
        "mark_pct": round(fast / scale * 100, 1),
        "breakdown": [
            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
            {"label": f"Working space {work_part:g} GB", "color": _C_WORK},
        ],
    }


def _commands_for_ui(h) -> dict:
    """Return copy-paste commands for the pick, or a single fallback command."""
    cmd_intro = ("These get you a running model in minutes. Pick the easy one or "
                 "the power one; they do the same job.")
    if h:
        items = [
            {"label": "Easy way (Ollama)", "code": ollama_command(h)},
            {"label": "Power way (llama.cpp)", "code": llamacpp_command(h)},
        ]
    else:
        # NOTE(review): assumes MODEL_CLASSES[0] is the smallest class — confirm
        # against the catalogue ordering.
        tiny = MODEL_CLASSES[0]
        items = [
            {"label": "Smallest you could try (Ollama)", "code": f"ollama run {tiny.ollama_tag}"},
        ]
    return {"intro": cmd_intro, "items": items}


def advise_for_ui(payload: dict, catalogue_version: str) -> dict:
    """Run the real engine and shape its output for static/app.js render().

    Args:
        payload: The frontend's gather() payload. ``usecase`` selects the
            goal; a missing, null or empty value falls back to ``"chat"``.
            The remaining keys are consumed by ``spec_from_payload``.
        catalogue_version: Echoed back unchanged in the response.

    Returns:
        A dict with the keys ``catalogue_version``, ``verdict``,
        ``verdict_word``, ``headline``, ``detail``, ``note``, ``gauge``,
        ``options``, ``tools``, ``commands``, ``meets_goal`` and ``use_case``.

    NOTE(review): when the engine returns no headline, the last entry of
    ``adv.verdicts`` is used as the smallest model; an empty verdict list
    would fail on that lookup — confirm the engine never returns one.
    """
    # Treat null/empty like a missing key, the same way spec_from_payload
    # defaults its blank fields, then map frontend-only ids onto engine keys.
    requested = payload.get("usecase") or "chat"
    usecase = _USECASE_ALIAS.get(requested, requested)

    spec = spec_from_payload(payload)
    adv = advise(spec, usecase)
    fast = spec.fast_budget_gb
    total = spec.total_budget_gb

    # ---- Options table (already biggest -> smallest from the engine) -----
    options = _options_for_ui(adv, spec)

    # ---- Headline ---------------------------------------------------------
    h = adv.headline
    hv = _VERDICT_UI[h.verdict] if h else "no"
    where = _where(spec, hv)
    if h and hv == "great":
        headline = f"Yes, you can run a {h.model.plain_name} model {where}, today."
    elif h and hv == "tight":
        headline = f"Sort of. A {h.model.plain_name} model will run {where}, with trade-offs."
    else:
        headline = "This goal is a stretch on this machine. Here's the honest picture."

    # The verdict whose numbers are shown: the headline pick, or the smallest
    # catalogue entry when nothing fits.
    basis = h if h else adv.verdicts[-1]
    need_gb = basis.estimate.total_gb
    detail = _detail_for_ui(h, need_gb, fast, total)

    # Notes: use-case caveat + the headline's own honest footnotes.
    note_bits = []
    if adv.use_case.note:
        note_bits.append(adv.use_case.note)
    if h and h.notes:
        note_bits.extend(h.notes)

    # ---- Tools (runtimes) -------------------------------------------------
    tools = [
        {
            "name": r.name,
            "what": r.plain_what,
            "install": r.install_hint,
            "tag": r.difficulty,
        }
        for r in adv.runtimes
    ]

    return {
        "catalogue_version": catalogue_version,
        "verdict": hv,
        "verdict_word": _VERDICT_WORD[hv],
        "headline": headline,
        "detail": detail,
        "note": " ".join(note_bits),
        # The breakdown uses the basis estimate's real weights in both cases,
        # so the gauge never shows an invented split.
        "gauge": _gauge_for_ui(basis.estimate, need_gb, fast, total),
        "options": options,
        "tools": tools,
        "commands": _commands_for_ui(h),
        # Echoed back so the model brick can narrate the SAME numbers the UI shows.
        "meets_goal": adv.meets_goal,
        "use_case": adv.use_case.plain_name,
    }