# FitCheck/engine/explain.py
# Uploaded by cn0303.
# Commit 12d2e34 (verified): "Deploy FitCheck: engine + Nemotron model brick on ZeroGPU"
"""
Putting it in plain words.
The advisor produces structured facts; this module turns them into sentences a
non-technical person actually understands, and into commands they can copy and
paste. No jargon survives here without being explained.
"""
from .advisor import (
Advice,
ModelVerdict,
VERDICT_WORKS,
VERDICT_COMPROMISE,
VERDICT_NO,
)
# Coloured-circle emoji for each verdict constant imported from .advisor.
VERDICT_EMOJI = {
    VERDICT_WORKS: "🟢",
    VERDICT_COMPROMISE: "🟡",
    VERDICT_NO: "🔴",
}

# Short plain-language label for each verdict constant imported from .advisor.
VERDICT_WORD = {
    VERDICT_WORKS: "Works now",
    VERDICT_COMPROMISE: "Works, with compromises",
    VERDICT_NO: "Don't bother",
}
def speed_hint(v: ModelVerdict, spec) -> str:
    """Describe, in one short phrase, how quick replies are likely to feel.

    Args:
        v: The verdict to describe; ``v.verdict`` and ``v.model.billions``
            are read.
        spec: Accepted for interface compatibility; not read by this function.

    Returns:
        ``"—"`` for a "no" verdict, a "Slow" phrase for a compromise verdict,
        otherwise a phrase chosen by model size (in billions of parameters).
    """
    outcome = v.verdict
    if outcome == VERDICT_NO:
        return "—"
    if outcome == VERDICT_COMPROMISE:
        return "Slow — usable for short tasks, not snappy chat."

    # Remaining case is "works now": larger models still answer more slowly,
    # so walk the size ceilings from smallest to largest.
    size = v.model.billions
    size_tiers = (
        (4, "Fast — replies feel instant."),
        (14, "Comfortable — quick enough for live chat."),
    )
    for ceiling, phrase in size_tiers:
        if size <= ceiling:
            return phrase
    return "Steady — fine, just not instant on big answers."
# --------------------------------------------------------------------------
# Commands
# --------------------------------------------------------------------------
def ollama_command(v: ModelVerdict) -> str:
    """Return the copy-and-paste ``ollama run`` command for this verdict.

    Args:
        v: The verdict whose ``model.ollama_tag`` names the model to run.

    Returns:
        The command line as a single string.
    """
    tag = v.model.ollama_tag
    return f"ollama run {tag}"
def llamacpp_command(v: ModelVerdict) -> str:
    """Return the copy-and-paste ``llama-server`` command for this verdict.

    Args:
        v: The verdict; ``model.gguf_repo``, ``quant.key`` and
            ``estimate.context_tokens`` are read.

    Returns:
        The command line as a single string.
    """
    # llama.cpp can pull a GGUF straight from Hugging Face by repo:quant.
    source = f"{v.model.gguf_repo}:{v.quant.key}"
    context = v.estimate.context_tokens
    return f"llama-server -hf {source} -c {context}"
# --------------------------------------------------------------------------
# Headline summary, in human words
# --------------------------------------------------------------------------
def _headline_lead(verdict, model_name, where) -> str:
    """Pick the bold opening sentence that matches *verdict*."""
    if verdict == VERDICT_WORKS:
        return f"**Yes — you can run a {model_name} model {where}, today.**"
    if verdict == VERDICT_COMPROMISE:
        return f"**Sort of — a {model_name} model will run, but with trade-offs.**"
    return f"**Not really — even a {model_name} model is a stretch here.**"


def headline_text(advice: Advice) -> str:
    """Render the headline recommendation as plain-language Markdown.

    Args:
        advice: The advisor result; its ``spec``, ``use_case`` and
            ``headline`` attributes are read.

    Returns:
        When ``advice.headline`` is ``None``, a short message saying the
        machine cannot comfortably run local AI for this use case.
        Otherwise a bold lead sentence, a paragraph with the recommended
        model/setting and the memory figures, then the use-case note and the
        headline's notes (as a bullet list) when those are present.
    """
    machine = advice.spec
    job = advice.use_case
    pick = advice.headline

    # Nothing was recommended: explain why and suggest alternatives.
    if pick is None:
        return "".join((
            "**Honest answer: this machine can't comfortably run local AI ",
            f"for {job.plain_name.lower()} yet.**\n\n",
            "Even the smallest models need more memory than the ",
            f"{machine.ram_gb:g} GB available here once everything else is ",
            "running. That's not a failure — small computers just have small ",
            "budgets. A free cloud option, or adding memory, would open this up.",
        ))

    model = pick.model
    quant = pick.quant

    # The graphics card is only named when the machine has a fast path and
    # the verdict is an outright "works".
    if machine.has_fast_path and pick.verdict == VERDICT_WORKS:
        where = "on the graphics card"
    else:
        where = "on the processor"

    sections = [_headline_lead(pick.verdict, model.plain_name, where)]

    need = pick.estimate
    sections.append(
        f"\n\nFor **{job.plain_name.lower()}**, the sweet spot on your machine is a "
        f"**{model.plain_name}** model at the **{quant.plain_name}** setting. "
        f"{model.good_for}\n\n"
        f"That needs about **{need.total_gb:g} GB** of memory "
        f"(model {need.weights_gb:g} GB + chat memory "
        f"{need.kv_cache_gb:g} GB + working space {need.overhead_gb:g} GB), "
        f"and you have roughly **{machine.fast_budget_gb:g} GB** fast / "
        f"**{machine.total_budget_gb:g} GB** total to play with."
    )

    # Optional trailers: a note tied to the use case, then per-model notes.
    if job.note:
        sections.append(f"\n\n*Note for this job:* {job.note}")
    if pick.notes:
        bullets = "\n".join(f"- {line}" for line in pick.notes)
        sections.append("\n\n" + bullets)

    return "".join(sections)
def jargon_glossary() -> str:
    """Return a Markdown glossary of the technical terms used by the app.

    Returns:
        A bold heading, a blank line, then one ``- **term** — meaning``
        bullet per entry, with no trailing newline.
    """
    entries = (
        ("Model", "the AI's 'brain'. Bigger = smarter but heavier."),
        (
            "Parameters (e.g. 7B)",
            "how big the brain is. 7B = 7 billion. "
            "More = smarter and hungrier for memory.",
        ),
        (
            "Quantisation (4-bit, 8-bit)",
            "shrinking the model so it fits. "
            "4-bit is the popular sweet spot: much smaller, "
            "barely-noticeable quality loss.",
        ),
        (
            "VRAM",
            "the fast memory on a graphics card. The single biggest "
            "factor in what you can run quickly.",
        ),
        (
            "RAM",
            "your computer's normal memory. Models can use it too, "
            "but it's slower.",
        ),
        (
            "KV cache / 'chat memory'",
            "scratch space the model uses to "
            "remember the current conversation. Longer chats use more.",
        ),
        ("GGUF", "a single-file model format made for running locally."),
        (
            "llama.cpp / Ollama",
            "the programs that actually run the model on your machine.",
        ),
    )
    bullets = "\n".join(f"- **{term}** — {meaning}" for term, meaning in entries)
    return "**Plain-English glossary**\n\n" + bullets
def how_to_find_specs(os_hint: str = "windows") -> str:
    """Return Markdown instructions for looking up this machine's specs.

    Args:
        os_hint: Operating-system name. Matching ignores case and surrounding
            whitespace. ``"macos"`` (also ``"mac"``, ``"osx"``, ``"os x"``
            or ``"darwin"``) selects the Mac steps; a value starting with
            ``"linux"`` selects the Linux steps. Anything else — including
            the default ``"windows"`` and non-string values — selects the
            Windows steps.

    Returns:
        A Markdown string: a bold heading followed by a bullet list.
    """
    # Normalise first, so values such as "Linux" or "Darwin" (the spellings
    # platform.system() reports) no longer fall through to the Windows steps.
    hint = os_hint.strip().lower() if isinstance(os_hint, str) else ""

    common = "**Not sure of your specs? Here's how to check:**\n\n"

    if hint in ("macos", "mac", "osx", "os x", "darwin"):
        return common + (
            "- Click the Apple menu (top-left) → **About This Mac**.\n"
            "- It shows your chip (e.g. *Apple M2*) and **Memory** (e.g. *16 GB*).\n"
            "- On a Mac, that one memory number is all you need — the graphics "
            "share it."
        )

    if hint.startswith("linux"):
        return common + (
            "- RAM: run `free -h` in a terminal.\n"
            "- Graphics card: run `nvidia-smi` (NVIDIA) or `lspci | grep VGA`.\n"
        )

    # Windows is the fallback for every unrecognised hint.
    return common + (
        "- **RAM:** press `Ctrl + Shift + Esc` → **Performance** tab → **Memory**.\n"
        "- **Graphics card:** same window → **GPU**. The name is at the top "
        "right (e.g. *NVIDIA RTX 3060*).\n"
        "- No GPU section showing a real card? You likely have built-in "
        "graphics — that's fine, just pick the 'built-in' option."
    )