""" Putting it in plain words. The advisor produces structured facts; this module turns them into sentences a non-technical person actually understands, and into commands they can copy and paste. No jargon survives here without being explained. """ from .advisor import ( Advice, ModelVerdict, VERDICT_WORKS, VERDICT_COMPROMISE, VERDICT_NO, ) VERDICT_EMOJI = { VERDICT_WORKS: "🟢", VERDICT_COMPROMISE: "🟡", VERDICT_NO: "🔴", } VERDICT_WORD = { VERDICT_WORKS: "Works now", VERDICT_COMPROMISE: "Works, with compromises", VERDICT_NO: "Don't bother", } def speed_hint(v: ModelVerdict, spec) -> str: """A rough, honest feel for how fast replies will come.""" if v.verdict == VERDICT_NO: return "—" if v.verdict == VERDICT_COMPROMISE: return "Slow — usable for short tasks, not snappy chat." # Works now (fast path). Bigger models are still slower even on a GPU. if v.model.billions <= 4: return "Fast — replies feel instant." if v.model.billions <= 14: return "Comfortable — quick enough for live chat." return "Steady — fine, just not instant on big answers." # -------------------------------------------------------------------------- # Commands # -------------------------------------------------------------------------- def ollama_command(v: ModelVerdict) -> str: return f"ollama run {v.model.ollama_tag}" def llamacpp_command(v: ModelVerdict) -> str: # llama.cpp can pull a GGUF straight from Hugging Face by repo:quant. return (f"llama-server -hf {v.model.gguf_repo}:{v.quant.key} " f"-c {v.estimate.context_tokens}") # -------------------------------------------------------------------------- # Headline summary, in human words # -------------------------------------------------------------------------- def headline_text(advice: Advice) -> str: spec = advice.spec uc = advice.use_case h = advice.headline if h is None: return ( f"**Honest answer: this machine can't comfortably run local AI " f"for {uc.plain_name.lower()} yet.**\n\n" f"Even the smallest models need more memory than the " f"{spec.ram_gb:g} GB available here once everything else is " f"running. That's not a failure — small computers just have small " f"budgets. A free cloud option, or adding memory, would open this up." ) m = h.model q = h.quant fast = "on the graphics card" if spec.has_fast_path and h.verdict == VERDICT_WORKS else "on the processor" if h.verdict == VERDICT_WORKS: lead = f"**Yes — you can run a {m.plain_name} model {fast}, today.**" elif h.verdict == VERDICT_COMPROMISE: lead = f"**Sort of — a {m.plain_name} model will run, but with trade-offs.**" else: lead = f"**Not really — even a {m.plain_name} model is a stretch here.**" body = ( f"\n\nFor **{uc.plain_name.lower()}**, the sweet spot on your machine is a " f"**{m.plain_name}** model at the **{q.plain_name}** setting. " f"{m.good_for}\n\n" f"That needs about **{h.estimate.total_gb:g} GB** of memory " f"(model {h.estimate.weights_gb:g} GB + chat memory " f"{h.estimate.kv_cache_gb:g} GB + working space {h.estimate.overhead_gb:g} GB), " f"and you have roughly **{spec.fast_budget_gb:g} GB** fast / " f"**{spec.total_budget_gb:g} GB** total to play with." ) extra = "" if uc.note: extra += f"\n\n*Note for this job:* {uc.note}" if h.notes: extra += "\n\n" + "\n".join(f"- {n}" for n in h.notes) return lead + body + extra def jargon_glossary() -> str: return ( "**Plain-English glossary**\n\n" "- **Model** — the AI's 'brain'. Bigger = smarter but heavier.\n" "- **Parameters (e.g. 7B)** — how big the brain is. 7B = 7 billion. " "More = smarter and hungrier for memory.\n" "- **Quantisation (4-bit, 8-bit)** — shrinking the model so it fits. " "4-bit is the popular sweet spot: much smaller, barely-noticeable quality loss.\n" "- **VRAM** — the fast memory on a graphics card. The single biggest " "factor in what you can run quickly.\n" "- **RAM** — your computer's normal memory. Models can use it too, but it's slower.\n" "- **KV cache / 'chat memory'** — scratch space the model uses to " "remember the current conversation. Longer chats use more.\n" "- **GGUF** — a single-file model format made for running locally.\n" "- **llama.cpp / Ollama** — the programs that actually run the model on your machine." ) def how_to_find_specs(os_hint: str = "windows") -> str: common = ( "**Not sure of your specs? Here's how to check:**\n\n" ) if os_hint == "macos": return common + ( "- Click the Apple menu (top-left) → **About This Mac**.\n" "- It shows your chip (e.g. *Apple M2*) and **Memory** (e.g. *16 GB*).\n" "- On a Mac, that one memory number is all you need — the graphics " "share it." ) if os_hint == "linux": return common + ( "- RAM: run `free -h` in a terminal.\n" "- Graphics card: run `nvidia-smi` (NVIDIA) or `lspci | grep VGA`.\n" ) return common + ( "- **RAM:** press `Ctrl + Shift + Esc` → **Performance** tab → **Memory**.\n" "- **Graphics card:** same window → **GPU**. The name is at the top " "right (e.g. *NVIDIA RTX 3060*).\n" "- No GPU section showing a real card? You likely have built-in " "graphics — that's fine, just pick the 'built-in' option." )