Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

File size: 5,787 Bytes

12d2e34

"""
Putting it in plain words.

The advisor produces structured facts; this module turns them into sentences a
non-technical person actually understands, and into commands they can copy and
paste. No jargon survives here without being explained.
"""

from .advisor import (
    Advice,
    ModelVerdict,
    VERDICT_WORKS,
    VERDICT_COMPROMISE,
    VERDICT_NO,
)

# Traffic-light emoji for each verdict constant imported from .advisor:
# green = works, yellow = works with compromises, red = no.
VERDICT_EMOJI = {
    VERDICT_WORKS: "🟢",
    VERDICT_COMPROMISE: "🟡",
    VERDICT_NO: "🔴",
}

# Short plain-language label for each verdict constant; keyed the same way
# as VERDICT_EMOJI so the two can be looked up side by side.
VERDICT_WORD = {
    VERDICT_WORKS: "Works now",
    VERDICT_COMPROMISE: "Works, with compromises",
    VERDICT_NO: "Don't bother",
}


def speed_hint(v: ModelVerdict, spec) -> str:
    """Describe, in one short phrase, how quick replies are likely to feel.

    Args:
        v: The verdict for one model; ``v.verdict`` is read first, and
            ``v.model.billions`` only when the verdict is neither "no" nor
            "compromise".
        spec: Accepted for the caller's convenience; not read here.

    Returns:
        A dash for a "no" verdict, a fixed "slow" phrase for a compromise
        verdict, and otherwise a phrase picked by model size.
    """
    outcome = v.verdict
    if outcome == VERDICT_NO:
        return "—"
    if outcome == VERDICT_COMPROMISE:
        return "Slow — usable for short tasks, not snappy chat."

    # Remaining case is "works now": larger models still answer more slowly,
    # so walk the size ceilings from smallest to largest.
    size_phrases = (
        (4, "Fast — replies feel instant."),
        (14, "Comfortable — quick enough for live chat."),
    )
    size = v.model.billions
    for ceiling, phrase in size_phrases:
        if size <= ceiling:
            return phrase
    return "Steady — fine, just not instant on big answers."


# --------------------------------------------------------------------------
# Commands
# --------------------------------------------------------------------------

def ollama_command(v: ModelVerdict) -> str:
    """Return the copy-and-paste ``ollama run`` command for this model.

    The model is identified by ``v.model.ollama_tag``.
    """
    tag = v.model.ollama_tag
    return "ollama run {}".format(tag)


def llamacpp_command(v: ModelVerdict) -> str:
    """Return the copy-and-paste ``llama-server`` command for this model.

    The command names the GGUF as ``repo:quant`` (from ``v.model.gguf_repo``
    and ``v.quant.key``) and sets the context size from
    ``v.estimate.context_tokens``.
    """
    # llama.cpp can fetch a GGUF directly from Hugging Face given repo:quant.
    source = "{}:{}".format(v.model.gguf_repo, v.quant.key)
    context = "{}".format(v.estimate.context_tokens)
    return " ".join(("llama-server", "-hf", source, "-c", context))


# --------------------------------------------------------------------------
# Headline summary, in human words
# --------------------------------------------------------------------------

def headline_text(advice: Advice) -> str:
    """Write the headline recommendation as a short Markdown passage.

    Reads ``advice.spec``, ``advice.use_case`` and ``advice.headline``.

    * With no headline pick (``None``), returns a fixed message saying local
      AI is not comfortable on this machine, naming the use case and the
      machine's RAM.
    * Otherwise returns a bold lead sentence chosen by the pick's verdict,
      a paragraph naming the model, its setting and the memory breakdown
      next to the machine's fast/total budgets, and then, when present, the
      use-case note and the pick's own notes as a bullet list.

    Sizes are rendered with the ``g`` format, so trailing zeros are dropped.
    """
    machine = advice.spec
    job = advice.use_case
    pick = advice.headline

    # Nothing fits at all: say so kindly and stop here.
    if pick is None:
        return "".join((
            "**Honest answer: this machine can't comfortably run local AI ",
            f"for {job.plain_name.lower()} yet.**\n\n",
            "Even the smallest models need more memory than the ",
            f"{machine.ram_gb:g} GB available here once everything else is ",
            "running. That's not a failure — small computers just have small ",
            "budgets. A free cloud option, or adding memory, would open this up.",
        ))

    model = pick.model
    setting = pick.quant

    # Only a "works" verdict on a machine with a fast path is described as
    # running on the graphics card.
    if machine.has_fast_path and pick.verdict == VERDICT_WORKS:
        where = "on the graphics card"
    else:
        where = "on the processor"

    if pick.verdict == VERDICT_WORKS:
        opener = f"**Yes — you can run a {model.plain_name} model {where}, today.**"
    elif pick.verdict == VERDICT_COMPROMISE:
        opener = f"**Sort of — a {model.plain_name} model will run, but with trade-offs.**"
    else:
        opener = f"**Not really — even a {model.plain_name} model is a stretch here.**"

    recommendation = (
        f"\n\nFor **{job.plain_name.lower()}**, the sweet spot on your machine is a "
        f"**{model.plain_name}** model at the **{setting.plain_name}** setting. "
        f"{model.good_for}\n\n"
    )
    memory_line = (
        f"That needs about **{pick.estimate.total_gb:g} GB** of memory "
        f"(model {pick.estimate.weights_gb:g} GB + chat memory "
        f"{pick.estimate.kv_cache_gb:g} GB + working space "
        f"{pick.estimate.overhead_gb:g} GB), "
        f"and you have roughly **{machine.fast_budget_gb:g} GB** fast / "
        f"**{machine.total_budget_gb:g} GB** total to play with."
    )

    pieces = [opener, recommendation, memory_line]

    # Optional extras: one note tied to the use case, then per-pick bullets.
    if job.note:
        pieces.append(f"\n\n*Note for this job:* {job.note}")
    if pick.notes:
        bullets = [f"- {n}" for n in pick.notes]
        pieces.append("\n\n" + "\n".join(bullets))

    return "".join(pieces)


def jargon_glossary() -> str:
    """Return the plain-English glossary as a Markdown bullet list.

    The text is a bold title, a blank line, then one ``- **term** — meaning``
    bullet per entry, with no trailing newline.
    """
    title = "**Plain-English glossary**"
    entries = (
        ("Model", "the AI's 'brain'. Bigger = smarter but heavier."),
        (
            "Parameters (e.g. 7B)",
            "how big the brain is. 7B = 7 billion. "
            "More = smarter and hungrier for memory.",
        ),
        (
            "Quantisation (4-bit, 8-bit)",
            "shrinking the model so it fits. "
            "4-bit is the popular sweet spot: much smaller, "
            "barely-noticeable quality loss.",
        ),
        (
            "VRAM",
            "the fast memory on a graphics card. The single biggest "
            "factor in what you can run quickly.",
        ),
        (
            "RAM",
            "your computer's normal memory. Models can use it too, "
            "but it's slower.",
        ),
        (
            "KV cache / 'chat memory'",
            "scratch space the model uses to "
            "remember the current conversation. Longer chats use more.",
        ),
        ("GGUF", "a single-file model format made for running locally."),
        (
            "llama.cpp / Ollama",
            "the programs that actually run the model on your machine.",
        ),
    )
    bullets = ["- **" + term + "** — " + meaning for term, meaning in entries]
    return title + "\n\n" + "\n".join(bullets)


def how_to_find_specs(os_hint: str = "windows") -> str:
    """Return Markdown instructions for looking up this computer's specs.

    Args:
        os_hint: Which operating system to describe. Matched without regard
            to case or surrounding whitespace. ``"macos"`` (also ``"mac"``,
            ``"osx"``, ``"darwin"``) selects the Mac steps and ``"linux"``
            the Linux steps. Anything else, including ``None`` or an empty
            string, falls back to the Windows steps.

    Returns:
        A shared bold heading followed by a short bullet list, with no
        trailing newline.
    """
    heading = "**Not sure of your specs? Here's how to check:**\n\n"

    # Normalise so "macOS", " Linux " and similar don't silently fall through
    # to the Windows instructions; non-string hints keep the Windows default.
    wanted = os_hint.strip().lower() if isinstance(os_hint, str) else ""

    if wanted in ("macos", "mac", "osx", "darwin"):
        return heading + (
            "- Click the Apple menu (top-left) → **About This Mac**.\n"
            "- It shows your chip (e.g. *Apple M2*) and **Memory** (e.g. *16 GB*).\n"
            "- On a Mac, that one memory number is all you need — the graphics "
            "share it."
        )
    if wanted == "linux":
        return heading + (
            "- RAM: run `free -h` in a terminal.\n"
            "- Graphics card: run `nvidia-smi` (NVIDIA) or `lspci | grep VGA`."
        )
    return heading + (
        "- **RAM:** press `Ctrl + Shift + Esc` → **Performance** tab → **Memory**.\n"
        "- **Graphics card:** same window → **GPU**. The name is at the top "
        "right (e.g. *NVIDIA RTX 3060*).\n"
        "- No GPU section showing a real card? You likely have built-in "
        "graphics — that's fine, just pick the 'built-in' option."
    )