Spaces:
Running on Zero
Running on Zero
| """ | |
| Engine v2: honest verdicts over REAL models from catalogue.json. | |
| This replaces both the size-class advisor and the placeholder families. Every | |
| option it returns is an actual model with a Hugging Face link, a license, and | |
| memory figures with provenance: | |
| - LLM / VLM weights = the EXACT GGUF file size in bytes from the Hub | |
| (ground truth — better than any params-times-bits estimate). | |
| - Chat memory (KV cache) = GQA-aware math from the model's real config | |
| (layers, hidden, kv-heads) when available; a conservative parameter-count | |
| heuristic when the repo is gated (labelled as estimated). | |
| - Working space includes a +0.577 GB buffer — the 95% load-success margin | |
| oobabooga fitted over 19,517 real measurements (gguf-vram-formula). | |
| - Non-GGUF families (vision / image gen / audio / embeddings / data) carry a | |
| single memory figure whose provenance is vendor-published, community- | |
| reported, or estimated — and the UI says which. | |
| The catalogue is baked into the repo at build time (refreshed by | |
| scripts/refresh_catalogue.py), so the running app makes no network calls. | |
| """ | |
| import json | |
| from functools import lru_cache | |
| from pathlib import Path | |
| from .hardware import HardwareSpec | |
| from .runtimes import pick_runtimes | |
| from .speed import bandwidth_for_spec, predict_decode_tps, feel_text | |
# catalogue.json lives in the directory above this module's own directory.
_CATALOGUE_PATH = Path(__file__).resolve().parent.parent / "catalogue.json"
# We only fill a budget to this fraction — the rest is breathing room.
_SAFETY_FILL = 0.90
# oobabooga's fitted 95%-load-success buffer (GB), cited in the UI footnote.
_CONFIDENCE_BUFFER_GB = 0.577
# Display wording for each verdict key.
_VERDICT_WORD = {"great": "Runs great", "tight": "Tight, but works", "no": "Won't fit"}
# Colours attached to the two gauge breakdown labels (model / working space).
_C_MODEL = "#818CF8"
_C_WORK = "#868E9C"
# Quant ladder quality order (matches scripts/refresh_catalogue.py).
_QUANT_ORDER = ["Q8_0", "Q6_K", "Q5_K_M", "Q4_K_M", "IQ4_XS", "Q3_K_M", "Q2_K"]
# Rank of the last quant still treated as 4-bit. The fast path in _eval_gguf
# stops only at ranks strictly greater than this, so IQ4_XS itself qualifies.
_FOUR_BIT_RANK = _QUANT_ORDER.index("IQ4_XS")  # > this rank = sub-4-bit
# Quants tried, in this order, when a model has to spill into ordinary RAM.
_COMPROMISE_QUANTS = ["Q4_K_M", "IQ4_XS", "Q3_K_M", "Q2_K"]
| # -------------------------------------------------------------------------- | |
| # Use cases | |
| # -------------------------------------------------------------------------- | |
class UC:
    """One selectable goal ("use case") and the knobs the engine reads for it.

    Attributes set from the constructor arguments:
        key             lookup key (``key``)
        plain_name      human-readable label (``plain``)
        family          catalogue family the goal is answered from (``family``)
        context_tokens  context length used for chat-memory sizing (``ctx``)
        min_b, good_b   LLM-quality bars in billions of parameters
        overhead_factor multiplier applied to working memory (``factor``)
        note            free-text caveat shown with the answer (``note``)
    """

    def __init__(self, key, plain, family, ctx=4096, min_b=0.0, good_b=0.0,
                 factor=1.0, note=""):
        self.key = key
        self.plain_name = plain
        self.family = family
        self.context_tokens = ctx
        # The quality bars default to 0 because they are meaningless outside
        # text models: a 0.003B YOLO is a complete, excellent model, not a
        # too-small LLM. Only the text use cases set them explicitly.
        self.min_b = min_b
        self.good_b = good_b
        self.overhead_factor = factor
        self.note = note
# Registry of every goal the engine understands, keyed by UC.key.
# Numeric knobs are passed by keyword so each row reads without counting
# positions; rows that omit them take the UC defaults.
USE_CASES = {
    uc.key: uc
    for uc in (
        # --- text (LLM) goals: these set the quality bars explicitly ---
        UC("chat", "Just chatting / asking questions", "llm",
           ctx=4096, min_b=0.5, good_b=3.0),
        UC("writing", "Writing & summarising", "llm",
           ctx=4096, min_b=1.5, good_b=7.0),
        UC("coding", "Coding help", "llm",
           ctx=8192, min_b=3.0, good_b=7.0,
           note="Bigger models are much more reliable for code."),
        UC("agents", "Agents & tool use", "llm",
           ctx=8192, min_b=7.0, good_b=7.0, factor=1.15,
           note="Needs steady instruction-following — go medium or larger."),
        UC("rag", "Chat with your documents", "llm",
           ctx=16384, min_b=3.0, good_b=7.0,
           note="Long documents use extra memory for context — that's included here."),
        UC("translate", "Translation", "llm",
           ctx=4096, min_b=1.5, good_b=7.0),
        UC("finetune", "Fine-tune an LLM (LoRA)", "llm",
           ctx=2048, min_b=3.0, good_b=7.0, factor=2.2,
           note="Training needs roughly 2-3x the memory of just chatting. That's baked into these numbers."),
        UC("custom", "Your custom goal", "llm",
           ctx=4096, min_b=0.5, good_b=7.0),
        # --- vision-language ---
        UC("vlm", "Chat about images & video", "vlm",
           ctx=4096, min_b=1.5, good_b=4.0),
        # --- vision ---
        UC("detect", "Object detection", "vision"),
        UC("segment", "Image segmentation", "vision"),
        UC("pose", "Pose estimation (2D & 6-DoF)", "vision"),
        UC("classify", "Image classification", "vision"),
        UC("depth", "Depth estimation", "vision"),
        UC("ocr", "Read text from images (OCR)", "vision"),
        UC("train-vision", "Train a vision model", "vision", factor=3.0,
           note="Training needs roughly 3x the memory of running the same model."),
        # --- image / video generation ---
        UC("imagegen", "Generate images", "imagegen"),
        UC("inpaint", "Edit / inpaint images", "imagegen"),
        UC("upscale", "Upscale / restore images", "imagegen"),
        UC("videogen", "Generate video", "imagegen"),
        UC("bgremove", "Remove backgrounds", "imagegen"),
        # --- audio ---
        UC("stt", "Speech to text", "audio"),
        UC("tts", "Text to speech / voice", "audio"),
        UC("music", "Generate music", "audio"),
        # --- embeddings and data ---
        UC("embed", "Semantic search / embeddings", "embed"),
        UC("forecast", "Time-series forecasting", "data"),
        UC("tabular", "Predict from spreadsheets", "data"),
    )
}
# Text goals are answered by the whole LLM family, so catalogue entries do
# not list these keys themselves (see _by_use_case).
_TEXT_UCS = {
    "chat",
    "writing",
    "coding",
    "agents",
    "rag",
    "translate",
    "finetune",
    "custom",
}
def _tool(name, what, install, tag):
    """Build one tool card dict with the keys name / what / install / tag."""
    return {"name": name, "what": what, "install": install, "tag": tag}


# Suggested tools per model family, in display order.
_TOOLS = {
    "llm": [
        _tool("Ollama",
              "Type one line; it downloads and runs the model for you.",
              "Get it from ollama.com", "Easiest"),
        _tool("LM Studio",
              "A point-and-click app with a chat window, no commands.",
              "Download from lmstudio.ai", "Easy"),
        _tool("llama.cpp",
              "The lightweight engine under the hood. Runs GGUF files directly.",
              "Releases on GitHub", "Advanced"),
    ],
    "vision": [
        _tool("Ultralytics",
              "One pip install, then detect objects from a webcam or file.",
              "pip install ultralytics", "Easiest"),
        _tool("PyTorch",
              "Full control for custom pipelines and training.",
              "pytorch.org", "Advanced"),
    ],
    "imagegen": [
        _tool("ComfyUI",
              "Powerful visual node editor for image/video pipelines.",
              "Download from GitHub", "Moderate"),
        _tool("diffusers",
              "Hugging Face's Python library for generation pipelines.",
              "pip install diffusers", "Moderate"),
        _tool("Fooocus",
              "Image generation that 'just works': one folder, double-click.",
              "Download from GitHub", "Easiest"),
    ],
    "audio": [
        _tool("faster-whisper",
              "Fast, accurate transcription with a tiny install.",
              "pip install faster-whisper", "Easiest"),
        _tool("whisper.cpp",
              "Runs Whisper efficiently on CPU and small machines.",
              "Build from GitHub", "Advanced"),
    ],
    "embed": [
        _tool("sentence-transformers",
              "Turn text into searchable vectors in a few lines.",
              "pip install sentence-transformers", "Easiest"),
        _tool("Chroma",
              "A simple local database to store and search those vectors.",
              "pip install chromadb", "Easy"),
    ],
    "data": [
        _tool("Python + pip",
              "These models ship as small Python packages.",
              "pip install (see the model card)", "Easiest"),
    ],
}
# Vision-language models share the LLM tool list (same list object).
_TOOLS["vlm"] = _TOOLS["llm"]
| # -------------------------------------------------------------------------- | |
| # Catalogue access | |
| # -------------------------------------------------------------------------- | |
@lru_cache(maxsize=1)
def catalogue() -> dict:
    """Return the parsed contents of catalogue.json.

    The file is read and parsed once per process and the result is memoised:
    the module docstring states the catalogue is baked in at build time, and
    a single request reaches this function many times (via _by_use_case and
    catalogue_date, and once per ladder tier in min_specs).

    Returns:
        The decoded JSON document. The same dict object is handed to every
        caller, so treat it as read-only. Call ``catalogue.cache_clear()``
        if the file is ever replaced while the process is running.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    return json.loads(_CATALOGUE_PATH.read_text(encoding="utf-8"))
def _by_use_case() -> dict:
    """Group catalogue entries by use-case key.

    LLM entries are filed under every key in _TEXT_UCS, VLM entries under
    "vlm", and every other family under whatever its own "use_cases" list
    names. Each group is then sorted by "params_b", largest first (entries
    without the field count as 0).
    """
    grouped: dict[str, list[dict]] = {}
    for entry in catalogue()["entries"]:
        family = entry["family"]
        if family == "llm":
            keys = list(_TEXT_UCS)
        elif family == "vlm":
            keys = ["vlm"]
        else:
            keys = entry.get("use_cases", [])
        for key in keys:
            grouped.setdefault(key, []).append(entry)
    for bucket in grouped.values():
        bucket.sort(key=lambda item: item.get("params_b", 0), reverse=True)
    return grouped
def catalogue_date() -> str:
    """Return the first 10 characters of the catalogue's "generated_at" stamp.

    Yields an empty string when the catalogue has no such field.
    """
    stamp = catalogue().get("generated_at", "")
    return stamp[:10]
| # -------------------------------------------------------------------------- | |
| # Memory math | |
| # -------------------------------------------------------------------------- | |
# Fallback architecture shapes by parameter count (conservative typicals),
# used only when a gated repo hides its config.json.
# Each row is (params_b upper bound, layers, hidden size).
_ARCH_FALLBACK = [
    (1.5, 24, 2048),
    (4.5, 28, 3072),
    (9.0, 32, 4096),
    (16.0, 40, 5120),
    (40.0, 48, 6656),
    (1e9, 80, 8192),
]


def _kv_gb(entry: dict, ctx: int) -> tuple[float, bool]:
    """KV-cache size in GB for `ctx` tokens.

    Returns:
        (gb, exact): `exact` is True when the figure comes from the entry's
        own "arch" block, False when it comes from the size-based fallback.
    """
    # Never size the cache beyond the context the entry declares, if any.
    declared = entry.get("context_len")
    if declared:
        ctx = min(ctx, declared)

    shape = entry.get("arch")
    if shape:
        # hidden * n_kv_heads / n_heads is the per-layer K (or V) width;
        # the leading 2 covers K and V, the trailing 2 is presumably bytes
        # per element (16-bit cache) — confirm against the refresh script.
        kv_width = shape["hidden"] * shape["n_kv_heads"] / shape["n_heads"]
        return 2 * shape["n_layers"] * kv_width * ctx * 2 / 1e9, True

    size_b = entry.get("params_b", 4.0)
    match = next(
        ((layers, hidden) for cap, layers, hidden in _ARCH_FALLBACK if size_b <= cap),
        None,
    )
    if match is None:
        return 1.0, False
    layers, hidden = match
    # The 0.30 scale stands in for the unknown kv-head share of a hidden
    # config (presumably a typical GQA ratio — confirm).
    return 2 * layers * hidden * ctx * 2 * 0.30 / 1e9, False
def _overhead_gb(weights: float, factor: float) -> float:
    """Working-space GB on top of the weights, rounded to 2 decimals.

    With `factor` >= 2.0 (training: optimizer state + activations dominate)
    the extra is the confidence buffer plus (factor - 1) times the weights.
    Otherwise it is (buffer + 8% of the weights) scaled by `factor`.
    """
    is_training = factor >= 2.0
    if is_training:
        extra = _CONFIDENCE_BUFFER_GB + weights * (factor - 1.0)
    else:
        extra = (_CONFIDENCE_BUFFER_GB + 0.08 * weights) * factor
    return round(extra, 2)
def _estimate(entry: dict, quant: dict, ctx: int, factor: float) -> dict:
    """Memory estimate for running `entry` at `quant` with `ctx` tokens.

    Returns a dict with "weights" (the quant's file_gb), "kv" (chat memory,
    2 decimals), "overhead" (working space), their rounded "total", and
    "kv_exact" (whether chat memory came from the entry's real shape).
    """
    file_gb = quant["file_gb"]
    raw_kv, exact = _kv_gb(entry, ctx)
    chat_gb = round(raw_kv, 2)
    work_gb = _overhead_gb(file_gb, factor)
    return {
        "weights": file_gb,
        "kv": chat_gb,
        "overhead": work_gb,
        "total": round(file_gb + chat_gb + work_gb, 2),
        "kv_exact": exact,
    }
| # -------------------------------------------------------------------------- | |
| # Per-entry evaluation | |
| # -------------------------------------------------------------------------- | |
def _quant_rank(key: str) -> int:
    """Position of `key` in _QUANT_ORDER; unknown keys rank after every known one."""
    try:
        return _QUANT_ORDER.index(key)
    except ValueError:
        return len(_QUANT_ORDER)
def _feel(entry: dict, verdict: str, spec: HardwareSpec) -> str:
    """Plain-language speed impression used when no numeric prediction exists.

    `spec` is part of the signature but is not read here.
    """
    if verdict == "no":
        return "—"

    moe_active = entry.get("active_params_b")
    # Size that is actually exercised per token: the active share when the
    # entry declares one, otherwise the whole model.
    active = moe_active or entry.get("params_b", 4)

    if verdict == "tight":
        if moe_active:
            return f"Usable even part-offloaded (only {entry['active_params_b']:g}B active per word)"
        return "Slow — usable for short tasks, not snappy chat"

    if active > 14:
        return "Steady — fine, just not instant on big answers"
    if active > 4:
        return "Comfortable — quick enough for live chat"
    return "Fast — replies feel instant"
def _eval_gguf(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Verdict for an LLM/VLM entry with a real quant ladder.

    Returns {"verdict", "quant", "est"}: "great" when a 4-bit-or-better quant
    fits the fast budget, "tight" when one of _COMPROMISE_QUANTS fits the
    total budget, otherwise "no" with the estimate for the last quant on the
    ladder.
    """
    fast, total = spec.fast_budget_gb, spec.total_budget_gb
    ladder = sorted(entry.get("quants", []), key=lambda item: _quant_rank(item["key"]))
    ctx, factor = uc.context_tokens, uc.overhead_factor

    def sized(quant):
        return _estimate(entry, quant, ctx, factor)

    # Fast path: best quality quant >= 4-bit that fits the GPU budget.
    if spec.has_fast_path:
        for quant in ladder:
            if _quant_rank(quant["key"]) > _FOUR_BIT_RANK:
                break  # don't call a sub-4-bit squeeze "runs great"
            est = sized(quant)
            if est["total"] <= fast * _SAFETY_FILL:
                return {"verdict": "great", "quant": quant, "est": est}

    # Compromise: spill into ordinary RAM, shrinking quality only if needed.
    for wanted in _COMPROMISE_QUANTS:
        for quant in ladder:
            if quant["key"] == wanted:
                break
        else:
            continue  # this entry does not ship that quant
        est = sized(quant)
        if est["total"] <= total * _SAFETY_FILL:
            return {"verdict": "tight", "quant": quant, "est": est}

    # Nothing fits: report against the last quant on the ladder, or a
    # synthetic 4-bit figure if the ladder is empty.
    if ladder:
        last = ladder[-1]
    else:
        last = {"key": "Q4_K_M", "plain": "Balanced (4-bit)",
                "file_gb": entry.get("params_b", 4) * 0.6}
    return {"verdict": "no", "quant": last, "est": sized(last)}
def _eval_flat(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Verdict for a non-GGUF entry with one memory figure.

    The entry's "mem_gb" (4.0 when absent) is scaled by the use case's
    overhead factor and compared against the fast and total budgets.
    """
    need = round(entry.get("mem_gb", 4.0) * uc.overhead_factor, 2)
    fast, total = spec.fast_budget_gb, spec.total_budget_gb
    est = {"weights": need, "kv": 0.0, "overhead": 0.0, "total": need, "kv_exact": False}
    setting = {"key": "full", "plain": "Full model", "file_gb": need}

    if spec.has_fast_path and need <= fast * _SAFETY_FILL:
        verdict = "great"
    elif entry["family"] == "imagegen" and not spec.has_fast_path and need > 4:
        # Image/video generation without a GPU is minutes-per-image: say so.
        verdict = "no"
    elif need <= total * _SAFETY_FILL:
        verdict = "tight"
    else:
        verdict = "no"
    return {"verdict": verdict, "quant": setting, "est": est}
def _evaluate(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
    """Score one catalogue entry and attach the entry itself to the result.

    Entries with a non-empty "quants" ladder go through _eval_gguf, all
    others through _eval_flat.
    """
    scorer = _eval_gguf if entry.get("quants") else _eval_flat
    result = scorer(entry, spec, uc)
    result["entry"] = entry
    return result
| # -------------------------------------------------------------------------- | |
| # Advise: full UI-shaped result | |
| # -------------------------------------------------------------------------- | |
def _speed_pred(r: dict, spec: HardwareSpec, bw: float | None) -> dict | None:
    """Decode-speed prediction for a GGUF option, if bandwidth is known.

    Returns None for non-GGUF entries, for "no" verdicts, and when `bw` is
    missing or zero; otherwise whatever predict_decode_tps returns.
    """
    entry, verdict, est = r["entry"], r["verdict"], r["est"]
    if not entry.get("quants") or verdict == "no" or not bw:
        return None

    total_b = entry.get("params_b") or 1.0
    active_share = (entry.get("active_params_b") or total_b) / total_b

    offload = 0.0
    if verdict == "tight":
        # share of the read bytes that live in slow system RAM
        fast_room = spec.fast_budget_gb * _SAFETY_FILL
        spilled = 1 - fast_room / max(est["total"], 0.1)
        offload = max(0.0, min(1.0, spilled))

    return predict_decode_tps(
        bandwidth_gbs=bw,
        weights_gb=est["weights"],
        kv_gb=est["kv"],
        active_fraction=active_share,
        offload_fraction=offload,
    )
def _option_json(r: dict, spec: HardwareSpec, bw: float | None = None) -> dict:
    """Flatten one evaluation result into the option row returned to the UI."""
    entry, verdict = r["entry"], r["verdict"]

    prediction = _speed_pred(r, spec, bw)
    if prediction:
        feel = feel_text(prediction)
    else:
        feel = _feel(entry, verdict, spec)
    # Flat (non-GGUF) models that only fit without a fast path get their own wording.
    if not entry.get("quants") and verdict == "tight" and not spec.has_fast_path:
        feel = "Runs on the processor — slow but workable"

    links = entry.get("links") or {}
    memory = "Too big" if verdict == "no" else f"{r['est']['total']:g} GB"

    return {
        "verdict": verdict,
        "model": entry["name"],
        "desc": entry.get("good_for", ""),
        "setting": r["quant"].get("plain", "Full model"),
        "memory": memory,
        "feel": feel,
        "params_b": entry.get("params_b"),
        "active_params_b": entry.get("active_params_b"),
        # Prefer the Hugging Face link, fall back to the project home page.
        "url": links.get("hf") or links.get("home", ""),
        "license": entry.get("license", ""),
        "license_note": entry.get("license_note", ""),
        "gated": entry.get("gated", False),
        "run": entry.get("run", {}),
        "provenance": entry.get("provenance", "estimated"),
        "stale": entry.get("stale", False),
    }
def _pick_headline(results: list[dict], uc: UC) -> tuple[dict | None, bool]:
    """Choose the result to headline and say whether it meets the goal's bar.

    Returns (result, meets_goal). meets_goal is True only when the pick has
    at least uc.min_b billion parameters; (None, False) when nothing runs.
    """
    def size_b(result):
        return result["entry"].get("params_b", 0)

    great, tight = [], []
    for result in results:
        if result["verdict"] == "great":
            great.append(result)
        if result["verdict"] == "tight":
            tight.append(result)

    great_ok = [result for result in great if size_b(result) >= uc.min_b]
    tight_ok = [result for result in tight if size_b(result) >= uc.min_b]

    # Fast-and-capable is the best answer: biggest model that runs great.
    if great_ok:
        return max(great_ok, key=size_b), True

    if tight_ok:
        if not uc.good_b > 0:
            # Non-LLM families: the biggest model that fits is simply the best one.
            return max(tight_ok, key=size_b), True
        # LLMs: close to the ideal size, not needlessly oversized-and-slow.
        near_ideal = [result for result in tight_ok if size_b(result) <= uc.good_b * 1.5]
        if near_ideal:
            return max(near_ideal, key=size_b), True
        return min(tight_ok, key=size_b), True

    # Below the quality bar: still offer what runs, flagged as not meeting the goal.
    if great:
        return max(great, key=size_b), False
    if tight:
        return min(tight, key=size_b), False
    return None, False
| def _provenance_line(headline: dict | None) -> str: | |
| if not headline: | |
| return "" | |
| e = headline["entry"] | |
| prov = e.get("provenance", "estimated") | |
| if prov == "filesize": | |
| line = ("Model size is the exact file size on Hugging Face. Chat memory and " | |
| "working space are conservative estimates with a 0.58 GB safety buffer " | |
| "(the 95% load-success margin fitted from ~19,500 real measurements).") | |
| if not headline["est"].get("kv_exact"): | |
| line += " This repo hides its exact shape, so chat memory is estimated from its size." | |
| return line | |
| if prov == "vendor": | |
| return "The memory figure is the maker's own published number." | |
| if prov == "community": | |
| return "The memory figure is community-reported, not vendor-published — treat it as a good estimate." | |
| return "The memory figure is estimated from the model's size — conservative, not measured." | |
def advise_real(payload: dict, spec: HardwareSpec) -> dict:
    """Build the full UI-shaped answer for one goal on one machine.

    Args:
        payload: request dict; only "usecase" is read. A missing or unknown
            key falls back to the "chat" use case.
        spec: the machine being checked.

    Returns:
        A dict with the keys catalogue_version, verdict, verdict_word,
        headline, detail, note, gauge, options, tools, commands, provenance,
        speed, meets_goal, use_case and headline_model. The "goal not
        covered by the catalogue" answer carries the same keys (speed is
        None, headline_model is ""), so callers can read any of them
        without checking which branch produced the result.
    """
    uc = USE_CASES.get(payload.get("usecase", "chat"), USE_CASES["chat"])
    candidates = _by_use_case().get(uc.key, [])

    # Honest gap, not a fake answer: if the catalogue doesn't cover a goal yet,
    # say so and point at the live lookup instead of inventing options.
    if not candidates:
        return {
            "catalogue_version": catalogue_date(),
            "verdict": "tight", "verdict_word": "Not covered yet",
            "headline": "Our catalogue doesn't cover this goal yet.",
            "detail": ("FitCheck only answers from verified model data, and nothing in the "
                       "current catalogue serves this goal — so rather than guess, we'd "
                       "rather say so. If you know a specific model for it, paste its "
                       "Hugging Face id in the <b>'Have a specific model in mind?'</b> box "
                       "and we'll check that exact model against your machine."),
            "note": "The catalogue grows every night; niche goals are next in line.",
            "gauge": {}, "options": [], "tools": _TOOLS.get(uc.family, []),
            "commands": {"intro": "", "items": []}, "provenance": "",
            # Same key set as the normal answer: min_specs reads
            # "headline_model" from every result and used to hit a KeyError here.
            "speed": None,
            "meets_goal": False, "use_case": uc.plain_name,
            "headline_model": "",
        }

    results = [_evaluate(e, spec, uc) for e in candidates]
    fast, total = spec.fast_budget_gb, spec.total_budget_gb
    headline, meets_goal = _pick_headline(results, uc)
    bw, bw_src = bandwidth_for_spec(spec)
    options = [_option_json(r, spec, bw) for r in results]

    if headline:
        e, est, q = headline["entry"], headline["est"], headline["quant"]
        hv = headline["verdict"]
        need = est["total"]
        where = ("on your Mac" if spec.is_apple_silicon and hv == "great" else
                 "on your graphics card" if hv == "great" and spec.has_fast_path else
                 "using your computer's memory" if hv == "tight" else "")
        if hv == "great":
            head_text = f"Yes, you can run {e['name']} {where}, today."
        else:
            head_text = f"Sort of. {e['name']} will run {where}, with trade-offs."
        if e.get("quants"):
            detail = (
                f"For this goal, the honest pick is <b>{e['name']}</b> at the "
                f"<b>{q.get('plain', q['key'])}</b> setting. {e.get('good_for','')} "
                f"It needs about <b>{need:g} GB</b> "
                f"(the model file is {est['weights']:g} GB — exact size on Hugging Face — "
                f"plus {est['kv']:g} GB chat memory and {est['overhead']:g} GB working space), "
                f"and you have roughly <b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total."
            )
        else:
            detail = (
                f"For this goal, the honest pick is <b>{e['name']}</b>. "
                f"{e.get('good_for','')} It needs about <b>{need:g} GB</b>, and you have "
                f"roughly <b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total."
            )
        model_part, work_part = est["weights"], round(need - est["weights"], 2)
    else:
        # Nothing runs at all: describe the gap using the lightest candidate.
        hv = "no"
        smallest = min(results, key=lambda r: r["est"]["total"], default=None)
        need = smallest["est"]["total"] if smallest else 1.0
        head_text = "This goal is a stretch on this machine. Here's the honest picture."
        detail = (
            f"Even the lightest option here needs about <b>{need:g} GB</b>, but this "
            f"machine can offer only about <b>{total:g} GB</b> once the operating system "
            f"has its share. That's not a failure — small computers just have small "
            f"budgets. Adding memory, or a free cloud notebook, would open this up."
        )
        # No real breakdown exists here; split the figure 80/20 for the gauge labels.
        model_part, work_part = round(need * 0.8, 2), round(need * 0.2, 2)

    note_bits = []
    if headline and not meets_goal:
        note_bits.append(
            f"This is the best this machine can do, but it's on the small side for "
            f"{uc.plain_name.lower()} — treat results as 'okay', not great.")
    if uc.note:
        note_bits.append(uc.note)
    if headline and headline["entry"].get("mem_note"):
        note_bits.append(headline["entry"]["mem_note"])
    if headline and headline["entry"].get("license_note"):
        note_bits.append(headline["entry"]["license_note"])
    if headline and headline["entry"].get("gated"):
        note_bits.append("This model is gated: accept its terms on Hugging Face once before downloading.")

    # The gauge spans the larger of the budget and the need, plus 5% headroom,
    # so neither the fill nor the fast-budget mark sits on the edge.
    scale = max(total, need, 1) * 1.05
    gauge = {
        "need_gb": f"{need:g} GB needed",
        "fast_gb": f"{fast:g} GB", "total_gb": f"{total:g} GB",
        "fill_pct": round(min(need / scale, 1.0) * 100, 1),
        "mark_pct": round(min(fast / scale, 1.0) * 100, 1),
        "breakdown": [
            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
            {"label": f"Chat memory + working space {work_part:g} GB", "color": _C_WORK},
        ],
    }

    speed = None
    if headline:
        pred = _speed_pred(headline, spec, bw)
        if pred:
            speed = {**pred, "bw": bw, "bw_source": bw_src,
                     "model": headline["entry"]["name"]}

    # LLM goals get runtimes picked for this machine; other families use the static table.
    if uc.family == "llm":
        tools = [{"name": r.name, "what": r.plain_what, "install": r.install_hint,
                  "tag": r.difficulty} for r in pick_runtimes(spec)]
    else:
        tools = _TOOLS.get(uc.family, [])

    commands = {"intro": "These get you running in minutes — real commands for the exact pick above.",
                "items": []}
    if headline:
        run = headline["entry"].get("run", {})
        if run.get("ollama"):
            commands["items"].append({"label": "Easy way (Ollama)", "code": run["ollama"]})
        if run.get("llamacpp"):
            commands["items"].append({"label": "Power way (llama.cpp)", "code": run["llamacpp"]})
        if run.get("pip"):
            commands["items"].append({"label": "Install", "code": run["pip"]})

    provenance = _provenance_line(headline)
    if speed:
        how = ("predicted from real community measurements"
               if speed["method"] == "measured-model"
               else "an analytical bandwidth estimate")
        provenance += f" Speed is {how} — see 'Why this speed?' below."

    return {
        "catalogue_version": catalogue_date(),
        "verdict": hv,
        "verdict_word": _VERDICT_WORD[hv],
        "headline": head_text,
        "detail": detail,
        "note": " ".join(note_bits),
        "gauge": gauge,
        "options": options,
        "tools": tools,
        "commands": commands,
        "provenance": provenance,
        "speed": speed,
        "meets_goal": meets_goal,
        "use_case": uc.plain_name,
        "headline_model": headline["entry"]["name"] if headline else "",
    }
| # -------------------------------------------------------------------------- | |
| # Reverse mode: "what machine do I need for X?" | |
| # -------------------------------------------------------------------------- | |
# Ladders are cheap -> expensive. Budget hints are rough 2026 street prices for
# a whole sensible build, shown as guidance, not gospel.
# Each rung is (label, hardware dict, price hint).
_PC_LADDER = [
    ("Any old laptop (8 GB RAM, no GPU)",
     {"ram_gb": 8, "vram_gb": 0, "vendor": "none"}, "what you may already own"),
    ("16 GB RAM laptop, no GPU",
     {"ram_gb": 16, "vram_gb": 0, "vendor": "none"}, "~$500"),
    ("16 GB RAM + RTX 4060 (8 GB)",
     {"ram_gb": 16, "vram_gb": 8, "vendor": "nvidia"}, "~$800"),
    ("16 GB RAM + RTX 3060 (12 GB)",
     {"ram_gb": 16, "vram_gb": 12, "vendor": "nvidia"}, "~$900"),
    ("32 GB RAM + RTX 5070 (12 GB)",
     {"ram_gb": 32, "vram_gb": 12, "vendor": "nvidia"}, "~$1,300"),
    ("32 GB RAM + RTX 5070 Ti (16 GB)",
     {"ram_gb": 32, "vram_gb": 16, "vendor": "nvidia"}, "~$1,600"),
    ("32 GB RAM + RTX 4090 (24 GB)",
     {"ram_gb": 32, "vram_gb": 24, "vendor": "nvidia"}, "~$2,500"),
    ("64 GB RAM + RTX 5090 (32 GB)",
     {"ram_gb": 64, "vram_gb": 32, "vendor": "nvidia"}, "~$3,500+"),
]
_MAC_LADDER = [
    ("Mac with 16 GB unified memory", {"ram_gb": 16}, "~$1,000"),
    ("Mac with 24 GB unified memory", {"ram_gb": 24}, "~$1,400"),
    ("Mac with 32 GB unified memory", {"ram_gb": 32}, "~$1,800"),
    ("Mac with 64 GB unified memory", {"ram_gb": 64}, "~$2,800"),
    ("Mac with 128 GB unified memory", {"ram_gb": 128}, "~$4,500+"),
]
def _spec_for_tier(kind: str, hw: dict) -> HardwareSpec:
    """Turn one ladder rung's hardware dict into a HardwareSpec.

    `kind` "mac" builds an Apple-silicon macOS spec from ram_gb alone; any
    other kind builds a Windows desktop spec with the rung's GPU vendor and
    VRAM (defaulting to no GPU).
    """
    if kind != "mac":
        return HardwareSpec(
            os="windows",
            ram_gb=hw["ram_gb"],
            gpu_vendor=hw.get("vendor", "none"),
            vram_gb=hw.get("vram_gb", 0.0),
            form_factor="desktop",
        )
    return HardwareSpec(
        os="macos",
        ram_gb=hw["ram_gb"],
        gpu_vendor="apple",
        is_apple_silicon=True,
        form_factor="mac",
    )
def min_specs(usecases) -> dict:
    """For one OR several goals: the cheapest tier where EVERY goal genuinely
    works (the union of requirements, not a sum), the tier where every goal
    runs great, and what each goal would actually run on those tiers.
    Pure engine inversion — fully offline.

    Args:
        usecases: a single use-case key or an iterable of keys. Unknown keys
            are treated as "chat", duplicates are dropped, and an empty or
            missing value means ["chat"].

    Returns:
        A dict with use_case, goals, catalogue_version, note, disclaimer and
        "pc" / "mac", each holding {"minimum": tier-or-None,
        "comfortable": tier-or-None}.
    """
    if isinstance(usecases, str):
        usecases = [usecases]
    # Materialise first so an empty generator falls back to "chat" like an
    # empty list does, instead of leaving no goals to report on.
    requested = list(usecases or ()) or ["chat"]

    seen = set()
    ucs = []
    for u in requested:
        uc = USE_CASES.get(u, USE_CASES["chat"])
        if uc.key not in seen:
            seen.add(uc.key)
            ucs.append(uc)

    def walk(kind, ladder):
        """Climb one ladder; return its (minimum, comfortable) tiers."""
        minimum = comfortable = None
        for label, hw, price in ladder:
            spec = _spec_for_tier(kind, hw)
            per_goal, all_meet, all_great = [], True, True
            for uc in ucs:
                res = advise_real({"usecase": uc.key}, spec)
                works = res["meets_goal"] and res["verdict"] in ("great", "tight")
                great = res["meets_goal"] and res["verdict"] == "great"
                all_meet = all_meet and works
                all_great = all_great and great
                per_goal.append({
                    "goal": uc.plain_name,
                    # .get: the "goal not covered by the catalogue" answer
                    # may carry no headline model at all.
                    "model": res.get("headline_model") or "nothing realistic",
                    "verdict": res["verdict"],
                })
            if len(per_goal) > 1:
                runs = "; ".join(f"{g['goal']}: {g['model']}" for g in per_goal)
            else:
                runs = per_goal[0]["model"]
            tier = {"label": label, "price": price, "goals": per_goal, "runs": runs}
            if minimum is None and all_meet:
                minimum = tier
            if comfortable is None and all_great:
                comfortable = tier
            if minimum and comfortable:
                break  # both found; higher rungs cannot change the answer
        return minimum, comfortable

    pc_min, pc_comfy = walk("pc", _PC_LADDER)
    mac_min, mac_comfy = walk("mac", _MAC_LADDER)
    notes = [uc.note for uc in ucs if uc.note]
    return {
        "use_case": " + ".join(uc.plain_name for uc in ucs),
        "goals": [uc.plain_name for uc in ucs],
        "catalogue_version": catalogue_date(),
        "note": " ".join(notes),
        "pc": {"minimum": pc_min, "comfortable": pc_comfy},
        "mac": {"minimum": mac_min, "comfortable": mac_comfy},
        "disclaimer": ("Price hints are rough 2026 street prices for a sensible whole "
                       "build — they vary a lot by region and second-hand luck. The "
                       "memory math is the same conservative engine as the main check."
                       + (" Tiers are the union of every goal you picked: each one has "
                          "to genuinely work." if len(ucs) > 1 else "")),
    }