Spaces:

build-small-hackathon
/

FitCheck

Running on Zero

App Files Files Community

FitCheck / engine /real_advisor.py

cn0303

Speed predictions with receipts: bandwidth roofline, real-runs chart, honest provenance

ee8ca43 verified about 19 hours ago

raw

history blame contribute delete

28.6 kB

	"""
	Engine v2: honest verdicts over REAL models from catalogue.json.

	This replaces both the size-class advisor and the placeholder families. Every
	option it returns is an actual model with a Hugging Face link, a license, and
	memory figures with provenance:

	- LLM / VLM weights = the EXACT GGUF file size in bytes from the Hub
	(ground truth — better than any params-times-bits estimate).
	- Chat memory (KV cache) = GQA-aware math from the model's real config
	(layers, hidden, kv-heads) when available; a conservative parameter-count
	heuristic when the repo is gated (labelled as estimated).
	- Working space includes a +0.577 GB buffer — the 95% load-success margin
	oobabooga fitted over 19,517 real measurements (gguf-vram-formula).
	- Non-GGUF families (vision / image gen / audio / embeddings / data) carry a
	single memory figure whose provenance is vendor-published, community-
	reported, or estimated — and the UI says which.

	The catalogue is baked into the repo at build time (refreshed by
	scripts/refresh_catalogue.py), so the running app makes no network calls.
	"""

	import json
	from functools import lru_cache
	from pathlib import Path

	from .hardware import HardwareSpec
	from .runtimes import pick_runtimes
	from .speed import bandwidth_for_spec, predict_decode_tps, feel_text

	# catalogue.json sits in the parent of this module's directory.
	_CATALOGUE_PATH = Path(__file__).resolve().parent.parent / "catalogue.json"

	# We only fill a budget to this fraction — the rest is breathing room.
	_SAFETY_FILL = 0.90
	# oobabooga's fitted 95%-load-success buffer (GB), cited in the UI footnote.
	_CONFIDENCE_BUFFER_GB = 0.577

	# Verdict key -> short label returned to the UI as "verdict_word".
	_VERDICT_WORD = {"great": "Runs great", "tight": "Tight, but works", "no": "Won't fit"}
	# Colours attached to the two gauge breakdown entries (model / working space).
	_C_MODEL = "#818CF8"
	_C_WORK = "#868E9C"

	# Quant ladder quality order (matches scripts/refresh_catalogue.py).
	_QUANT_ORDER = ["Q8_0", "Q6_K", "Q5_K_M", "Q4_K_M", "IQ4_XS", "Q3_K_M", "Q2_K"]
	# Last rank still eligible for a "great" verdict: _eval_gguf stops at ranks
	# strictly greater than this index (IQ4_XS itself is still allowed).
	_FOUR_BIT_RANK = _QUANT_ORDER.index("IQ4_XS")
	# Quants tried, in this order, when the model has to spill into ordinary RAM.
	_COMPROMISE_QUANTS = ["Q4_K_M", "IQ4_XS", "Q3_K_M", "Q2_K"]


	# --------------------------------------------------------------------------
	# Use cases
	# --------------------------------------------------------------------------

	class UC:
	    """One user goal ("use case") and the knobs the engine needs for it.

	    Attributes set by the constructor:
	        key, plain_name, family: identifier, display label, model family.
	        context_tokens: context length used for the chat-memory estimate.
	        min_b, good_b: LLM-quality bars in billions of parameters.
	        overhead_factor: multiplier applied to working-space / memory needs.
	        note: optional extra sentence attached to results for this goal.
	    """

	    def __init__(self, key, plain, family, ctx=4096, min_b=0.0, good_b=0.0,
	                 factor=1.0, note=""):
	        # min_b/good_b default to 0 because they are meaningless outside
	        # text models — a 0.003B YOLO is a complete, excellent model, not a
	        # too-small LLM. Only the text use cases set them explicitly.
	        self.key = key
	        self.plain_name = plain
	        self.family = family
	        self.context_tokens = ctx
	        self.min_b = min_b
	        self.good_b = good_b
	        self.overhead_factor = factor
	        self.note = note


	# Every goal the UI offers, keyed by UC.key. Positional arguments follow
	# UC.__init__: (key, plain, family, ctx, min_b, good_b, factor); `note` and
	# sometimes `factor` are passed by keyword. Non-text families leave the
	# LLM-only fields at their defaults.
	USE_CASES = {u.key: u for u in [
	UC("chat", "Just chatting / asking questions", "llm", 4096, 0.5, 3.0),
	UC("writing", "Writing & summarising", "llm", 4096, 1.5, 7.0),
	UC("coding", "Coding help", "llm", 8192, 3.0, 7.0,
	note="Bigger models are much more reliable for code."),
	UC("agents", "Agents & tool use", "llm", 8192, 7.0, 7.0, 1.15,
	note="Needs steady instruction-following — go medium or larger."),
	UC("rag", "Chat with your documents", "llm", 16384, 3.0, 7.0,
	note="Long documents use extra memory for context — that's included here."),
	UC("translate", "Translation", "llm", 4096, 1.5, 7.0),
	UC("finetune", "Fine-tune an LLM (LoRA)", "llm", 2048, 3.0, 7.0, 2.2,
	note="Training needs roughly 2-3x the memory of just chatting. That's baked into these numbers."),
	UC("custom", "Your custom goal", "llm", 4096, 0.5, 7.0),
	UC("vlm", "Chat about images & video", "vlm", 4096, 1.5, 4.0),
	UC("detect", "Object detection", "vision"),
	UC("segment", "Image segmentation", "vision"),
	UC("pose", "Pose estimation (2D & 6-DoF)", "vision"),
	UC("classify", "Image classification", "vision"),
	UC("depth", "Depth estimation", "vision"),
	UC("ocr", "Read text from images (OCR)", "vision"),
	UC("train-vision", "Train a vision model", "vision", factor=3.0,
	note="Training needs roughly 3x the memory of running the same model."),
	UC("imagegen", "Generate images", "imagegen"),
	UC("inpaint", "Edit / inpaint images", "imagegen"),
	UC("upscale", "Upscale / restore images", "imagegen"),
	UC("videogen", "Generate video", "imagegen"),
	UC("bgremove", "Remove backgrounds", "imagegen"),
	UC("stt", "Speech to text", "audio"),
	UC("tts", "Text to speech / voice", "audio"),
	UC("music", "Generate music", "audio"),
	UC("embed", "Semantic search / embeddings", "embed"),
	UC("forecast", "Time-series forecasting", "data"),
	UC("tabular", "Predict from spreadsheets", "data"),
	]}

	# Use cases answered by the whole LLM family (entries don't list these):
	# _by_use_case registers every "llm" catalogue entry under each of these keys.
	_TEXT_UCS = {"chat", "writing", "coding", "agents", "rag", "translate",
	"finetune", "custom"}

	# Static "how do I run this?" suggestions per model family. Each item is a
	# dict with "name", "what", "install" and "tag" keys. advise_real serves
	# these for non-"llm" families (and for goals the catalogue does not cover);
	# for "llm" goals with candidates it builds the list from pick_runtimes().
	_TOOLS = {
	"llm": [
	{"name": "Ollama", "what": "Type one line; it downloads and runs the model for you.",
	"install": "Get it from ollama.com", "tag": "Easiest"},
	{"name": "LM Studio", "what": "A point-and-click app with a chat window, no commands.",
	"install": "Download from lmstudio.ai", "tag": "Easy"},
	{"name": "llama.cpp", "what": "The lightweight engine under the hood. Runs GGUF files directly.",
	"install": "Releases on GitHub", "tag": "Advanced"},
	],
	"vision": [
	{"name": "Ultralytics", "what": "One pip install, then detect objects from a webcam or file.",
	"install": "pip install ultralytics", "tag": "Easiest"},
	{"name": "PyTorch", "what": "Full control for custom pipelines and training.",
	"install": "pytorch.org", "tag": "Advanced"},
	],
	"imagegen": [
	{"name": "ComfyUI", "what": "Powerful visual node editor for image/video pipelines.",
	"install": "Download from GitHub", "tag": "Moderate"},
	{"name": "diffusers", "what": "Hugging Face's Python library for generation pipelines.",
	"install": "pip install diffusers", "tag": "Moderate"},
	{"name": "Fooocus", "what": "Image generation that 'just works': one folder, double-click.",
	"install": "Download from GitHub", "tag": "Easiest"},
	],
	"audio": [
	{"name": "faster-whisper", "what": "Fast, accurate transcription with a tiny install.",
	"install": "pip install faster-whisper", "tag": "Easiest"},
	{"name": "whisper.cpp", "what": "Runs Whisper efficiently on CPU and small machines.",
	"install": "Build from GitHub", "tag": "Advanced"},
	],
	"embed": [
	{"name": "sentence-transformers", "what": "Turn text into searchable vectors in a few lines.",
	"install": "pip install sentence-transformers", "tag": "Easiest"},
	{"name": "Chroma", "what": "A simple local database to store and search those vectors.",
	"install": "pip install chromadb", "tag": "Easy"},
	],
	"data": [
	{"name": "Python + pip", "what": "These models ship as small Python packages.",
	"install": "pip install (see the model card)", "tag": "Easiest"},
	],
	}
	# "vlm" reuses the very same list object as "llm" (an alias, not a copy).
	_TOOLS["vlm"] = _TOOLS["llm"]


	# --------------------------------------------------------------------------
	# Catalogue access
	# --------------------------------------------------------------------------

	@lru_cache(maxsize=1)
	def catalogue() -> dict:
	    """Parse catalogue.json and return it.

	    The result is memoised by ``lru_cache``, so the file is read once and
	    every later call returns the same dict object.
	    """
	    raw_text = _CATALOGUE_PATH.read_text(encoding="utf-8")
	    return json.loads(raw_text)


	@lru_cache(maxsize=1)
	def _by_use_case() -> dict:
	    """Index catalogue entries by use-case key, largest model first.

	    "llm" entries are registered under every key in ``_TEXT_UCS``, "vlm"
	    entries under ``"vlm"``, and every other family under the keys listed
	    in the entry's own ``use_cases`` field.

	    Returns:
	        dict mapping use-case key -> list of entry dicts sorted by
	        ``params_b`` descending. The result is cached and shared between
	        callers, so it must not be mutated.
	    """
	    out: dict[str, list[dict]] = {}
	    for e in catalogue()["entries"]:
	        family = e["family"]
	        if family == "llm":
	            ucs = _TEXT_UCS
	        elif family == "vlm":
	            ucs = ("vlm",)
	        else:
	            # A null "use_cases" is treated like a missing one.
	            ucs = e.get("use_cases") or []
	        for uc in ucs:
	            out.setdefault(uc, []).append(e)
	    for models in out.values():
	        # `or 0` keeps the sort from raising when params_b is present but
	        # null; a plain .get default only covers a missing key.
	        models.sort(key=lambda e: e.get("params_b") or 0, reverse=True)
	    return out


	def catalogue_date() -> str:
	    """Return the first ten characters of the catalogue's "generated_at"
	    value, or "" when the catalogue has no such key."""
	    stamp = catalogue().get("generated_at", "")
	    return stamp[:10]


	# --------------------------------------------------------------------------
	# Memory math
	# --------------------------------------------------------------------------

	# Fallback architecture shapes by parameter count (conservative typicals),
	# used only when a gated repo hides its config.json.
	# Each row is (max params in billions, layers, hidden size).
	_ARCH_FALLBACK = [
	    (1.5, 24, 2048), (4.5, 28, 3072), (9.0, 32, 4096),
	    (16.0, 40, 5120), (40.0, 48, 6656), (1e9, 80, 8192),
	]


	def _kv_gb(entry: dict, ctx: int) -> tuple[float, bool]:
	    """KV-cache GB for `ctx` tokens. Returns (gb, exact?).

	    Args:
	        entry: catalogue entry. Reads ``context_len``, ``arch`` (with
	            ``hidden``, ``n_kv_heads``, ``n_heads``, ``n_layers``) and
	            ``params_b``.
	        ctx: requested context length in tokens; clamped to the entry's
	            ``context_len`` when that is set.

	    Returns:
	        ``(gb, True)`` when computed from the entry's ``arch``, otherwise
	        ``(gb, False)`` from the ``_ARCH_FALLBACK`` table.
	    """
	    ctx = min(ctx, entry.get("context_len") or ctx)
	    arch = entry.get("arch")
	    if arch:
	        # hidden * n_kv_heads / n_heads = width of the K (or V) projection.
	        per_layer = arch["hidden"] * arch["n_kv_heads"] / arch["n_heads"]
	        # NOTE(review): the leading 2 is presumably K+V and the trailing 2
	        # presumably bytes per fp16 element — confirm.
	        return 2 * arch["n_layers"] * per_layer * ctx * 2 / 1e9, True
	    params = entry.get("params_b")
	    if params is None:
	        # A null params_b gets the same 4.0 default as a missing one,
	        # instead of raising on the `<=` comparison below.
	        params = 4.0
	    for cap, layers, hidden in _ARCH_FALLBACK:
	        if params <= cap:
	            # NOTE(review): 0.30 presumably approximates the GQA reduction
	            # when the real head counts are unknown — confirm.
	            return 2 * layers * hidden * ctx * 2 * 0.30 / 1e9, False
	    return 1.0, False


	def _overhead_gb(weights: float, factor: float) -> float:
	    """Working-space GB for a model of `weights` GB, rounded to 2 places.

	    Factors of 2.0 and above use the training formula; smaller factors
	    scale the buffer plus 8% of the weights.
	    """
	    is_training = factor >= 2.0
	    if not is_training:
	        inference = (_CONFIDENCE_BUFFER_GB + 0.08 * weights) * factor
	        return round(inference, 2)
	    # training: optimizer state + activations dominate
	    training = _CONFIDENCE_BUFFER_GB + weights * (factor - 1.0)
	    return round(training, 2)


	def _estimate(entry: dict, quant: dict, ctx: int, factor: float) -> dict:
	    """Memory breakdown (GB) for running `entry` at `quant`.

	    Returns a dict with "weights" (the quant's ``file_gb``), "kv",
	    "overhead", "total" (sum of the three, rounded to 2 places) and
	    "kv_exact" (whether the KV figure came from the real architecture).
	    """
	    file_gb = quant["file_gb"]
	    raw_kv, from_real_arch = _kv_gb(entry, ctx)
	    kv_gb = round(raw_kv, 2)
	    working_gb = _overhead_gb(file_gb, factor)
	    total_gb = round(file_gb + kv_gb + working_gb, 2)
	    return {
	        "weights": file_gb,
	        "kv": kv_gb,
	        "overhead": working_gb,
	        "total": total_gb,
	        "kv_exact": from_real_arch,
	    }


	# --------------------------------------------------------------------------
	# Per-entry evaluation
	# --------------------------------------------------------------------------

	def _quant_rank(key: str) -> int:
	    """Position of `key` in ``_QUANT_ORDER`` (0 = highest quality); keys not
	    on the ladder rank after every known quant."""
	    try:
	        return _QUANT_ORDER.index(key)
	    except ValueError:
	        return len(_QUANT_ORDER)


	def _feel(entry: dict, verdict: str, spec: HardwareSpec) -> str:
	    """Plain-language speed description used when no numeric prediction exists.

	    Args:
	        entry: catalogue entry; reads ``active_params_b`` and ``params_b``.
	        verdict: "great", "tight" or "no".
	        spec: accepted for signature compatibility; not read here.

	    Returns:
	        "—" for a "no" verdict, a slow/part-offloaded sentence for "tight",
	        otherwise a sentence picked by the active parameter count
	        (<= 4B, <= 14B, larger).
	    """
	    if verdict == "no":
	        return "—"
	    if verdict == "tight":
	        if entry.get("active_params_b"):
	            return f"Usable even part-offloaded (only {entry['active_params_b']:g}B active per word)"
	        return "Slow — usable for short tasks, not snappy chat"
	    # The trailing `or 4` covers a params_b that is present but null, which
	    # would otherwise raise on the comparisons below. A missing key already
	    # defaulted to 4, and 0 lands in the same "Fast" bucket either way.
	    active = entry.get("active_params_b") or entry.get("params_b") or 4
	    if active <= 4:
	        return "Fast — replies feel instant"
	    if active <= 14:
	        return "Comfortable — quick enough for live chat"
	    return "Steady — fine, just not instant on big answers"


	def _eval_gguf(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
	    """Verdict for an LLM/VLM entry with a real quant ladder.

	    Returns a dict with "verdict" ("great" / "tight" / "no"), the chosen
	    "quant" dict and its memory "est" from ``_estimate``.
	    """
	    fast_budget, total_budget = spec.fast_budget_gb, spec.total_budget_gb
	    ladder = sorted(entry.get("quants", []), key=lambda item: _quant_rank(item["key"]))
	    tokens, multiplier = uc.context_tokens, uc.overhead_factor

	    # Fast path: best quality quant >= 4-bit that fits the GPU budget.
	    if spec.has_fast_path:
	        for candidate in ladder:
	            if _quant_rank(candidate["key"]) > _FOUR_BIT_RANK:
	                break  # don't call a sub-4-bit squeeze "runs great"
	            sizing = _estimate(entry, candidate, tokens, multiplier)
	            if sizing["total"] <= fast_budget * _SAFETY_FILL:
	                return {"verdict": "great", "quant": candidate, "est": sizing}

	    # Compromise: spill into ordinary RAM, shrinking quality only if needed.
	    first_by_key: dict = {}
	    for item in ladder:
	        first_by_key.setdefault(item["key"], item)
	    for wanted in _COMPROMISE_QUANTS:
	        candidate = first_by_key.get(wanted)
	        if not candidate:
	            continue
	        sizing = _estimate(entry, candidate, tokens, multiplier)
	        if sizing["total"] <= total_budget * _SAFETY_FILL:
	            return {"verdict": "tight", "quant": candidate, "est": sizing}

	    # Nothing fits: report against the last quant on the ladder, or a
	    # synthetic 4-bit guess when the entry lists none.
	    if ladder:
	        candidate = ladder[-1]
	    else:
	        candidate = {"key": "Q4_K_M", "plain": "Balanced (4-bit)",
	                     "file_gb": entry.get("params_b", 4) * 0.6}
	    return {"verdict": "no", "quant": candidate,
	            "est": _estimate(entry, candidate, tokens, multiplier)}


	def _eval_flat(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
	    """Verdict for a non-GGUF entry with one memory figure.

	    The need is the entry's ``mem_gb`` (4.0 when missing) times the use
	    case's overhead factor; it is reported as the "weights" and "total" of
	    the estimate, with zero KV and overhead.
	    """
	    required = round(entry.get("mem_gb", 4.0) * uc.overhead_factor, 2)
	    fast_budget, total_budget = spec.fast_budget_gb, spec.total_budget_gb
	    sizing = {"weights": required, "kv": 0.0, "overhead": 0.0,
	              "total": required, "kv_exact": False}
	    whole_model = {"key": "full", "plain": "Full model", "file_gb": required}

	    def outcome(verdict: str) -> dict:
	        return {"verdict": verdict, "quant": whole_model, "est": sizing}

	    if spec.has_fast_path and required <= fast_budget * _SAFETY_FILL:
	        return outcome("great")
	    # Image/video generation without a GPU is minutes-per-image: say so.
	    if entry["family"] == "imagegen" and not spec.has_fast_path and required > 4:
	        return outcome("no")
	    fits_in_ram = required <= total_budget * _SAFETY_FILL
	    return outcome("tight" if fits_in_ram else "no")


	def _evaluate(entry: dict, spec: HardwareSpec, uc: UC) -> dict:
	    """Evaluate one catalogue entry and tag the result with the entry itself.

	    Entries with a non-empty ``quants`` list go through ``_eval_gguf``; all
	    others through ``_eval_flat``.
	    """
	    evaluator = _eval_gguf if entry.get("quants") else _eval_flat
	    result = evaluator(entry, spec, uc)
	    result["entry"] = entry
	    return result


	# --------------------------------------------------------------------------
	# Advise: full UI-shaped result
	# --------------------------------------------------------------------------

	def _speed_pred(r: dict, spec: HardwareSpec, bw: float | None) -> dict | None:
	    """Measured/roofline tok/s prediction for a GGUF option, if bandwidth known.

	    Returns None for entries without quants, for "no" verdicts, and when
	    `bw` is missing or zero; otherwise whatever ``predict_decode_tps`` returns.
	    """
	    entry, verdict, sizing = r["entry"], r["verdict"], r["est"]
	    if not (entry.get("quants") and verdict != "no" and bw):
	        return None

	    total_params = entry.get("params_b") or 1.0
	    active_share = (entry.get("active_params_b") or total_params) / total_params

	    spilled = 0.0
	    if verdict == "tight":
	        # share of the read bytes that live in slow system RAM
	        gpu_room = spec.fast_budget_gb * _SAFETY_FILL
	        spilled = 1 - gpu_room / max(sizing["total"], 0.1)
	        spilled = max(0.0, min(1.0, spilled))

	    return predict_decode_tps(
	        bandwidth_gbs=bw,
	        weights_gb=sizing["weights"],
	        kv_gb=sizing["kv"],
	        active_fraction=active_share,
	        offload_fraction=spilled,
	    )


	def _option_json(r: dict, spec: HardwareSpec, bw: float | None = None) -> dict:
	    """Flatten one evaluation result into the option dict shown in the UI."""
	    entry, verdict = r["entry"], r["verdict"]

	    prediction = _speed_pred(r, spec, bw)
	    if prediction:
	        feel = feel_text(prediction)
	    else:
	        feel = _feel(entry, verdict, spec)
	    # Non-GGUF models that only fit in RAM on a GPU-less machine get a
	    # CPU-specific wording instead.
	    if not entry.get("quants") and verdict == "tight" and not spec.has_fast_path:
	        feel = "Runs on the processor — slow but workable"

	    links = entry.get("links") or {}
	    url = links.get("hf") or links.get("home", "")
	    memory = "Too big" if verdict == "no" else f"{r['est']['total']:g} GB"

	    return {
	        "verdict": verdict,
	        "model": entry["name"],
	        "desc": entry.get("good_for", ""),
	        "setting": r["quant"].get("plain", "Full model"),
	        "memory": memory,
	        "feel": feel,
	        "params_b": entry.get("params_b"),
	        "active_params_b": entry.get("active_params_b"),
	        "url": url,
	        "license": entry.get("license", ""),
	        "license_note": entry.get("license_note", ""),
	        "gated": entry.get("gated", False),
	        "run": entry.get("run", {}),
	        "provenance": entry.get("provenance", "estimated"),
	        "stale": entry.get("stale", False),
	    }


	def _pick_headline(results: list[dict], uc: UC) -> tuple[dict | None, bool]:
	    """Choose the single result to headline, and whether it meets the goal.

	    Args:
	        results: evaluation dicts carrying "verdict" and "entry".
	        uc: use case; reads ``min_b`` (minimum acceptable size) and
	            ``good_b`` (ideal size, 0 for non-LLM families).

	    Returns:
	        ``(result, meets_goal)``. ``meets_goal`` is True only when the pick
	        has at least ``uc.min_b`` billion parameters. ``(None, False)`` when
	        nothing runs at all.
	    """
	    def size_of(r):
	        # A null params_b counts as 0 so comparisons and max/min never see
	        # None; a missing key already defaulted to 0.
	        return r["entry"].get("params_b") or 0

	    great = [r for r in results if r["verdict"] == "great"]
	    tight = [r for r in results if r["verdict"] == "tight"]
	    great_ok = [r for r in great if size_of(r) >= uc.min_b]
	    tight_ok = [r for r in tight if size_of(r) >= uc.min_b]

	    if great_ok:
	        # Fast-and-capable is the best answer: biggest model that runs great.
	        return max(great_ok, key=size_of), True
	    if tight_ok:
	        if uc.good_b > 0:
	            # LLMs: close to the ideal size, not needlessly oversized-and-slow.
	            below = [r for r in tight_ok if size_of(r) <= uc.good_b * 1.5]
	            return (max(below, key=size_of) if below else min(tight_ok, key=size_of)), True
	        # Non-LLM families: the biggest model that fits is simply the best one.
	        return max(tight_ok, key=size_of), True
	    # Nothing clears the quality bar: still offer the best runnable option.
	    if great:
	        return max(great, key=size_of), False
	    if tight:
	        return min(tight, key=size_of), False
	    return None, False


	def _provenance_line(headline: dict \| None) -> str:
	if not headline:
	return ""
	e = headline["entry"]
	prov = e.get("provenance", "estimated")
	if prov == "filesize":
	line = ("Model size is the exact file size on Hugging Face. Chat memory and "
	"working space are conservative estimates with a 0.58 GB safety buffer "
	"(the 95% load-success margin fitted from ~19,500 real measurements).")
	if not headline["est"].get("kv_exact"):
	line += " This repo hides its exact shape, so chat memory is estimated from its size."
	return line
	if prov == "vendor":
	return "The memory figure is the maker's own published number."
	if prov == "community":
	return "The memory figure is community-reported, not vendor-published — treat it as a good estimate."
	return "The memory figure is estimated from the model's size — conservative, not measured."


	def advise_real(payload: dict, spec: HardwareSpec) -> dict:
	    """Build the full UI-shaped answer for one goal on one machine.

	    Args:
	        payload: request dict; only ``payload["usecase"]`` is read. Unknown
	            or missing keys fall back to the "chat" use case.
	        spec: the machine being checked.

	    Returns:
	        A dict with the keys "catalogue_version", "verdict", "verdict_word",
	        "headline", "detail", "note", "gauge", "options", "tools",
	        "commands", "provenance", "speed", "meets_goal", "use_case" and
	        "headline_model". The same key set is returned when the catalogue
	        has no candidates for the goal, so callers such as ``min_specs`` can
	        index the result unconditionally.
	    """
	    uc = USE_CASES.get(payload.get("usecase", "chat"), USE_CASES["chat"])
	    candidates = _by_use_case().get(uc.key, [])

	    # Honest gap, not a fake answer: if the catalogue doesn't cover a goal yet,
	    # say so and point at the live lookup instead of inventing options.
	    if not candidates:
	        return {
	            "catalogue_version": catalogue_date(),
	            "verdict": "tight", "verdict_word": "Not covered yet",
	            "headline": "Our catalogue doesn't cover this goal yet.",
	            "detail": ("FitCheck only answers from verified model data, and nothing in the "
	                       "current catalogue serves this goal — so rather than guess, we'd "
	                       "rather say so. If you know a specific model for it, paste its "
	                       "Hugging Face id in the <b>'Have a specific model in mind?'</b> box "
	                       "and we'll check that exact model against your machine."),
	            "note": "The catalogue grows every night; niche goals are next in line.",
	            "gauge": {}, "options": [], "tools": _TOOLS.get(uc.family, []),
	            "commands": {"intro": "", "items": []}, "provenance": "",
	            # Same shape as the normal return below: min_specs reads
	            # "headline_model" from every result.
	            "speed": None,
	            "meets_goal": False, "use_case": uc.plain_name,
	            "headline_model": "",
	        }

	    results = [_evaluate(e, spec, uc) for e in candidates]

	    fast, total = spec.fast_budget_gb, spec.total_budget_gb
	    headline, meets_goal = _pick_headline(results, uc)

	    bw, bw_src = bandwidth_for_spec(spec)
	    options = [_option_json(r, spec, bw) for r in results]

	    if headline:
	        e, est, q = headline["entry"], headline["est"], headline["quant"]
	        hv = headline["verdict"]
	        need = est["total"]
	        where = ("on your Mac" if spec.is_apple_silicon and hv == "great" else
	                 "on your graphics card" if hv == "great" and spec.has_fast_path else
	                 "using your computer's memory" if hv == "tight" else "")
	        if hv == "great":
	            head_text = f"Yes, you can run {e['name']} {where}, today."
	        else:
	            head_text = f"Sort of. {e['name']} will run {where}, with trade-offs."
	        if e.get("quants"):
	            detail = (
	                f"For this goal, the honest pick is <b>{e['name']}</b> at the "
	                f"<b>{q.get('plain', q['key'])}</b> setting. {e.get('good_for','')} "
	                f"It needs about <b>{need:g} GB</b> "
	                f"(the model file is {est['weights']:g} GB — exact size on Hugging Face — "
	                f"plus {est['kv']:g} GB chat memory and {est['overhead']:g} GB working space), "
	                f"and you have roughly <b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total."
	            )
	        else:
	            detail = (
	                f"For this goal, the honest pick is <b>{e['name']}</b>. "
	                f"{e.get('good_for','')} It needs about <b>{need:g} GB</b>, and you have "
	                f"roughly <b>{fast:g} GB</b> fast / <b>{total:g} GB</b> total."
	            )
	        model_part, work_part = est["weights"], round(need - est["weights"], 2)
	    else:
	        # Nothing runs: describe the gap using the lightest option's estimate.
	        hv = "no"
	        smallest = min(results, key=lambda r: r["est"]["total"], default=None)
	        need = smallest["est"]["total"] if smallest else 1.0
	        head_text = "This goal is a stretch on this machine. Here's the honest picture."
	        detail = (
	            f"Even the lightest option here needs about <b>{need:g} GB</b>, but this "
	            f"machine can offer only about <b>{total:g} GB</b> once the operating system "
	            f"has its share. That's not a failure — small computers just have small "
	            f"budgets. Adding memory, or a free cloud notebook, would open this up."
	        )
	        # No real breakdown exists here, so the gauge shows a fixed 80/20 split.
	        model_part, work_part = round(need * 0.8, 2), round(need * 0.2, 2)

	    note_bits = []
	    if headline and not meets_goal:
	        note_bits.append(
	            f"This is the best this machine can do, but it's on the small side for "
	            f"{uc.plain_name.lower()} — treat results as 'okay', not great.")
	    if uc.note:
	        note_bits.append(uc.note)
	    if headline and headline["entry"].get("mem_note"):
	        note_bits.append(headline["entry"]["mem_note"])
	    if headline and headline["entry"].get("license_note"):
	        note_bits.append(headline["entry"]["license_note"])
	    if headline and headline["entry"].get("gated"):
	        note_bits.append("This model is gated: accept its terms on Hugging Face once before downloading.")

	    # Gauge axis: 5% headroom past the larger of budget and need (at least 1 GB).
	    scale = max(total, need, 1) * 1.05
	    gauge = {
	        "need_gb": f"{need:g} GB needed",
	        "fast_gb": f"{fast:g} GB", "total_gb": f"{total:g} GB",
	        "fill_pct": round(min(need / scale, 1.0) * 100, 1),
	        "mark_pct": round(min(fast / scale, 1.0) * 100, 1),
	        "breakdown": [
	            {"label": f"Model {model_part:g} GB", "color": _C_MODEL},
	            {"label": f"Chat memory + working space {work_part:g} GB", "color": _C_WORK},
	        ],
	    }

	    speed = None
	    if headline:
	        pred = _speed_pred(headline, spec, bw)
	        if pred:
	            speed = {**pred, "bw": bw, "bw_source": bw_src,
	                     "model": headline["entry"]["name"]}

	    if uc.family == "llm":
	        tools = [{"name": r.name, "what": r.plain_what, "install": r.install_hint,
	                  "tag": r.difficulty} for r in pick_runtimes(spec)]
	    else:
	        tools = _TOOLS.get(uc.family, [])

	    commands = {"intro": "These get you running in minutes — real commands for the exact pick above.",
	                "items": []}
	    if headline:
	        run = headline["entry"].get("run", {})
	        if run.get("ollama"):
	            commands["items"].append({"label": "Easy way (Ollama)", "code": run["ollama"]})
	        if run.get("llamacpp"):
	            commands["items"].append({"label": "Power way (llama.cpp)", "code": run["llamacpp"]})
	        if run.get("pip"):
	            commands["items"].append({"label": "Install", "code": run["pip"]})

	    return {
	        "catalogue_version": catalogue_date(),
	        "verdict": hv,
	        "verdict_word": _VERDICT_WORD[hv],
	        "headline": head_text,
	        "detail": detail,
	        "note": " ".join(note_bits),
	        "gauge": gauge,
	        "options": options,
	        "tools": tools,
	        "commands": commands,
	        # The two adjacent f-strings are joined first; the whole speed
	        # sentence is appended only when a prediction exists.
	        "provenance": _provenance_line(headline) + (
	            f" Speed is {'predicted from real community measurements' if speed and speed['method'] == 'measured-model' else 'an analytical bandwidth estimate'}"
	            f" — see 'Why this speed?' below." if speed else ""),
	        "speed": speed,
	        "meets_goal": meets_goal,
	        "use_case": uc.plain_name,
	        "headline_model": headline["entry"]["name"] if headline else "",
	    }


	# --------------------------------------------------------------------------
	# Reverse mode: "what machine do I need for X?"
	# --------------------------------------------------------------------------

	# Ladders are cheap -> expensive. Budget hints are rough 2026 street prices for
	# a whole sensible build, shown as guidance, not gospel.
	# Each rung is (label, hardware kwargs read by _spec_for_tier, price hint);
	# min_specs walks a ladder from the top of the list down.
	_PC_LADDER = [
	("Any old laptop (8 GB RAM, no GPU)", dict(ram_gb=8, vram_gb=0, vendor="none"), "what you may already own"),
	("16 GB RAM laptop, no GPU", dict(ram_gb=16, vram_gb=0, vendor="none"), "~$500"),
	("16 GB RAM + RTX 4060 (8 GB)", dict(ram_gb=16, vram_gb=8, vendor="nvidia"), "~$800"),
	("16 GB RAM + RTX 3060 (12 GB)", dict(ram_gb=16, vram_gb=12, vendor="nvidia"), "~$900"),
	("32 GB RAM + RTX 5070 (12 GB)", dict(ram_gb=32, vram_gb=12, vendor="nvidia"), "~$1,300"),
	("32 GB RAM + RTX 5070 Ti (16 GB)", dict(ram_gb=32, vram_gb=16, vendor="nvidia"), "~$1,600"),
	("32 GB RAM + RTX 4090 (24 GB)", dict(ram_gb=32, vram_gb=24, vendor="nvidia"), "~$2,500"),
	("64 GB RAM + RTX 5090 (32 GB)", dict(ram_gb=64, vram_gb=32, vendor="nvidia"), "~$3,500+"),
	]
	# Mac rungs only carry ram_gb; _spec_for_tier reads nothing else for "mac".
	_MAC_LADDER = [
	("Mac with 16 GB unified memory", dict(ram_gb=16), "~$1,000"),
	("Mac with 24 GB unified memory", dict(ram_gb=24), "~$1,400"),
	("Mac with 32 GB unified memory", dict(ram_gb=32), "~$1,800"),
	("Mac with 64 GB unified memory", dict(ram_gb=64), "~$2,800"),
	("Mac with 128 GB unified memory", dict(ram_gb=128), "~$4,500+"),
	]


	def _spec_for_tier(kind: str, hw: dict) -> HardwareSpec:
	    """Turn one ladder rung's hardware dict into a ``HardwareSpec``.

	    ``kind == "mac"`` builds an Apple-silicon macOS spec from ``ram_gb``
	    alone; any other kind builds a Windows desktop spec, with ``vendor``
	    defaulting to "none" and ``vram_gb`` to 0.0.
	    """
	    memory_gb = hw["ram_gb"]
	    if kind != "mac":
	        return HardwareSpec(
	            os="windows",
	            ram_gb=memory_gb,
	            gpu_vendor=hw.get("vendor", "none"),
	            vram_gb=hw.get("vram_gb", 0.0),
	            form_factor="desktop",
	        )
	    return HardwareSpec(
	        os="macos",
	        ram_gb=memory_gb,
	        gpu_vendor="apple",
	        is_apple_silicon=True,
	        form_factor="mac",
	    )


	def min_specs(usecases) -> dict:
	    """For one OR several goals: the cheapest tier where EVERY goal genuinely
	    works (the union of requirements, not a sum), the tier where every goal
	    runs great, and what each goal would actually run on those tiers.
	    Pure engine inversion — fully offline.

	    Args:
	        usecases: a single use-case key or an iterable of keys. Unknown keys
	            fall back to "chat"; duplicates are dropped; an empty or None
	            value means ``["chat"]``.

	    Returns:
	        dict with "use_case", "goals", "catalogue_version", "note",
	        "disclaimer", and "pc" / "mac" sub-dicts, each holding a "minimum"
	        and a "comfortable" tier (None when no rung on the ladder qualifies).
	    """
	    if isinstance(usecases, str):
	        usecases = [usecases]
	    seen = set()
	    ucs = []
	    for u in usecases or ["chat"]:
	        uc = USE_CASES.get(u, USE_CASES["chat"])
	        if uc.key not in seen:
	            seen.add(uc.key)
	            ucs.append(uc)

	    def walk(kind, ladder):
	        """Return (minimum, comfortable) tiers for one ladder."""
	        minimum = comfortable = None
	        for label, hw, price in ladder:
	            spec = _spec_for_tier(kind, hw)
	            per_goal, all_meet, all_great = [], True, True
	            for uc in ucs:
	                res = advise_real({"usecase": uc.key}, spec)
	                meets = bool(res["meets_goal"])
	                verdict = res["verdict"]
	                all_meet = all_meet and meets and verdict in ("great", "tight")
	                all_great = all_great and meets and verdict == "great"
	                # .get: a result for a goal the catalogue does not cover
	                # may carry no "headline_model" key at all.
	                per_goal.append({"goal": uc.plain_name,
	                                 "model": res.get("headline_model") or "nothing realistic",
	                                 "verdict": verdict})
	            if len(per_goal) > 1:
	                runs = "; ".join(f"{g['goal']}: {g['model']}" for g in per_goal)
	            else:
	                runs = per_goal[0]["model"]
	            tier = {"label": label, "price": price, "goals": per_goal, "runs": runs}
	            if minimum is None and all_meet:
	                minimum = tier
	            if comfortable is None and all_great:
	                comfortable = tier
	            if minimum and comfortable:
	                break  # both answers found; costlier rungs add nothing
	        return minimum, comfortable

	    pc_min, pc_comfy = walk("pc", _PC_LADDER)
	    mac_min, mac_comfy = walk("mac", _MAC_LADDER)
	    notes = [uc.note for uc in ucs if uc.note]
	    return {
	        "use_case": " + ".join(uc.plain_name for uc in ucs),
	        "goals": [uc.plain_name for uc in ucs],
	        "catalogue_version": catalogue_date(),
	        "note": " ".join(notes),
	        "pc": {"minimum": pc_min, "comfortable": pc_comfy},
	        "mac": {"minimum": mac_min, "comfortable": mac_comfy},
	        "disclaimer": ("Price hints are rough 2026 street prices for a sensible whole "
	                       "build — they vary a lot by region and second-hand luck. The "
	                       "memory math is the same conservative engine as the main check."
	                       + (" Tiers are the union of every goal you picked: each one has "
	                          "to genuinely work." if len(ucs) > 1 else "")),
	    }