""" The spec-parser brick: messy human text -> the advisor's form fields. Serves cn0303/fitcheck-spec-parser (Qwen3-1.7B + LoRA, trained in this repo — see scripts/train_spec_lora.py and the model card for the honest eval). Same serving pattern as the narrator: lazy load inside @spaces.GPU, loud errors, no fake fallbacks. Missing info comes back null — the model is specifically gated against inventing specs. """ import json import re import sys from model_brick import _should_load ADAPTER_ID = "cn0303/fitcheck-spec-parser" BASE_ID = "unsloth/Qwen3-1.7B" # MUST stay in sync with scripts/build_spec_dataset.py (the training prompt). SYSTEM_PROMPT = """\ You turn a person's description of their computer into JSON for a hardware checker. Output ONLY a JSON object with exactly these fields: {"computer": "Windows laptop"|"Windows desktop"|"Mac"|"Linux PC"|"Mini PC / Raspberry Pi"|null, "ram_gb": number|null, "provider": "nvidia"|"amd"|"apple"|"intel"|"none"|null, "gpu": string|null, "vram_gb": number|null} Rules: - Extract ONLY what the text states or directly implies. Anything not stated is null. Never guess or invent a spec. - "provider": "none" ONLY when the text says there is no separate graphics card (e.g. "no GPU", "integrated only"). Graphics simply not mentioned or unknown -> null. - "gpu" must be a specific model (e.g. "RTX 3060"). A brand or series alone ("geforce", "gtx", "radeon") is NOT a gpu -> set provider, leave gpu null. - If the text describes two or more different machines or a choice between them, every field is null.""" FIELDS = ("computer", "ram_gb", "provider", "gpu", "vram_gb") _GENERATE = None _state = {"tok": None, "model": None} if _should_load(): try: import spaces import torch from transformers import AutoModelForCausalLM, AutoTokenizer def _load(): from peft import PeftModel tok = AutoTokenizer.from_pretrained(ADAPTER_ID) model = AutoModelForCausalLM.from_pretrained(BASE_ID, dtype=torch.bfloat16) model = PeftModel.from_pretrained(model, ADAPTER_ID) _state["tok"] = tok _state["model"] = model.to("cuda").eval() @spaces.GPU(duration=120) # cold path = 3.4GB download + load + generate def _generate(text: str) -> str: if _state["model"] is None: _load() tok, model = _state["tok"], _state["model"] msgs = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": text}] kw = dict(add_generation_prompt=True, return_tensors="pt", return_dict=True) try: inputs = tok.apply_chat_template(msgs, enable_thinking=False, **kw) except TypeError: inputs = tok.apply_chat_template(msgs, **kw) inputs = inputs.to("cuda") n = inputs["input_ids"].shape[1] with torch.no_grad(): out = model.generate(**inputs, max_new_tokens=96, do_sample=False, pad_token_id=tok.eos_token_id) return tok.decode(out[0][n:], skip_special_tokens=True).strip() _GENERATE = _generate except Exception as e: # noqa: BLE001 print(f"[FitCheck] spec parser unavailable: {e!r}", file=sys.stderr, flush=True) def parse_specs(text: str) -> dict: """Returns the parsed fields, or {error} — never invented content.""" text = (text or "").strip() if not text: return {"error": "Nothing to parse — paste or type a description first."} if len(text) > 4000: text = text[:4000] if _GENERATE is None: return {"error": "The spec parser model isn't loaded in this environment."} try: raw = _GENERATE(text) except Exception as e: # noqa: BLE001 return {"error": f"Spec parser failed: {e}"} m = re.search(r"\{.*\}", raw, re.DOTALL) if not m: return {"error": f"The parser didn't return JSON. Raw output: {raw[:200]}"} try: obj = json.loads(m.group(0)) except json.JSONDecodeError: return {"error": f"The parser returned malformed JSON: {m.group(0)[:200]}"} return {f: obj.get(f) for f in FIELDS}