ModuleMind

Running on Zero

App Files Files Community

Quazim0t0 committed 6 days ago

Commit

73dd4cf

verified ·

1 Parent(s): d4da65a

Upload 7 files

Browse files

Files changed (2) hide show

agents/modmind/moe_gradio.py +137 -0
agents/modmind/train_qa_link.py +250 -0

agents/modmind/moe_gradio.py CHANGED Viewed

@@ -69,6 +69,10 @@ class SpikeWhaleMoE:
         self.trained_link = None     # the TRAINED bridge (train_link.py output), for the consult demo
         self.bridge_asker = None     # the FULL fine-tuned asker, for reproducible key-recall
         self.link_meta = None
         self.reload()
     def reload(self):
@@ -99,6 +103,7 @@ class SpikeWhaleMoE:
             self.steps[slot] = step
             self._mtime[slot] = mt
         self._load_links()
         return list(self.models)
     def available(self):
@@ -116,6 +121,10 @@ class SpikeWhaleMoE:
             self.trained_link = self.trained_link.to(device)
         if self.bridge_asker is not None:
             self.bridge_asker = self.bridge_asker.to(device)
         cache = getattr(self, "_merge_cache", None)
         if cache is not None:
             self._merge_cache = (cache[0], cache[1].to(device))
@@ -179,6 +188,80 @@ class SpikeWhaleMoE:
                               "with_latent": wl, "without_latent": nl}
             return   # one bridge is enough for the panel demo
     def key_recall_available(self):
         return self.bridge_asker is not None and self.trained_link is not None
@@ -210,6 +293,36 @@ class SpikeWhaleMoE:
             examples.append((k, out, ok))
         return {"acc": correct / max(1, n), "examples": examples}
     def consult_available(self):
         return self.trained_link is not None
@@ -338,6 +451,30 @@ class SpikeWhaleMoE:
                 break
         return expert, tok.decode(ids[0, start:].tolist())
     @torch.no_grad()
     def run(self, query: str, max_new: int = 160, temperature: float = 0.8):
         """Full pass: route -> fuse latents -> generate from the winner."""

         self.trained_link = None     # the TRAINED bridge (train_link.py output), for the consult demo
         self.bridge_asker = None     # the FULL fine-tuned asker, for reproducible key-recall
         self.link_meta = None
+        self.qa_link = None          # the question->answer bridge (train_qa_link.py output)
+        self.qa_asker = None
+        self.qa_meta = None
+        self._qa_mtime = None
         self.reload()
     def reload(self):
             self.steps[slot] = step
             self._mtime[slot] = mt
         self._load_links()
+        self._load_qa()
         return list(self.models)
     def available(self):
             self.trained_link = self.trained_link.to(device)
         if self.bridge_asker is not None:
             self.bridge_asker = self.bridge_asker.to(device)
+        if self.qa_link is not None:
+            self.qa_link = self.qa_link.to(device)
+        if self.qa_asker is not None:
+            self.qa_asker = self.qa_asker.to(device)
         cache = getattr(self, "_merge_cache", None)
         if cache is not None:
             self._merge_cache = (cache[0], cache[1].to(device))
                               "with_latent": wl, "without_latent": nl}
             return   # one bridge is enough for the panel demo
+    def _load_qa(self):
+        """Load (or hot-reload) the question->answer bridge written by train_qa_link.py.
+
+        The checkpoint holds a NEW RecursiveLink (``link.*`` keys) and a fully
+        fine-tuned asker (``asker.*`` keys) plus string metadata. Loading is cached on
+        the file's mtime (the file is ~190MB), so calling this again with an unchanged
+        file does nothing.
+
+        Outcomes:
+          * file missing                       -> qa_link / qa_asker / qa_meta / _qa_mtime set to None
+          * same mtime and a bridge is loaded  -> no-op
+          * asker or consultant slot not in self.models -> no-op (previous state kept)
+          * otherwise                          -> all four attributes are replaced
+        """
+        a_dom, c_dom = LINKS[0]
+        path = CKPT_ROOT / "links" / f"qa__{a_dom}__from__{c_dom}.safetensors"
+        if not path.exists():
+            self.qa_link = self.qa_asker = self.qa_meta = None
+            self._qa_mtime = None
+            return
+        mt = os.path.getmtime(path)
+        if self._qa_mtime == mt and self.qa_link is not None:
+            return
+        a, c = DOMAIN2SLOT[a_dom], DOMAIN2SLOT[c_dom]
+        if a not in self.models or c not in self.models:
+            return
+        from safetensors import safe_open
+        # Read tensors AND metadata through ONE handle. This file is hot-reloaded while
+        # training keeps replacing it, so opening it twice could pair the weights of one
+        # checkpoint with the metadata (step, holdout_exact) of the next.
+        t = {}
+        with safe_open(str(path), framework="pt", device=self.device) as f:
+            md = f.metadata() or {}
+            for k in f.keys():
+                v = f.get_tensor(k)
+                # floating tensors are upcast to float32; other dtypes are kept as stored
+                t[k] = v.float() if v.is_floating_point() else v
+
+        def sub_state(prefix):
+            # state_dict of one sub-module: its keys only, with the prefix stripped
+            return {k[len(prefix):]: v for k, v in t.items() if k.startswith(prefix)}
+
+        link = RecursiveLink(d_latent=D_LATENT).to(self.device).eval()
+        link.load_state_dict(sub_state("link."))
+        ask = SpikeWhaleLM(specialist_config(a_dom)).to(self.device).eval()
+        ask.load_state_dict(sub_state("asker."))
+        self.qa_link, self.qa_asker = link, ask
+        # safetensors metadata values are strings; convert the numeric ones here
+        self.qa_meta = {"asker": a, "consultant": c,
+                        "ans_len": int(md.get("ans_len", 3)), "prompt": md.get("prompt", "ANS> "),
+                        "holdout_exact": float(md.get("holdout_exact", "nan")),
+                        "step": int(md.get("step", 0))}
+        self._qa_mtime = mt
+    def qa_available(self):
+        """True only when both halves of the QA bridge (link and asker) are loaded."""
+        return not (self.qa_link is None or self.qa_asker is None)
+    def qa_info(self):
+        """Return a shallow copy of the QA bridge metadata, or None when it is unset/empty."""
+        meta = self.qa_meta
+        if not meta:
+            return None
+        return dict(meta)
+    @torch.no_grad()
+    def ask_math(self, a: int, op: str, b: int, ablate: bool = False):
+        """Answer ``a op b`` through the QA bridge; the asker never sees the question text.
+
+        The consultant model encodes the rendered question, the QA RecursiveLink maps
+        its latent, and the QA asker greedily decodes ``ans_len`` tokens starting from
+        the bare prompt (default 'ANS> ') with that latent injected.
+        ablate=True injects an all-zero latent of the same shape instead.
+
+        Returns either {"error": msg} (bridge missing, unsupported op, or result outside
+        0 .. 10**ans_len - 1) or a dict with question, digits, answer, truth, want,
+        ok (one bool per expected digit) and exact.
+        """
+        if not self.qa_available():
+            return {"error": "qa bridge not trained yet"}
+        info = self.qa_meta
+        a, b = int(a), int(b)
+        if op == "+":
+            truth = a + b
+        elif op == "-":
+            truth = a - b
+        elif op == "*":
+            truth = a * b
+        else:
+            return {"error": "op must be one of + - *"}
+        n_digits = info["ans_len"]
+        if truth < 0 or truth >= 10 ** n_digits:
+            return {"error": "answer out of the trained range"}
+
+        # consultant side: question text -> latent -> link output
+        q = f"{a} {op} {b} ="
+        consultant = info["consultant"]
+        q_tokens = self.toks[consultant].encode(q, add_special_tokens=False)
+        c_ids = torch.tensor([q_tokens], device=self.device)
+        latent = self.models[consultant](input_ids=c_ids).latent
+        inj = self.qa_link(latent)
+        if ablate:
+            inj = torch.zeros_like(inj)      # same shape, no information
+
+        # asker side: greedy decode of exactly n_digits tokens after the prompt
+        a_tok = self.toks[info["asker"]]
+        prompt_tokens = a_tok.encode(info["prompt"], add_special_tokens=False)
+        ids = torch.tensor([prompt_tokens], device=self.device)
+        plen = ids.shape[1]
+        for _ in range(n_digits):
+            step_logits = self.qa_asker(input_ids=ids, inject_latent=inj).logits[:, -1, :]
+            nxt = step_logits.argmax(-1, keepdim=True)
+            ids = torch.cat([ids, nxt], dim=1)
+        digits = a_tok.decode(ids[0, plen:].tolist())
+
+        want = f"{truth:0{n_digits}d}"
+        # slicing yields "" past the end of digits, so a short decode counts as a miss
+        per_digit = [digits[i:i + 1] == ch for i, ch in enumerate(want)]
+        return {"question": q, "digits": digits, "answer": digits.lstrip("0") or "0",
+                "truth": truth, "want": want,
+                "ok": per_digit,
+                "exact": digits == want}
     def key_recall_available(self):
         return self.bridge_asker is not None and self.trained_link is not None
             examples.append((k, out, ok))
         return {"acc": correct / max(1, n), "examples": examples}
+    @torch.no_grad()
+    def relay_secret(self, secret: str, ablate: bool = False):
+        """Interactive key-recall: the consultant sees a user-chosen key, the asker reads it back.
+
+        Characters of ``secret`` not in KEY_CHARS are dropped; what remains must be
+        exactly ``key_len`` characters (link_meta, default 6). The asker is fed the
+        prompt followed by the key tokens, and the prediction at each key position is
+        the argmax of the logits one position earlier. ablate=True injects an all-zero
+        latent of the same shape.
+
+        Returns {"error": msg} or {secret, recovered, ok: [per-char bool], aligned}.
+        aligned=False means the tokenizer fused some characters into multi-char tokens
+        the bridge never saw in training, so expect degradation.
+        """
+        if not self.key_recall_available():
+            return {"error": "bridge unavailable"}
+        kept = [ch for ch in (secret or "") if ch in KEY_CHARS]
+        s = "".join(kept)
+        key_len = self.link_meta.get("key_len", 6)
+        if len(s) != key_len:
+            return {"error": f"need exactly {key_len} characters (letters and digits only)"}
+
+        a = self.link_meta["asker"]
+        c = self.link_meta["consultant"]
+        a_tok = self.toks[a]
+        c_tok = self.toks[c]
+        prompt = self.link_meta.get("prompt", "KEY> ")
+        prompt_tokens = a_tok.encode(prompt, add_special_tokens=False)
+        plen = len(prompt_tokens)
+        c_ids = torch.tensor([c_tok.encode(s, add_special_tokens=False)], device=self.device)
+        key_tokens = a_tok.encode(s, add_special_tokens=False)
+        a_ids = torch.tensor([prompt_tokens + key_tokens], device=self.device)
+        # one token per character on BOTH sides, otherwise positions do not line up
+        aligned = c_ids.shape[1] == key_len and a_ids.shape[1] == plen + key_len
+
+        latent = self.models[c](input_ids=c_ids).latent
+        inj = self.trained_link(latent)
+        if ablate:
+            inj = torch.zeros_like(inj)
+        logits = self.bridge_asker(input_ids=a_ids, inject_latent=inj).logits
+        first = plen - 1                    # logits at position t predict token t + 1
+        pred = logits[:, first:first + key_len, :].argmax(-1)[0]
+        out = a_tok.decode(pred.tolist())[:len(s)]
+        matches = [out[i:i + 1] == ch for i, ch in enumerate(s)]
+        return {"secret": s, "recovered": out,
+                "ok": matches,
+                "aligned": aligned}
     def consult_available(self):
         return self.trained_link is not None
                 break
         return expert, tok.decode(ids[0, start:].tolist())
+    @torch.no_grad()
+    def generate_stream(self, query: str, expert: str | None = None, max_new: int = 160,
+                        temperature: float = 0.8, top_k: int = 40, chunk: int = 4):
+        """Like generate(), but yields (expert, text_so_far) as tokens arrive, so the UI
+        can show generation live instead of freezing until the whole thing is done.
+
+        text_so_far decodes every token sampled so far (the query tokens are excluded).
+        A yield happens after every ``chunk`` tokens, on the last of ``max_new`` tokens,
+        and when the tokenizer's EOS token is sampled, which also ends generation.
+
+        expert:      slot to generate from; None takes the first value of self.route(query).
+        temperature: logits are divided by max(1e-5, temperature).
+        top_k:       keep only the k largest logits before sampling; a falsy value disables it.
+        chunk:       tokens per yield; values below 1 are treated as 1.
+        """
+        if expert is None:
+            expert, _, _ = self.route(query)
+        m, tok = self.models[expert], self.toks[expert]
+        # chunk == 0 would raise ZeroDivisionError in `i % chunk`, and a negative chunk
+        # would never satisfy the periodic-yield test; clamp so the stream always updates.
+        chunk = max(1, int(chunk))
+        ids = self._ids(expert, query)
+        start = ids.shape[1]
+        ctx_max = int(getattr(m.config, "max_position_embeddings", 2048))
+        for i in range(max_new):
+            # feed at most ctx_max trailing tokens; only the last position's logits are used
+            logits = m(input_ids=ids[:, -ctx_max:]).logits[:, -1, :] / max(1e-5, temperature)
+            if top_k:
+                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                # everything below the k-th largest logit gets zero probability
+                logits[logits < v[:, [-1]]] = -float("inf")
+            nxt = torch.multinomial(F.softmax(logits, dim=-1), 1)
+            ids = torch.cat([ids, nxt], dim=1)
+            done = tok.eos_token_id is not None and int(nxt.item()) == tok.eos_token_id
+            if done or i % chunk == chunk - 1 or i == max_new - 1:
+                yield expert, tok.decode(ids[0, start:].tolist())
+            if done:
+                break
     @torch.no_grad()
     def run(self, query: str, max_new: int = 160, temperature: float = 0.8):
         """Full pass: route -> fuse latents -> generate from the winner."""

agents/modmind/train_qa_link.py ADDED Viewed

	@@ -0,0 +1,250 @@

+"""
+train_qa_link.py -- train a question->answer latent bridge (the upgrade the panel's
+"Tell Math a secret" demo points at).
+Task: an arithmetic question ("23 + 54 =") is shown ONLY to the frozen Math/reasoning
+specialist, which encodes it to its 256-d output latent. A NEW RecursiveLink + a
+fine-tuned Language asker must emit the ANSWER ("077", zero-padded digits) reading
+nothing but that latent: asker input is just "ANS> " + answer digits (teacher-forced
+in training; decoded autoregressively at eval). With --holdout N, N% of (a, op, b)
+problems are HELD OUT of training, so eval accuracy on them is generalization, not
+memorization; the default (--holdout 0) trains on ALL problems and reports coverage of
+the whole table instead. Ablating the latent removes the question entirely -> accuracy
+collapses to the digit prior.
+Saves links/qa__language__from__reasoning.safetensors in the same key style as the
+key-recall bridge (link./ali./asker. + metadata) for moe_gradio.py to load.
+Run:  python agents/modmind/train_qa_link.py  [--steps 4000] [--device cuda]
+"""
+from __future__ import annotations
+import argparse
+import hashlib
+import json
+import os
+import random
+import sys
+import time
+import torch
+import torch.nn.functional as F
+HERE = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, HERE)
+from model import RecursiveLink, SpikeWhaleLM            # noqa: E402
+from specialist_presets import specialist_config          # noqa: E402
+from spike_tokenizer import SpikeTokenizer                # noqa: E402
+# Domain names: used as the per-specialist sub-directory in load_specialist(), in the
+# output file name, and in the saved metadata.
+ASKER, CONSULTANT = "language", "reasoning"
+# Latent width: passed to RecursiveLink and used to size the buffer in encode_questions().
+D_LATENT = 256
+# The asker's entire textual input at decode time; the answer tokens follow it.
+PROMPT = "ANS> "
+ANS_LEN = 3                       # answers zero-padded to 3 digits ("077")
+# NOTE(review): not referenced by any code visible in this file -- main() takes the
+# percentage from --holdout, whose default is 0. Confirm which default is intended.
+HOLDOUT_PCT = 8                   # % of problems held out of training entirely
+# Best-checkpoint path; main() keeps its resume state next to it at OUT + ".last.pt".
+OUT = os.path.join(HERE, "links", f"qa__{ASKER}__from__{CONSULTANT}.safetensors")
+# ---- the problem space --------------------------------------------------------
+def all_problems():
+    """Enumerate every (a, op, b) the bridge is trained/evaluated on. Answers are 0..198.
+
+    Order is stable: for each two-digit pair (a, b) the addition comes first, then the
+    subtraction when a >= b (so no negative results); the 2..12 times table is appended
+    at the end.
+    """
+    two_digit = range(10, 100)
+    table = range(2, 13)
+    add_sub = [
+        (a, op, b)
+        for a in two_digit
+        for b in two_digit
+        for op in ("+", "-")
+        if op == "+" or a >= b
+    ]
+    times = [(a, "*", b) for a in table for b in table]
+    return add_sub + times
+def answer(a, op, b):
+    """Return the result of ``a op b`` for op in '+', '-', '*'; any other op raises KeyError."""
+    total, difference, product = a + b, a - b, a * b
+    by_op = {"+": total, "-": difference, "*": product}
+    return by_op[op]
+def is_holdout(a, op, b, pct):
+    """Deterministically decide whether problem (a, op, b) is held out of training.
+
+    The decision depends only on the problem itself (first MD5 byte of "{a}{op}{b}"),
+    so the split is the same on every run. pct <= 0 holds nothing out.
+    """
+    if pct <= 0:
+        return False
+    key = f"{a}{op}{b}".encode()
+    first_byte = hashlib.md5(key).digest()[0]
+    # A byte is 0..255, so residues 0..55 occur three times and 56..99 only twice: the
+    # held-out share is a little above pct for small pct. Left as is on purpose --
+    # changing the hash would move problems between the train and held-out sets.
+    return first_byte % 100 < pct
+def render(a, op, b):
+    """Format a problem as the question text shown to the consultant, e.g. '23 + 54 ='."""
+    return "{} {} {} =".format(a, op, b)
+# ---- model loading (same pattern as moe_gradio.py) ------------------------------
+def load_specialist(domain, device):
+    """Build the specialist for ``domain`` on ``device`` and load its weights and tokenizer.
+
+    Reads <HERE>/<domain>/checkpoints/model.safetensors and <HERE>/<domain>/tokenizer.json.
+    Floating-point tensors are upcast to float32 before loading; other dtypes are kept.
+    Returns (model, tokenizer).
+    """
+    from safetensors.torch import load_file
+    domain_dir = os.path.join(HERE, domain)
+    weights_path = os.path.join(domain_dir, "checkpoints", "model.safetensors")
+    model = SpikeWhaleLM(specialist_config(domain)).to(device)
+    state = {}
+    for name, tensor in load_file(weights_path, device=device).items():
+        state[name] = tensor.float() if tensor.is_floating_point() else tensor
+    model.load_state_dict(state)
+    tokenizer = SpikeTokenizer(vocab_file=os.path.join(domain_dir, "tokenizer.json"))
+    return model, tokenizer
+# ---- training -------------------------------------------------------------------
+def main():
+    """Train the QA link and the asker; keep the best checkpoint in OUT.
+
+    Flow: parse CLI args -> load the frozen consultant and the trainable asker ->
+    split the problem table into train / eval pools (``--holdout``; 0 means train on
+    everything and evaluate on the same table) -> resume from ``OUT + ".last.pt"``
+    unless ``--fresh`` -> teacher-forced training with a greedy autoregressive
+    evaluation every ``--eval-every`` steps -> final latent-ablation report.
+
+    save() is called whenever the evaluation score improves; the resume checkpoint is
+    rewritten after every evaluation.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--steps", type=int, default=4000)
+    ap.add_argument("--batch", type=int, default=128)
+    ap.add_argument("--link-lr", type=float, default=1e-3)
+    ap.add_argument("--asker-lr", type=float, default=1e-4)
+    ap.add_argument("--asker-wd", type=float, default=0.0)
+    ap.add_argument("--holdout", type=int, default=0,
+                    help="%% of problems held out of training (0 = train on ALL, the lookup-table demo)")
+    ap.add_argument("--eval-every", type=int, default=200)
+    ap.add_argument("--eval-n", type=int, default=256)
+    ap.add_argument("--eval-chunk", type=int, default=64)     # keep eval VRAM peaks small
+    ap.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
+    ap.add_argument("--seed", type=int, default=0)
+    ap.add_argument("--fresh", action="store_true", help="ignore last.pt and start over")
+    args = ap.parse_args()
+    dev = args.device
+    random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    print(f"[qa-link] device={dev}", flush=True)
+    consultant, c_tok = load_specialist(CONSULTANT, dev)
+    asker, a_tok = load_specialist(ASKER, dev)
+    # the consultant is never updated: eval mode, no gradients
+    consultant.eval()
+    for p in consultant.parameters():
+        p.requires_grad_(False)
+    # answer digits must be single tokens for the asker (position-aligned readout)
+    digit_ids = []
+    for d in "0123456789":
+        ids = a_tok.encode(d, add_special_tokens=False)
+        if len(ids) != 1:
+            # explicit raise instead of assert: the check must survive `python -O`
+            raise ValueError(f"digit {d!r} is not a single token: {ids}")
+        digit_ids.append(ids[0])
+    prompt_ids = a_tok.encode(PROMPT, add_special_tokens=False)
+    plen = len(prompt_ids)
+    print(f"[qa-link] prompt {PROMPT!r} = {plen} tokens; digits map to single tokens", flush=True)
+    link = RecursiveLink(d_latent=D_LATENT).to(dev)
+    # two parameter groups so the fresh link and the pretrained asker get their own lr / wd
+    opt = torch.optim.AdamW([
+        {"params": list(link.parameters()), "lr": args.link_lr, "weight_decay": 0.0},
+        {"params": list(asker.parameters()), "lr": args.asker_lr, "weight_decay": args.asker_wd},
+    ])
+    probs = all_problems()
+    train_pool = [p for p in probs if not is_holdout(*p, args.holdout)]
+    eval_pool = [p for p in probs if is_holdout(*p, args.holdout)]
+    memorize = args.holdout <= 0
+    if memorize:
+        eval_pool = train_pool          # no holdout: "accuracy" = coverage of the whole table
+        print(f"[qa-link] MEMORIZE mode: training on ALL {len(train_pool)} problems (no holdout)", flush=True)
+    else:
+        print(f"[qa-link] {len(train_pool)} train problems, {len(eval_pool)} held out", flush=True)
+    label = "accuracy" if memorize else "held-out exact"
+    if not train_pool:
+        raise SystemExit(f"[qa-link] --holdout {args.holdout} leaves no problems to train on")
+    # random.sample() raises ValueError when asked for more items than the pool holds
+    batch_size = min(args.batch, len(train_pool))
+
+    @torch.no_grad()
+    def encode_questions(batch):
+        """Frozen consultant -> latents. Bucketed by token length (latent is a
+        mean-pool over positions, so padding would corrupt it)."""
+        idss = [c_tok.encode(render(*p), add_special_tokens=False) for p in batch]
+        lat = torch.zeros(len(batch), D_LATENT, device=dev)
+        by_len = {}
+        for i, ids in enumerate(idss):
+            by_len.setdefault(len(ids), []).append(i)
+        for idx in by_len.values():
+            c_ids = torch.tensor([idss[i] for i in idx], device=dev)
+            # write each bucket's latents back to the rows its problems came from
+            lat[idx] = consultant(input_ids=c_ids).latent
+        return lat
+
+    def ans_tokens(p):
+        """Token ids of the zero-padded ANS_LEN-digit answer to problem p."""
+        return [digit_ids[int(ch)] for ch in f"{answer(*p):0{ANS_LEN}d}"]
+
+    @torch.no_grad()
+    def evaluate(pool, n, ablate=False):
+        """Autoregressive 3-digit decode (full-vocab argmax, no teacher forcing).
+        Chunked to keep VRAM peaks small.
+        Returns (exact-match rate, per-digit rate) over up to n problems sampled from
+        pool; ablate=True replaces the link output with zeros."""
+        asker.eval()
+        sample = random.sample(pool, min(n, len(pool)))
+        hit_e = hit_d = 0
+        for o in range(0, len(sample), args.eval_chunk):
+            chunk = sample[o:o + args.eval_chunk]
+            lat = encode_questions(chunk)
+            inj = torch.zeros_like(link(lat)) if ablate else link(lat)
+            ids = torch.tensor([prompt_ids] * len(chunk), device=dev)
+            for _ in range(ANS_LEN):
+                logits = asker(input_ids=ids, inject_latent=inj).logits[:, -1, :]
+                ids = torch.cat([ids, logits.argmax(-1, keepdim=True)], dim=1)
+            pred = ids[:, plen:]
+            tgt = torch.tensor([ans_tokens(p) for p in chunk], device=dev)
+            hit_e += int((pred == tgt).all(dim=1).sum())
+            hit_d += int((pred == tgt).sum())
+        asker.train()
+        # an empty sample (--eval-n 0 or an empty pool) reports 0.0 instead of dividing by zero
+        denom = max(1, len(sample))
+        return hit_e / denom, hit_d / (denom * ANS_LEN)
+
+    # resume from last.pt if a previous run died mid-flight
+    last_pt = OUT + ".last.pt"
+    best, start_step = -1.0, 0
+    if os.path.exists(last_pt) and not args.fresh:
+        # NOTE: weights_only=False unpickles arbitrary objects -- only resume from a
+        # last.pt that this script wrote itself.
+        st = torch.load(last_pt, map_location=dev, weights_only=False)
+        link.load_state_dict(st["link"])
+        asker.load_state_dict(st["asker"])
+        opt.load_state_dict(st["opt"])
+        best, start_step = st["best"], st["step"]
+        # use the mode-dependent label: in memorize mode the score is not "held-out"
+        print(f"[qa-link] resumed from step {start_step} (best {label} {best*100:.1f}%)", flush=True)
+    t0 = time.time()
+    asker.train()
+    for step in range(start_step + 1, args.steps + 1):
+        batch = random.sample(train_pool, batch_size)
+        lat = encode_questions(batch)
+        inj = link(lat)
+        a_ids = torch.tensor([prompt_ids + ans_tokens(p) for p in batch], device=dev)
+        labels = a_ids.clone()
+        labels[:, :plen] = -100          # loss only on the answer digits
+        out = asker(input_ids=a_ids, labels=labels, inject_latent=inj)
+        opt.zero_grad()
+        out.loss.backward()
+        opt.step()
+        if step % args.eval_every == 0 or step == args.steps:
+            ex, pd = evaluate(eval_pool, args.eval_n)
+            extra = "" if memorize else f"  train exact {evaluate(train_pool, args.eval_n)[0]*100:5.1f}%"
+            print(f"[qa-link] step {step:5d}  loss {out.loss.item():.4f}  "
+                  f"{label} {ex*100:5.1f}% (digits {pd*100:5.1f}%){extra}  "
+                  f"[{time.time()-t0:.0f}s]", flush=True)
+            if ex > best:
+                best = ex
+                save(link, asker, ex, step, args, memorize)
+                print(f"[qa-link]   saved -> {OUT} ({label} {ex*100:.1f}%)", flush=True)
+            # resume checkpoint every eval, so a crash never loses more than eval_every steps
+            torch.save({"link": link.state_dict(), "asker": asker.state_dict(),
+                        "opt": opt.state_dict(), "best": best, "step": step}, last_pt + ".tmp")
+            os.replace(last_pt + ".tmp", last_pt)
+    # save() records only the best checkpoint's score in its metadata; the ablation
+    # below is measured on the LAST-step weights and goes to the log only.
+    ex_a, pd_a = evaluate(eval_pool, args.eval_n, ablate=True)
+    print(f"[qa-link] ablated (latent cut): exact {ex_a*100:.1f}% / digits {pd_a*100:.1f}%", flush=True)
+    print(f"[qa-link] done. best {label} {best*100:.1f}%", flush=True)
+def save(link, asker, acc, step, args, memorize):
+    """Write the bridge checkpoint to OUT (tensors + string metadata), atomically.
+
+    Tensor keys: ``link.*`` (the RecursiveLink), ``ali.*`` (the asker's
+    model.latent_inject sub-module) and ``asker.*`` (the whole asker). link./ali.
+    tensors are always cast to float16; asker tensors only when floating-point.
+    ``acc`` is stored under the metadata key "holdout_exact" in both modes.
+    """
+    from safetensors.torch import save_file
+
+    def half_on_cpu(v):
+        return v.detach().to("cpu", torch.float16).contiguous()
+
+    os.makedirs(os.path.dirname(OUT), exist_ok=True)
+    tensors = {"link." + k: half_on_cpu(v) for k, v in link.state_dict().items()}
+    tensors.update(("ali." + k, half_on_cpu(v))
+                   for k, v in asker.model.latent_inject.state_dict().items())
+    for k, v in asker.state_dict().items():
+        if v.is_floating_point():
+            tensors["asker." + k] = half_on_cpu(v)
+        else:
+            tensors["asker." + k] = v.detach().cpu().contiguous()
+
+    # safetensors metadata is string -> string, hence the str() / f-string conversions
+    meta = {
+        "kind": "qa", "ans_len": str(ANS_LEN), "prompt": PROMPT,
+        "asker": ASKER, "consultant": CONSULTANT,
+        "mode": "memorize" if memorize else "generalize",
+        "holdout_pct": str(args.holdout), "step": str(step),
+        # accuracy over the whole table (memorize) or held-out set (generalize)
+        "holdout_exact": f"{acc:.4f}",
+        "ops": json.dumps(["+", "-", "*"]),
+    }
+    staging = OUT + ".tmp"
+    save_file(tensors, staging, metadata=meta)
+    os.replace(staging, OUT)   # atomic: the panel hot-reloads this file while we train
+if __name__ == "__main__":
+    main()