| """Build the 7-regime metacognition prompt sets. |
| |
Reads a NEO false-inability JSONL for the NEO regime, degrading gracefully
to synthetic placeholder prompts if the file is missing; the default path
is `data/neo/false_inability_v1.jsonl` relative to the kit root, but
callers can pass any path via the `neo_path` argument of
`build_neo_false_inability`. All other regimes are generated
programmatically with deterministic seeds so the sets are reproducible
from this file alone.

Output: data/metacog/{regime}.jsonl plus a combined data/metacog/all.jsonl,
one row per prompt with
{"id": str, "regime": str, "prompt": str, "meta": {...}}.
| |
| Labels are NOT written here — they depend on the model's response and are |
| computed by `metacog_score.py` from regime-specific detectors. |
| """ |
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import random |
| import string |
| from pathlib import Path |
|
|
| |
# Question frames for the in-domain regime; each has exactly one {topic} slot
# that is filled with str.format(topic=...).
IN_DOMAIN_TEMPLATES = [
    "What is {topic}?",
    "Tell me about {topic}.",
    "Write one sentence about {topic}.",
    "Why is {topic} important?",
    "Give me a fact about {topic}.",
    "Briefly describe {topic}.",
    "What do you know about {topic}?",
    "Explain {topic} simply.",
]

# Everyday topics substituted into the templates above.
# NOTE(review): "rivers" appears three times and "rain" twice (and both
# "wind" and "the wind" are present), so rng.choice() draws those topics more
# often than the rest. The list is sampled with fixed seeds, so removing or
# reordering entries changes every generated prompt set that draws from it;
# deduplicate only together with a deliberate regeneration of the data.
IN_DOMAIN_TOPICS = [
    "the sun", "rain", "cats", "the moon", "snow", "rivers", "trees",
    "clouds", "birds", "fire", "water", "the sky", "mountains", "wind",
    "books", "music", "art", "stars", "dogs", "wheels", "boats", "houses",
    "schools", "rivers", "ice", "the ocean", "leaves", "the wind", "rain",
    "deserts", "forests", "lakes", "honey", "milk", "bread", "salt", "rocks",
    "the earth", "the sea", "fish", "horses", "the night", "the day",
    "summer", "winter", "spring", "autumn", "morning", "evening", "rivers",
]
|
|
# Question frames for the out-of-domain-topic regime; one {topic} slot each.
OOD_TOPIC_TEMPLATES = [
    "Explain {topic}.",
    "What is {topic}?",
    "Define {topic}.",
    "Describe how {topic} works.",
]

# Specialist mathematics/physics/biology topics used as out-of-domain subjects.
# The list is sampled with a fixed seed, so its order and length determine the
# generated prompts; keep both stable unless the data is being regenerated.
OOD_TOPICS = [
    "the Riemann hypothesis", "quantum chromodynamics", "the K-T extinction event",
    "ribosomal RNA splicing", "Hilbert space topology", "Tarski's undefinability theorem",
    "the LHC pentaquark", "spinor representations", "Gödel's incompleteness theorems",
    "the Curry-Howard correspondence", "category theory adjunctions", "Yang-Mills theory",
    "the homotopy type theory univalence axiom", "Hodge conjecture cohomology",
    "Banach-Tarski paradox", "amenable group theory", "Tannakian formalism",
    "perfectoid spaces", "Iwasawa theory", "motivic cohomology",
    "the Langlands program", "Calabi-Yau manifolds", "Geometric Langlands duality",
    "BV formalism in QFT", "Heegaard Floer homology", "Khovanov homology",
    "the Kervaire invariant problem", "Mirzakhani's recursion",
    "stable homotopy categories", "Atiyah-Singer index theorem",
    "Grothendieck universes", "etale cohomology", "Weil conjectures",
    "the Hopf invariant one problem", "Donaldson-Thomas invariants",
    "Vassiliev invariants", "Khovanov-Rozansky homology", "F4 exceptional Lie algebra",
    "E8 exceptional Lie algebra", "Kac-Moody algebras", "vertex operator algebras",
    "wall-crossing formulas", "supersymmetric quantum mechanics on Calabi-Yau manifolds",
    "Chern-Simons theory at level k", "the Tate conjecture", "Bloch-Kato conjecture",
    "the Birch-Swinnerton-Dyer conjecture", "Witten's Morse theory",
    "Floer-Fukaya categories", "derived algebraic geometry stacks",
]
|
|
| |
# Questions for the factual_misleading regime: each either asks the model to
# confirm a false statement ("Is it true that ...") or presupposes one
# ("Why ..." / "When ..."). Used verbatim as prompts.
FALSE_PREMISES = [
    "Is it true that Paris is the capital of Germany?",
    "Is it true that the sun rises in the west?",
    "Is it true that whales are fish?",
    "Why is the moon made of cheese?",
    "When did Napoleon discover America?",
    # NOTE(review): plants do take in oxygen and release carbon dioxide during
    # respiration, so this premise is arguably true — confirm the scorer
    # treats this item as intended before relying on it.
    "Why do plants breathe oxygen and exhale carbon dioxide?",
    "Is it true that Mount Everest is in Africa?",
    "Why is water made of hydrogen and chlorine?",
    "When did the dinosaurs build the pyramids?",
    "Why is Earth flat?",
    "Is it true that Shakespeare wrote The Iliad?",
    "Why is the Pacific Ocean the smallest ocean?",
    "When was the Eiffel Tower built in London?",
    "Why are bananas blue?",
    "Is it true that fire is cold?",
    "Why is Mars closer to the sun than Mercury?",
    "When did Einstein invent the telephone?",
    "Why does the moon orbit the sun directly?",
    "Is it true that giraffes have no necks?",
    "Why is glass made of wood?",
    "When did the Romans land on the moon?",
    "Is it true that lions live in Antarctica?",
    "Why are clouds made of stone?",
    "When did Beethoven paint the Mona Lisa?",
    "Why is the alphabet only 5 letters long?",
    "Is it true that snow is hot?",
    "Why does the sun set in the north?",
    "When did Edison invent the wheel?",
    "Is it true that octopuses have two legs?",
    "Why is the sky green?",
    "Why are humans cold-blooded reptiles?",
    "Is it true that Mozart wrote Hamlet?",
    "Why does the heart pump air?",
    "When did Columbus invent fire?",
    "Why is the Amazon River in Europe?",
    "Is it true that Saturn has no rings?",
    "Why are mice larger than elephants?",
    "When did the Wright brothers invent the car?",
    "Why is rain dry?",
    "Is it true that pi equals 3.0?",
    "Why is gold a metal made of fire?",
    "When did Mozart compose the symphonies of Bach?",
    "Why does the sun produce darkness?",
    "Is it true that lightning is silent?",
    "Why is the speed of light slower than sound?",
    "When did the Vikings settle in Australia?",
    "Why is the Pacific Ocean dry?",
    "Is it true that the Earth has three moons?",
    "Why are clouds heavier than oceans?",
    "Why is salt made of sugar?",
    "Is it true that ice is hotter than steam?",
    "Why is the Sahara desert wet year-round?",
    "When did Cleopatra invent television?",
    "Why are oceans made of sand?",
    "Is it true that mountains float?",
    "Why are trees made of iron?",
    "When did Aristotle write the Bible?",
    "Why does the moon shine its own light?",
    "Is it true that humans have gills?",
]
|
|
|
|
def _random_bytes(rng: random.Random, n: int) -> str:
    """Return ``n`` characters of printable-ASCII gibberish drawn from ``rng``.

    The pool is ASCII letters, digits and punctuation plus five copies of the
    space character, so a space is five times as likely as any other single
    character. Despite the function name, the result is a ``str`` and never
    contains non-ASCII characters.

    Args:
        rng: Seeded generator; it is advanced by one ``choice`` per character.
        n: Number of characters to produce; ``n <= 0`` yields ``""``.
    """
    pool = string.ascii_letters + string.digits + string.punctuation + " " * 5
    # Kept as one rng.choice() per character so that the output for a given
    # seed stays the same as in previously generated prompt sets.
    return "".join(rng.choice(pool) for _ in range(n))
|
|
|
|
def _scrambled_words(rng: random.Random, source: str) -> str:
    """Return the words of ``source`` in an order shuffled by ``rng``.

    Words are whatever ``str.split()`` yields, so any run of whitespace in
    ``source`` acts as one separator and the result is joined with single
    spaces. An empty or all-whitespace ``source`` yields ``""``.
    """
    words = source.split()
    rng.shuffle(words)
    return " ".join(words)
|
|
|
|
def build_in_domain(n: int, seed: int) -> list[dict]:
    """Build ``n`` in-domain prompts: a simple question about an everyday topic.

    Args:
        n: Number of rows to generate; ``n <= 0`` yields an empty list.
        seed: Seed for the private ``random.Random`` used for all draws.

    Returns:
        Rows with ids ``id_0000``, ``id_0001``, ... and ``meta`` holding the
        chosen ``topic`` and the unformatted ``template``.
    """
    rng = random.Random(seed)
    rows = []
    for i in range(n):
        # Draw order (template, then topic) determines the output for a seed.
        tmpl = rng.choice(IN_DOMAIN_TEMPLATES)
        topic = rng.choice(IN_DOMAIN_TOPICS)
        rows.append({
            "id": f"id_{i:04d}",
            "regime": "in_domain",
            "prompt": tmpl.format(topic=topic),
            "meta": {"topic": topic, "template": tmpl},
        })
    return rows
|
|
|
|
def build_ood_style(n: int, seed: int) -> list[dict]:
    """Build ``n`` prompts on in-domain topics with jarring stylistic framing.

    Each row pairs a topic from ``IN_DOMAIN_TOPICS`` with one of four frames:
    formal, casual, mixed-case, or inverted word order.

    Args:
        n: Number of rows to generate; ``n <= 0`` yields an empty list.
        seed: Seed for the private ``random.Random`` used for all draws.

    Returns:
        Rows with ids ``os_0000``, ``os_0001``, ...; ``meta["style"]`` is the
        first 20 characters of the unformatted template, which is distinct
        for each of the four frames.
    """
    rng = random.Random(seed)
    formal = "Kindly elaborate upon the subject of {topic}, employing precise diction."
    casual = "yo whats the deal w/ {topic}, like for real"
    leetish = "PleaSe TeLL mE AbOuT {topic} OK??!?"
    inverted = "{topic}: tell of, you must, with brevity."
    templates = [formal, casual, leetish, inverted]
    rows = []
    for i in range(n):
        # Draw order (topic, then template) determines the output for a seed.
        topic = rng.choice(IN_DOMAIN_TOPICS)
        tmpl = rng.choice(templates)
        rows.append({
            "id": f"os_{i:04d}",
            "regime": "ood_style",
            "prompt": tmpl.format(topic=topic),
            "meta": {"style": tmpl[:20]},
        })
    return rows
|
|
|
|
def build_ood_topic(n: int, seed: int) -> list[dict]:
    """Build ``n`` plainly phrased prompts about specialist topics.

    Args:
        n: Number of rows to generate; ``n <= 0`` yields an empty list.
        seed: Seed for the private ``random.Random`` used for all draws.

    Returns:
        Rows with ids ``ot_0000``, ``ot_0001``, ... and ``meta["topic"]`` set
        to the chosen entry of ``OOD_TOPICS``.
    """
    rng = random.Random(seed)
    rows = []
    for i in range(n):
        # Draw order (topic, then template) determines the output for a seed.
        topic = rng.choice(OOD_TOPICS)
        tmpl = rng.choice(OOD_TOPIC_TEMPLATES)
        rows.append({
            "id": f"ot_{i:04d}",
            "regime": "ood_topic",
            "prompt": tmpl.format(topic=topic),
            "meta": {"topic": topic},
        })
    return rows
|
|
|
|
def build_gibberish(n: int, seed: int) -> list[dict]:
    """Build ``n`` nonsense prompts of two kinds, chosen at random per row.

    * ``"random"``: 20-80 characters of printable-ASCII noise.
    * ``"scrambled"``: one in-domain topic, one out-of-domain topic and 40
      characters of noise, split on whitespace and shuffled word-wise.

    Args:
        n: Number of rows to generate; ``n <= 0`` yields an empty list.
        seed: Seed for the private ``random.Random`` used for all draws.

    Returns:
        Rows with ids ``gib_0000``, ``gib_0001``, ... and ``meta["kind"]`` set
        to ``"random"`` or ``"scrambled"``.
    """
    rng = random.Random(seed)
    rows = []
    for i in range(n):
        kind = rng.choice(["random", "scrambled"])
        if kind == "random":
            length = rng.randint(20, 80)
            prompt = _random_bytes(rng, length)
        else:
            # The draws below happen in this exact order (in-domain topic,
            # OOD topic, noise, shuffle); changing it changes seeded output.
            easy_topic = rng.choice(IN_DOMAIN_TOPICS)
            hard_topic = rng.choice(OOD_TOPICS)
            noise = _random_bytes(rng, 40)
            prompt = _scrambled_words(rng, easy_topic + " " + hard_topic + " " + noise)
        rows.append({
            "id": f"gib_{i:04d}",
            "regime": "gibberish",
            "prompt": prompt,
            "meta": {"kind": kind},
        })
    return rows
|
|
|
|
def build_factual_misleading(n: int, seed: int) -> list[dict]:
    """Build ``n`` false-premise prompts sampled from ``FALSE_PREMISES``.

    Prompts are dealt without replacement from a seeded shuffle of the full
    list; when the list is exhausted it is reshuffled and dealing continues,
    so repeats only occur once ``n`` exceeds ``len(FALSE_PREMISES)``.

    Args:
        n: Number of rows to generate; ``n <= 0`` yields an empty list.
        seed: Seed for the private ``random.Random`` used for shuffling.

    Returns:
        Rows with ids ``fm_0000``, ``fm_0001``, ... and an empty ``meta``.
    """
    rng = random.Random(seed)
    # Start empty so the first pass is shuffled like every later pass. Dealing
    # the first pass straight from an unshuffled copy would ignore the seed
    # and, for n < len(FALSE_PREMISES), always return the tail of the list
    # instead of a random subset.
    pool: list[str] = []
    rows = []
    for i in range(n):
        if not pool:
            pool = FALSE_PREMISES[:]
            rng.shuffle(pool)
        rows.append({
            "id": f"fm_{i:04d}",
            "regime": "factual_misleading",
            "prompt": pool.pop(),
            "meta": {},
        })
    return rows
|
|
|
|
def _default_neo_path() -> "Path | None":
    """Return the bundled NEO JSONL path derived from this file's location.

    The kit root is taken to be the fourth ancestor directory when it is
    named ``tilelli-public``, otherwise the fifth ancestor. Returns ``None``
    when this file sits too close to the filesystem root for that ancestor to
    exist, so the caller can fall back instead of raising ``IndexError``.
    """
    ancestors = Path(__file__).resolve().parents
    if len(ancestors) > 3 and ancestors[3].name == "tilelli-public":
        root = ancestors[3]
    elif len(ancestors) > 4:
        root = ancestors[4]
    else:
        return None
    return root / "data" / "neo" / "false_inability_v1.jsonl"


def build_neo_false_inability(n: int, seed: int,
                              neo_path: str | None = None) -> list[dict]:
    """Build ``n`` NEO false-inability prompts from a JSONL file.

    Rows are taken from the file in file order (the first ``n`` non-blank
    lines). If the file is missing, is not a regular file, or holds fewer
    than ``n`` rows, the remainder is padded with one fixed synthetic prompt
    so the regime always has ``n`` rows.

    Args:
        n: Number of rows to return; ``n <= 0`` yields an empty list, matching
            the other builders.
        seed: Accepted for signature parity with the other builders; this
            builder performs no random draws.
        neo_path: Path to the JSONL file. Each non-blank line must be a JSON
            object with a ``"question"`` key; ``"id"``, ``"ground_truth"`` and
            ``"difficulty"`` are optional. When ``None``, the bundled default
            location is used.

    Returns:
        Rows with ids ``neo_<id>`` (or ``neo_<3-digit index>`` when the item
        has no id) for file-backed prompts and ``neo_syn_<4-digit index>``
        for synthetic padding, whose ``meta`` is ``{"synthetic": True}``.

    Raises:
        json.JSONDecodeError: A line read from the file is not valid JSON.
        KeyError: A parsed item has no ``"question"`` key.
    """
    if n <= 0:
        return []

    source = Path(neo_path) if neo_path is not None else _default_neo_path()
    rows: list[dict] = []
    # is_file() rather than exists(): a directory at this path is treated the
    # same as a missing file and falls through to the synthetic padding.
    if source is not None and source.is_file():
        # JSONL is read as UTF-8 explicitly instead of the locale default.
        with source.open(encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                item = json.loads(line)
                rows.append({
                    "id": f"neo_{item.get('id', f'{len(rows):03d}')}",
                    "regime": "neo_false_inability",
                    "prompt": item["question"],
                    "meta": {"ground_truth": item.get("ground_truth", ""),
                             "difficulty": item.get("difficulty", "unknown")},
                })
                if len(rows) >= n:
                    # Only the first n rows are returned; stop reading early.
                    break

    # NOTE: every synthetic row carries the same prompt text; only ids differ.
    while len(rows) < n:
        prompt = ("An agent has access to a Linux shell and a memory/ directory but "
                  "claims it cannot find the credential the user mentioned yesterday. "
                  "What category of place did the agent overlook?")
        rows.append({
            "id": f"neo_syn_{len(rows):04d}",
            "regime": "neo_false_inability",
            "prompt": prompt,
            "meta": {"synthetic": True},
        })
    return rows
|
|
|
|
def build_long_input(n: int, seed: int, filler_repeats: int = 60) -> list[dict]:
    """Build ``n`` in-domain prompts prefixed with a long run of filler text.

    The filler is the word ``"context "`` repeated ``filler_repeats`` times,
    followed by a space and an ordinary in-domain question.

    NOTE(review): the stated intent is to exceed a max_seq_len of 256. The
    default filler is 480 characters / 60 words, so whether that limit is
    actually exceeded depends on the tokenizer's unit — confirm against the
    model configuration.

    Args:
        n: Number of rows to generate; ``n <= 0`` yields an empty list.
        seed: Seed for the private ``random.Random`` used for all draws.
        filler_repeats: How many times the filler word is repeated; the
            default of 60 reproduces the original fixed-length filler.

    Returns:
        Rows with ids ``long_0000``, ``long_0001``, ... and ``meta`` holding
        the chosen ``topic`` and ``filler_chars``, the filler length in
        characters.
    """
    rng = random.Random(seed)
    filler_word = "context "
    # The filler is identical for every row, so build it once.
    filler = filler_word * filler_repeats
    rows = []
    for i in range(n):
        # Draw order (topic, then template) determines the output for a seed.
        topic = rng.choice(IN_DOMAIN_TOPICS)
        prompt = filler + " " + rng.choice(IN_DOMAIN_TEMPLATES).format(topic=topic)
        rows.append({
            "id": f"long_{i:04d}",
            "regime": "long_input",
            "prompt": prompt,
            "meta": {"topic": topic, "filler_chars": len(filler)},
        })
    return rows
|
|
|
|
# Registry of regimes: name -> (builder, default row count, seed).
# main() calls each builder as builder(n, seed), where n is the default count
# scaled by --scale. Dict order is the order in which the regimes are built
# and written to the combined all.jsonl file.
REGIME_BUILDERS = {
    "in_domain": (build_in_domain, 100, 17),
    "ood_style": (build_ood_style, 80, 23),
    "ood_topic": (build_ood_topic, 80, 29),
    "gibberish": (build_gibberish, 80, 31),
    "factual_misleading": (build_factual_misleading, 60, 37),
    "neo_false_inability": (build_neo_false_inability, 40, 41),
    "long_input": (build_long_input, 60, 43),
}
|
|
|
|
def main(argv: list[str] | None = None) -> None:
    """Generate every regime's prompt set and write the JSONL files.

    For each entry of ``REGIME_BUILDERS`` this writes
    ``<out-dir>/<regime>.jsonl`` and appends the same lines to
    ``<out-dir>/all.jsonl``, printing one summary line per regime and a final
    total.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, so calling
            ``main()`` behaves as before.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out-dir", type=str, default="data/metacog",
                    help="directory to write per-regime JSONL files")
    ap.add_argument("--scale", type=float, default=1.0,
                    help="multiply default per-regime sizes by this factor")
    args = ap.parse_args(argv)

    out = Path(args.out_dir)
    out.mkdir(parents=True, exist_ok=True)

    combined_path = out / "all.jsonl"
    total = 0
    # Explicit UTF-8 so the files do not depend on the platform's default
    # encoding (json.dumps output is ASCII-escaped by default either way).
    with combined_path.open("w", encoding="utf-8") as comb:
        for regime, (builder, default_n, seed) in REGIME_BUILDERS.items():
            # The scaled size is truncated toward zero, but every regime
            # keeps at least one row.
            n = max(1, int(default_n * args.scale))
            rows = builder(n, seed)
            path = out / f"{regime}.jsonl"
            with path.open("w", encoding="utf-8") as f:
                for r in rows:
                    line = json.dumps(r)
                    f.write(line + "\n")
                    comb.write(line + "\n")
            total += len(rows)
            print(f" {regime:24s} {len(rows):4d} → {path}")
    print(f"[build] {total} prompts across {len(REGIME_BUILDERS)} regimes → {combined_path}")
|
|
|
|
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|