"""F2 living loop: orchestrates one chat turn through all six steps.

1. RESPONSE - MiniCPM5-1B replies as Daimon, using PERSONA.md itself
   (engine/recompile.py, re-rendered from personaxis.md + state.json before
   every turn) as the system prompt - this IS Daimon's self-improving prompt -
   plus the conversation history the caller passes in (the UI's own
   `gr.Chatbot` history; see build_messages()).
2. APPRAISAL - engine/appraise.py: small GBNF-constrained JSON signals.
3. MAPPING - engine/mapping.py: deterministic signals -> deltas.
4. GOVERN+CLAMP - engine/spec_bridge.py: `mutate()` per delta (clamp +
   envelope check + audit log live in the spec engine, not here).
5. RECOMPILE - engine/recompile.py: re-render PERSONA.md from the updated
   state.json.
6. MEMORY - engine/memory.py: let the model curate memory.md and a
   per-session file under memory/ (cross-session long-term memory + this
   session's consolidated summary) - two extra small local-model calls are
   cheap, so they run every turn.

The model never writes state.json directly - every mutation goes through
spec_bridge.mutate(), which reads personaxis.md's declared envelopes and
applies clamping and the audit log entirely in Python.

Run the Gate G2 smoke test (5 turns) with:

    python -m engine.loop

Requires `bash model/serve.sh` (MiniCPM5-1B on http://localhost:8080/v1) to
be running first - see MASTER_CHECKLIST F0.
"""

from __future__ import annotations

import sys
from collections.abc import Iterator
from pathlib import Path
from typing import Any

# Make the repository root importable so `engine` and `model` resolve even
# when this file is executed directly rather than as an installed package.
REPO_ROOT = Path(__file__).resolve().parent.parent
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from engine import mapping, memory, recompile  # noqa: E402
from engine.appraise import appraise  # noqa: E402
from engine.spec_bridge import SpecBridgeError, get_state, mutate  # noqa: E402
from model.client import THINKING_MODE, chat, chat_stream  # noqa: E402

SLUG = "daimon"

# In thinking mode, the thinking block itself can run past 1000 tokens with
# Daimon's full system prompt before any reply content is produced - give it
# plenty of room (CTX=32768), and ask the model to keep that reasoning short
# (see build_messages) so it doesn't eat the whole budget before replying.
DEFAULT_MAX_TOKENS = 4096 if THINKING_MODE else 300

_THINKING_BUDGET_NOTE = (
    "\n\n## Reasoning budget\n\nBefore replying, think briefly - a short "
    "paragraph, not an essay - then give your reply. Always leave room for "
    "the reply itself; an unfinished reply is worse than a short one."
)

# How many prior chat turns (user+assistant pairs) to replay for multi-turn
# coherence. The conversation itself lives only in the caller's gr.Chatbot
# history - nothing is persisted to disk.
MAX_HISTORY_TURNS = 8


def _is_replayable(turn: dict[str, Any]) -> bool:
    """Return True if a history entry is visible user/assistant text.

    Entries carrying truthy ``metadata`` (the collapsible "thinking" bubbles
    of gr.Chatbot), entries with a non-string or empty ``content``, and
    entries with any other role are not replayed to the model.
    """
    content = turn.get("content")
    return (
        turn.get("role") in ("user", "assistant")
        and isinstance(content, str)
        and bool(content)
        and not turn.get("metadata")
    )


def build_messages(
    user_message: str,
    history: list[dict[str, Any]] | None = None,
) -> list[dict[str, str]]:
    """Build the chat message list for one model call.

    The result is: one system message, then up to ``MAX_HISTORY_TURNS * 2``
    of the most recent replayable history messages, then ``user_message`` as
    the final user message.

    Args:
        user_message: The new user input for this turn.
        history: Prior chat messages as dicts with ``role`` / ``content``
            (and optionally ``metadata``) keys; ``None`` means no history.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts.
    """
    # PERSONA.md *is* the system prompt: Identity, Character, Personality &
    # Voice, Values, Limits, Self-Improvement, plus the live "Current State"
    # section, all re-rendered from personaxis.md + state.json every turn.
    system_prompt = recompile.render(SLUG)
    if THINKING_MODE:
        system_prompt += _THINKING_BUDGET_NOTE

    # Filter BEFORE windowing: thinking bubbles and other skipped entries
    # must not use up slots in the replay window, otherwise fewer than
    # MAX_HISTORY_TURNS real turns would reach the model.
    replayable = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in history or []
        if _is_replayable(turn)
    ]

    # A window of 0 must mean "replay nothing"; a bare `[-0:]` slice would
    # return the whole list instead.
    window = MAX_HISTORY_TURNS * 2
    recent = replayable[-window:] if window > 0 else []

    return [
        {"role": "system", "content": system_prompt},
        *recent,
        {"role": "user", "content": user_message},
    ]


def finish_turn(user_message: str, reply: str) -> dict[str, Any]:
    """Run steps 2-6 for a completed (user_message, reply) pair.

    Appraises the pair, maps the signals to deltas, applies each delta via
    ``mutate()``, rewrites PERSONA.md, and lets the model curate its memory.
    Used by both step() (non-streaming) and the UI/API, after the reply has
    finished streaming.

    Args:
        user_message: The user input of the turn.
        reply: The full assistant reply of the turn.

    Returns:
        A dict with keys ``reply``, ``signals``, ``mutations`` (one entry per
        delta; a delta rejected with ``SpecBridgeError`` is recorded as a
        ``blocked`` entry instead of raising), ``persona_live_path`` and
        ``state``.
    """
    signals = appraise(user_message, reply)
    deltas = mapping.signals_to_deltas(signals)

    applied: list[dict[str, Any]] = []
    for field, delta, reason in deltas:
        try:
            outcome = mutate(SLUG, field, delta, reason=reason, actor="actor-llm")
        except SpecBridgeError as exc:
            # A rejected mutation is reported to the caller, not fatal: the
            # remaining deltas of this turn are still attempted.
            outcome = {"field": field, "delta": delta, "blocked": True, "error": str(exc)}
        applied.append(outcome)

    persona_live_path = recompile.write(SLUG)
    memory.curate_memory(SLUG, user_message, reply)

    return {
        "reply": reply,
        "signals": signals,
        "mutations": applied,
        "persona_live_path": str(persona_live_path),
        "state": get_state(SLUG),
    }


def step(
    user_message: str,
    history: list[dict[str, Any]] | None = None,
    *,
    max_tokens: int = DEFAULT_MAX_TOKENS,
) -> dict[str, Any]:
    """Run one full living-loop turn without streaming.

    Args:
        user_message: The new user input.
        history: Prior chat messages (see build_messages()).
        max_tokens: Token limit passed to the model call.

    Returns:
        The finish_turn() result: the reply, appraisal signals, mutations
        actually applied (clamped/blocked included), the path to the
        re-rendered PERSONA.md, and the resulting state.
    """
    messages = build_messages(user_message, history)
    reply = chat(messages, modality="text", max_tokens=max_tokens)
    return finish_turn(user_message, reply)


def step_stream(
    user_message: str,
    history: list[dict[str, Any]] | None = None,
    *,
    max_tokens: int = DEFAULT_MAX_TOKENS,
) -> Iterator[tuple[str, str]]:
    """Stream the reply for one turn (step 1 only).

    Yields ("thinking" | "content", text) chunks as the reply streams in,
    then a final ("done", reply) tuple with the full reply text. Only
    "content" chunks are accumulated into that final reply.

    Does NOT run steps 2-6 - callers run finish_turn(user_message, reply)
    themselves once they're ready (e.g. after unblocking the chat UI).

    Args:
        user_message: The new user input.
        history: Prior chat messages (see build_messages()).
        max_tokens: Token limit passed to the model call.
    """
    messages = build_messages(user_message, history)
    parts: list[str] = []
    for kind, text in chat_stream(messages, modality="text", max_tokens=max_tokens):
        if kind == "content":
            parts.append(text)
        yield kind, text
    yield "done", "".join(parts)


# Gate G2 smoke test: 5 turns that should each nudge the vector and leave an
# audit trail, without runaway (deltas are capped, see engine/mapping.py).
_DEMO_TURNS = [
    "Hey Daimon! I love how curious you are, tell me something interesting.",
    "Whoa, that's such a cool fact, can you go deeper on that?",
    "Actually, can you be a bit more reserved and less chatty for a moment?",
    "Sorry if that came across harsh, I just need to focus for a bit.",
    "No worries! I'm back, let's keep exploring - what else have you got?",
]


def _run_smoke_test() -> None:
    """Run the demo turns through step() and print each turn's outcome."""
    # stdout may have been replaced by an object without reconfigure();
    # only force UTF-8 when the method is available.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    # Stand in for gr.Chatbot's session history: each turn appends its own
    # (user, reply) pair so the next turn keeps multi-turn context, just
    # like the UI does - nothing is persisted to disk.
    history: list[dict[str, Any]] = []
    for i, turn in enumerate(_DEMO_TURNS, start=1):
        print(f"\n=== Turn {i}: {turn!r} ===")
        result = step(turn, history)
        print("reply:", result["reply"])
        print("signals:", result["signals"])
        print("mutations:", result["mutations"])
        history.append({"role": "user", "content": turn})
        history.append({"role": "assistant", "content": result["reply"]})


if __name__ == "__main__":
    _run_smoke_test()