Spaces:

build-small-hackathon
/

daimon

Running

App Files Files Community

daimon / engine /loop.py

davidquicast

chore: initial commit

f0347b4 1 day ago

Raw

History Blame Contribute Delete

7.58 kB

	"""F2 living loop: orchestrates one chat turn through all six steps.

	1. RESPONSE     - MiniCPM5-1B replies as Daimon, using PERSONA.md itself
	                  (engine/recompile.py, re-rendered from personaxis.md +
	                  state.json before every turn) as the system prompt -
	                  this IS Daimon's self-improving prompt - plus the
	                  conversation history the caller passes in (the UI's
	                  own `gr.Chatbot` history; see build_messages()).
	2. APPRAISAL    - engine/appraise.py: small GBNF-constrained JSON signals.
	3. MAPPING      - engine/mapping.py: deterministic signals -> deltas.
	4. GOVERN+CLAMP - engine/spec_bridge.py: `mutate()` per delta (clamp +
	                  envelope check + audit log live in the spec engine,
	                  not here).
	5. RECOMPILE    - engine/recompile.py: re-render PERSONA.md from the
	                  updated state.json.
	6. MEMORY       - engine/memory.py: let the model curate memory.md and
	                  memory/<date>.md (cross-session long-term memory +
	                  this session's consolidated summary) - two extra
	                  small local-model calls are cheap, so they run every
	                  turn.

	The model never writes state.json directly - every mutation goes through
	spec_bridge.mutate(), which reads personaxis.md's declared envelopes and
	applies clamping and the audit log entirely in Python.

	Run the Gate G2 smoke test (5 turns) with:

	python -m engine.loop

	Requires `bash model/serve.sh` (MiniCPM5-1B on http://localhost:8080/v1) to be
	running first - see MASTER_CHECKLIST F0.
	"""

	from __future__ import annotations

	import sys
	from pathlib import Path
	from typing import Any

	# Repository root: this file is expected to live one directory below it
	# (engine/loop.py), hence two `.parent` hops from the resolved path.
	REPO_ROOT = Path(__file__).resolve().parent.parent
	# Put the repo root on sys.path (once) so the `engine.*` and `model.*`
	# imports below resolve no matter which directory the script is run from.
	if str(REPO_ROOT) not in sys.path:
	    sys.path.insert(0, str(REPO_ROOT))

	from engine import mapping, memory, recompile # noqa: E402
	from engine.appraise import appraise # noqa: E402
	from engine.spec_bridge import SpecBridgeError, get_state, mutate # noqa: E402
	from model.client import THINKING_MODE, chat, chat_stream # noqa: E402

	# Persona slug handed to every recompile / spec_bridge / memory call in
	# this module.
	SLUG = "daimon"

	# Reply token budget. In thinking mode the <think> block alone can run past
	# 1000 tokens with Daimon's full system prompt before any reply content
	# appears, so it gets plenty of room (CTX=32768); build_messages() also asks
	# the model to keep that reasoning short so it doesn't eat the whole budget
	# before replying.
	DEFAULT_MAX_TOKENS = 4096 if THINKING_MODE else 300

	# Appended to the system prompt by build_messages() when THINKING_MODE is on.
	_THINKING_BUDGET_NOTE = (
	    "\n\n## Reasoning budget\n\nBefore replying, think briefly - a short "
	    "paragraph, not an essay - then give your reply. Always leave room for "
	    "the reply itself; an unfinished reply is worse than a short one."
	)

	# Number of prior chat turns (user+assistant pairs) replayed for multi-turn
	# coherence. The conversation itself lives only in the caller's gr.Chatbot
	# history - nothing is persisted to disk.
	MAX_HISTORY_TURNS = 8


	def build_messages(user_message: str, history: list[dict[str, Any]] | None = None) -> list[dict[str, str]]:
	    """Assemble the chat message list for one turn.

	    The result is: one system message, then up to ``MAX_HISTORY_TURNS``
	    user+assistant pairs of prior visible conversation, then the new user
	    message.

	    Args:
	        user_message: The text the user just sent; always the last message.
	        history: Prior conversation as role/content dicts (the UI's
	            ``gr.Chatbot`` history). ``None`` or empty means no history.

	    Returns:
	        A list of ``{"role": ..., "content": ...}`` dicts ready to pass to
	        ``chat`` / ``chat_stream``.
	    """
	    # PERSONA.md is the system prompt: Identity, Character, Personality &
	    # Voice, Values, Limits, Self-Improvement, plus the live "Current State"
	    # section, all re-rendered from personaxis.md + state.json every turn.
	    system_prompt = recompile.render(SLUG)
	    if THINKING_MODE:
	        system_prompt += _THINKING_BUDGET_NOTE

	    # Keep only visible user/assistant text. Entries carrying gr.Chatbot
	    # metadata (collapsible "thinking" bubbles) and empty or non-string
	    # content are dropped.
	    visible: list[dict[str, str]] = []
	    for turn in history or []:
	        role, content = turn.get("role"), turn.get("content")
	        if role in ("user", "assistant") and isinstance(content, str) and content and not turn.get("metadata"):
	            visible.append({"role": role, "content": content})

	    # Apply the window AFTER filtering so skipped bubbles don't use up
	    # replay slots. The explicit guard matters because `seq[-0:]` is the
	    # whole list, not an empty one.
	    window = MAX_HISTORY_TURNS * 2
	    recent = visible[-window:] if window > 0 else []

	    return [
	        {"role": "system", "content": system_prompt},
	        *recent,
	        {"role": "user", "content": user_message},
	    ]


	def finish_turn(user_message: str, reply: str) -> dict[str, Any]:
	    """Run steps 2-6 of the living loop for a completed exchange.

	    Appraises the (user_message, reply) pair, maps the signals to deltas,
	    applies each delta through ``mutate()``, rewrites PERSONA.md, and lets
	    the model curate its long-term memory. Used by both step()
	    (non-streaming) and the UI/API, after the reply has finished streaming.

	    Args:
	        user_message: The user's message for this turn.
	        reply: The full assistant reply for this turn.

	    Returns:
	        A dict with keys ``reply``, ``signals``, ``mutations`` (one entry
	        per delta: either what ``mutate()`` returned or a
	        ``{"field", "delta", "blocked": True, "error"}`` record),
	        ``persona_live_path`` (as a string), and ``state``.
	    """
	    signals = appraise(user_message, reply)
	    deltas = mapping.signals_to_deltas(signals)

	    applied: list[dict[str, Any]] = []
	    for field, delta, reason in deltas:
	        try:
	            outcome = mutate(SLUG, field, delta, reason=reason, actor="actor-llm")
	        except SpecBridgeError as exc:
	            # A rejected mutation is recorded rather than raised, so one
	            # blocked delta does not abort the rest of the turn.
	            outcome = {"field": field, "delta": delta, "blocked": True, "error": str(exc)}
	        applied.append(outcome)

	    # Re-render PERSONA.md only after all mutations have been attempted.
	    persona_live_path = recompile.write(SLUG)
	    memory.curate_memory(SLUG, user_message, reply)

	    return {
	        "reply": reply,
	        "signals": signals,
	        "mutations": applied,
	        "persona_live_path": str(persona_live_path),
	        "state": get_state(SLUG),
	    }


	def step(user_message: str, history: list[dict[str, Any]] | None = None, *, max_tokens: int = DEFAULT_MAX_TOKENS) -> dict[str, Any]:
	    """Run one full living-loop turn without streaming.

	    Builds the message list, gets the complete reply from ``chat``, then
	    hands the exchange to finish_turn() for steps 2-6.

	    Args:
	        user_message: The user's message for this turn.
	        history: Prior conversation as role/content dicts, or ``None``.
	        max_tokens: Generation budget passed through to ``chat``.

	    Returns:
	        The dict produced by finish_turn(): the reply, appraisal signals,
	        mutations actually applied (clamped/blocked included), the path to
	        the re-rendered PERSONA.md, and the resulting state.
	    """
	    messages = build_messages(user_message, history)
	    reply = chat(messages, modality="text", max_tokens=max_tokens)
	    return finish_turn(user_message, reply)


	def step_stream(user_message: str, history: list[dict[str, Any]] | None = None, *, max_tokens: int = DEFAULT_MAX_TOKENS):
	    """Stream the reply for one turn (step 1 only).

	    Generator: yields ("thinking" | "content", text) chunks as the reply
	    streams in, then a final ("done", reply) tuple with the full reply text.
	    Does NOT run steps 2-6 - callers run finish_turn(user_message, reply)
	    themselves once they're ready (e.g. after unblocking the chat UI).

	    Args:
	        user_message: The user's message for this turn.
	        history: Prior conversation as role/content dicts, or ``None``.
	        max_tokens: Generation budget passed through to ``chat_stream``.

	    Yields:
	        ``(kind, text)`` tuples exactly as produced by ``chat_stream``,
	        followed by one ``("done", full_reply)`` tuple.
	    """
	    messages = build_messages(user_message, history)
	    parts: list[str] = []
	    for kind, text in chat_stream(messages, modality="text", max_tokens=max_tokens):
	        # Only "content" chunks belong to the final reply; every chunk,
	        # whatever its kind, is still forwarded to the caller.
	        if kind == "content":
	            parts.append(text)
	        yield kind, text
	    yield "done", "".join(parts)


	# Script for the Gate G2 smoke test: five user messages, each of which
	# should nudge the vector and leave an audit trail, without runaway (deltas
	# are capped, see engine/mapping.py).
	_DEMO_TURNS = [
	    "Hey Daimon! I love how curious you are, tell me something interesting.",
	    "Whoa, that's such a cool fact, can you go deeper on that?",
	    "Actually, can you be a bit more reserved and less chatty for a moment?",
	    "Sorry if that came across harsh, I just need to focus for a bit.",
	    "No worries! I'm back, let's keep exploring - what else have you got?",
	]


	if __name__ == "__main__":
	    # Gate G2 smoke test driver: run every demo turn through step() and
	    # print the reply, signals and mutations for each one.
	    # Force UTF-8 output so replies print regardless of the console's
	    # default encoding.
	    sys.stdout.reconfigure(encoding="utf-8")

	    # Stand in for gr.Chatbot's session history: each turn appends its own
	    # (user, reply) pair so the next turn keeps multi-turn context, just
	    # like the UI does - nothing is persisted to disk.
	    history: list[dict[str, Any]] = []
	    for i, turn in enumerate(_DEMO_TURNS, start=1):
	        print(f"\n=== Turn {i}: {turn!r} ===")
	        result = step(turn, history)
	        print("reply:", result["reply"])
	        print("signals:", result["signals"])
	        print("mutations:", result["mutations"])
	        # Appended only after step() returns, so the current turn is not
	        # duplicated in the history passed to its own call.
	        history.append({"role": "user", "content": turn})
	        history.append({"role": "assistant", "content": result["reply"]})