# ground-zero / src / engine / curiosity.py
# jefffffff9
# Add confidence loop, curiosity engine, and lightweight TTS
# 3657607
"""
CuriosityEngine — proactive vocabulary gap analysis.
Every N interactions (default: 5), sends the last 10 vocabulary entries to
the LLM and asks it to identify one related agricultural / everyday term that
is missing from the learner's vocabulary, then formulate a question asking the
user how to say that word in their language.
Usage in app_lab.py:

    _curiosity = CuriosityEngine(interval=5)

    # Inside _run_llm_and_tts, after the main LLM call:
    question = _curiosity.maybe_ask(_memory, _gemma)
    if question:
        history.append({"role": "assistant", "content": f"🌱 {question}"})
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
from src.memory.memory_manager import MemoryManager
from src.llm.gemma_client import GemmaClient
logger = logging.getLogger(__name__)
# Instruction text that frames the model as a vocabulary-gap assistant and
# demands a JSON-only reply; presumably intended as the system prompt.
# The backslash after the opening quotes and the one before the closing quotes
# are line continuations inside the literal, so the string has no leading or
# trailing newline.
# NOTE(review): this constant is not referenced anywhere in the visible part of
# this file — confirm whether it is meant to be handed to the LLM client.
_CURIOSITY_SYSTEM = """\
You are a language-learning assistant that notices gaps in a West African vocabulary list.
Reply with a single valid JSON object and nothing else.\
"""
# User-message template filled in by CuriosityEngine.maybe_ask via str.format:
#   {n}          -> number of vocabulary lines included
#   {vocab_list} -> the newline-joined vocabulary lines
# The doubled braces ({{ and }}) are str.format escapes that render as the
# literal braces of the JSON skeleton. The trailing backslashes in the middle
# paragraph join those three source lines into one line of prompt text.
_CURIOSITY_USER_TEMPLATE = """\
Here are the {n} most recent words I have learned so far:
{vocab_list}
Based on these words, what is ONE related agricultural or common everyday term \
I am likely missing? Formulate a short, warm question asking the user how to say \
that missing word in their language.
Reply only with this JSON:
{{
"word_suggestion": "<the English word you think is missing>",
"question": "<one friendly sentence asking the user>"
}}
"""
class CuriosityEngine:
    """Fire a vocabulary-gap prompt every ``interval`` user interactions.

    The engine counts calls to :meth:`maybe_ask` and only contacts the LLM on
    every ``interval``-th call; all other calls return ``None`` immediately.
    """

    # Fewer stored entries than this is treated as "too sparse" to analyse.
    _MIN_ENTRIES = 3
    # How many of the most recent entries are included in the prompt.
    _RECENT_WINDOW = 10

    def __init__(self, interval: int = 5) -> None:
        """Create an engine that asks once every ``interval`` interactions.

        Args:
            interval: Number of :meth:`maybe_ask` calls between two LLM
                queries. Must be at least 1.

        Raises:
            ValueError: If ``interval`` is smaller than 1. Rejecting it here
                avoids a ``ZeroDivisionError`` surfacing later, in the middle
                of a conversation, when the counter is taken modulo zero.
        """
        if interval < 1:
            raise ValueError(f"interval must be >= 1, got {interval!r}")
        self._interval = interval
        self._interaction = 0

    def maybe_ask(
        self,
        memory: "MemoryManager",
        gemma: "GemmaClient",
    ) -> Optional[str]:
        """Return a vocabulary-gap question on every ``interval``-th call.

        The interaction counter is incremented on every call, including the
        calls that return ``None``.

        Args:
            memory: Object whose ``get_all()`` returns the stored vocabulary
                as a sequence of dict-like entries; the ``language``,
                ``word`` and ``translation`` keys are read from each entry.
            gemma: Object whose ``chat(prompt, vocabulary_context=...)``
                returns a dict-like reply; the ``question`` key is preferred,
                with ``response`` as a fallback.

        Returns:
            The stripped, non-empty question string, or ``None`` when this is
            not an ``interval``-th call, when fewer than three entries are
            stored, when the LLM call (or reading its reply) raises, or when
            the reply carries no usable question text.

        Note:
            Errors raised by ``memory.get_all()`` or by malformed entries are
            not caught here and propagate to the caller.
        """
        self._interaction += 1
        if self._interaction % self._interval != 0:
            return None

        entries = memory.get_all()
        if len(entries) < self._MIN_ENTRIES:
            logger.debug("CuriosityEngine: vocabulary too sparse (%d entries)", len(entries))
            return None

        recent = entries[-self._RECENT_WINDOW:]
        lines = [
            f" [{e.get('language','?')}] {e.get('word','')} = {e.get('translation','')}"
            for e in recent
        ]
        prompt = _CURIOSITY_USER_TEMPLATE.format(
            n=len(lines),
            vocab_list="\n".join(lines),
        )

        try:
            # The word list is already embedded in the user prompt, so a short
            # placeholder is passed as the vocabulary context instead of the
            # list itself (presumably chat() would otherwise repeat it in its
            # own system prompt — confirm against GemmaClient).
            # NOTE(review): the module-level _CURIOSITY_SYSTEM text is not
            # passed to the client here; confirm whether it should be.
            result = gemma.chat(prompt, vocabulary_context="(see above)")
            question = result.get("question") or result.get("response")
            word = result.get("word_suggestion", "")
        except Exception as exc:
            # Deliberately best-effort: a failed curiosity prompt must never
            # break the main conversation flow.
            logger.warning("CuriosityEngine: LLM call failed: %s", exc)
            return None

        if not isinstance(question, str):
            # Missing key, or the model put a non-text value in the field.
            return None

        # Strip before the emptiness check so a whitespace-only answer yields
        # None rather than an empty string.
        question = question.strip()
        if not question:
            return None

        logger.info(
            "CuriosityEngine: suggesting '%s' — %s",
            word,
            question[:80],
        )
        return question