# DECADE/memory/semantic_store.py
# Uploaded by: anonymous-penguin
# Commit message: Initial code release
# Commit: 9c60174 (verified)
"""
semantic_store.py
Wrapper around all_session_summary.json and all_session_user_facts.json.
Provides:
- keyword_search(): find sessions whose semantic text contains given keywords
- to_prompt(): format semantic context for LLM consumption
- get_text(): return raw semantic text for a session (for embedding/search)
"""
import json
from typing import Dict, List, Optional
class SemanticMemoryStore:
    """In-memory lookup over per-session summaries and user facts.

    Two JSON files are loaded once at construction time, both keyed by
    session ID:

    - summaries: ``sess_id -> {"session_summary": <str>, ...}``
    - facts:     ``sess_id -> [{"user-info": <str>, ...}, ...]``

    Missing or malformed entries are treated as "no data" (empty string)
    rather than raising, so a single bad record cannot break a search.
    """

    def __init__(self, summary_path: str, facts_path: str):
        """Load the summary and fact JSON files.

        Args:
            summary_path: Path to the session-summary JSON file.
            facts_path: Path to the user-facts JSON file.

        Raises:
            OSError: If either file cannot be opened.
            json.JSONDecodeError: If either file is not valid JSON.
        """
        print(f"[SemanticMemoryStore] Loading {summary_path} ...")
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which breaks non-ASCII text on some systems.
        with open(summary_path, encoding="utf-8") as f:
            self._summaries: Dict[str, dict] = json.load(f)
        print(f"[SemanticMemoryStore] Loading {facts_path} ...")
        with open(facts_path, encoding="utf-8") as f:
            self._facts: Dict[str, list] = json.load(f)
        print(f"[SemanticMemoryStore] Loaded {len(self._summaries)} summaries, "
              f"{len(self._facts)} fact entries.")

    def get_summary(self, sess_id: str) -> str:
        """Return the stripped session-level summary, or an empty string.

        An empty string is returned when the session is unknown, its entry
        is not a JSON object, or its "session_summary" value is missing,
        null, or not a string.
        """
        entry = self._summaries.get(sess_id)
        if not isinstance(entry, dict):
            return ""
        summary = entry.get("session_summary")
        return summary.strip() if isinstance(summary, str) else ""

    def get_facts_text(self, sess_id: str) -> str:
        """Return the session's user facts joined by single spaces.

        Only list items that are objects with a non-empty string
        "user-info" value contribute; anything else is skipped. Returns an
        empty string when the session has no usable facts.
        """
        fact_list = self._facts.get(sess_id)
        if not isinstance(fact_list, list):
            return ""
        facts = []
        for fact in fact_list:
            if not isinstance(fact, dict):
                continue
            info = fact.get("user-info")
            # str.join() raises on non-strings, so skip them explicitly.
            if isinstance(info, str) and info:
                facts.append(info)
        return " ".join(facts).strip()

    def get_text(self, sess_id: str) -> str:
        """Return summary and facts combined into one space-separated string.

        Empty parts are dropped, so the result is empty only when the
        session has neither a summary nor any facts.
        """
        parts = [self.get_summary(sess_id), self.get_facts_text(sess_id)]
        return " ".join(p for p in parts if p)

    def keyword_search(self, keywords: List[str], haystack_sess_ids: List[str]) -> List[str]:
        """Find sessions whose semantic text contains any of the keywords.

        Matching is a case-insensitive substring test against the text
        returned by get_text(). Empty or non-string keywords are ignored.

        Args:
            keywords: Keywords to look for.
            haystack_sess_ids: Session IDs to search, in priority order.

        Returns:
            Matching session IDs, preserving the order of
            ``haystack_sess_ids``. Empty if no usable keyword was given.
        """
        needles = [kw.lower() for kw in keywords if isinstance(kw, str) and kw]
        if not needles:
            # Nothing to match against; skip scanning the haystack entirely.
            return []
        matched = []
        for sid in haystack_sess_ids:
            text = self.get_text(sid).lower()
            if any(needle in text for needle in needles):
                matched.append(sid)
        return matched

    def to_prompt(self, sess_ids: List[str], date_lookup: Optional[Dict[str, str]] = None) -> str:
        """Format the semantic context of the given sessions as prompt text.

        Each session produces one block; lines without data are omitted,
        but the "Session ID" line is always present:

            Session ID: <sess_id>
            Session Date: <date>
            Summary: <session_summary>
            User Facts: <fact1> <fact2> ...

        Args:
            sess_ids: Session IDs to include, in output order.
            date_lookup: Optional mapping of session ID to a date string.

        Returns:
            The session blocks separated by a blank line, or an empty
            string when ``sess_ids`` is empty.
        """
        dates = date_lookup or {}
        blocks = []
        for sid in sess_ids:
            block_lines = [f"Session ID: {sid}"]
            date_str = dates.get(sid, "")
            if date_str:
                block_lines.append(f"Session Date: {date_str}")
            summary = self.get_summary(sid)
            if summary:
                block_lines.append(f"Summary: {summary}")
            facts_text = self.get_facts_text(sid)
            if facts_text:
                block_lines.append(f"User Facts: {facts_text}")
            blocks.append("\n".join(block_lines))
        return "\n\n".join(blocks)