"""
semantic_store.py

Wrapper around all_session_summary.json and all_session_user_facts.json.

Provides:
- keyword_search(): find sessions whose semantic text contains given keywords
- to_prompt(): format semantic context for LLM consumption
- get_text(): return raw semantic text for a session (for embedding/search)
"""

import json
from typing import Dict, List, Optional


class SemanticMemoryStore:
    """In-memory, read-only view over per-session summaries and user facts.

    Both inputs are JSON objects keyed by session ID:

    * the summary file maps a session ID to an object whose
      ``"session_summary"`` entry holds the summary text;
    * the facts file maps a session ID to a list of objects whose
      ``"user-info"`` entry holds one fact string.

    Missing or malformed entries are treated as "no data" (empty string)
    rather than raising, so lookups are safe for arbitrary session IDs.
    """

    def __init__(self, summary_path: str, facts_path: str):
        """Load both JSON files fully into memory.

        Args:
            summary_path: Path to the session-summary JSON file.
            facts_path: Path to the session user-facts JSON file.

        Raises:
            OSError: If either file cannot be opened.
            json.JSONDecodeError: If either file is not valid JSON.
        """
        print(f"[SemanticMemoryStore] Loading {summary_path} ...")
        # JSON is UTF-8 by specification; do not rely on the platform's
        # default encoding (e.g. cp1252 on Windows) to decode it.
        with open(summary_path, encoding="utf-8") as f:
            self._summaries: Dict[str, dict] = json.load(f)

        print(f"[SemanticMemoryStore] Loading {facts_path} ...")
        with open(facts_path, encoding="utf-8") as f:
            self._facts: Dict[str, list] = json.load(f)

        print(f"[SemanticMemoryStore] Loaded {len(self._summaries)} summaries, "
              f"{len(self._facts)} fact entries.")

    def get_summary(self, sess_id: str) -> str:
        """Return the session-level summary string, or empty string.

        An empty string is returned when the session is unknown, when its
        entry is not an object (e.g. JSON ``null``), or when the stored
        summary is missing or not a string.
        """
        entry = self._summaries.get(sess_id)
        if not isinstance(entry, dict):
            return ""
        summary = entry.get("session_summary")
        # A JSON ``null`` (or any non-string) summary must not reach .strip().
        if not isinstance(summary, str):
            return ""
        return summary.strip()

    def get_facts_text(self, sess_id: str) -> str:
        """Return user facts as a single joined string, or empty string.

        Facts are joined with a single space, in stored order. Items that
        are not objects, or whose ``"user-info"`` is empty or not a string,
        are skipped.
        """
        fact_list = self._facts.get(sess_id)
        if not fact_list or not isinstance(fact_list, list):
            return ""
        infos = []
        for fact in fact_list:
            if not isinstance(fact, dict):
                continue
            info = fact.get("user-info")
            # str.join() rejects non-strings, so filter them out here.
            if info and isinstance(info, str):
                infos.append(info)
        return " ".join(infos).strip()

    def get_text(self, sess_id: str) -> str:
        """Return summary + facts combined (for keyword search or display).

        The two parts are joined with a single space; an empty part is
        omitted, so the result is empty when the session has neither.
        """
        parts = [self.get_summary(sess_id), self.get_facts_text(sess_id)]
        return " ".join(p for p in parts if p)

    def keyword_search(self, keywords: List[str],
                       haystack_sess_ids: List[str]) -> List[str]:
        """Search semantic text (summary + facts) of the given sessions.

        Matching is a case-insensitive substring test; a session matches if
        its text contains ANY of the keywords. Empty keywords are ignored.

        Args:
            keywords: Keywords to look for.
            haystack_sess_ids: Session IDs to search, in the desired order.

        Returns:
            List of matching session IDs (preserving haystack order).
        """
        kws_lower = [kw.lower() for kw in keywords if kw]
        if not kws_lower:
            # Nothing can match; skip building the text of every session.
            return []

        matched = []
        for sid in haystack_sess_ids:
            text = self.get_text(sid).lower()
            if any(kw in text for kw in kws_lower):
                matched.append(sid)
        return matched

    def to_prompt(self, sess_ids: List[str],
                  date_lookup: Optional[Dict[str, str]] = None) -> str:
        """Format semantic context for these sessions as a prompt string.

        Each session produces one block; blocks are separated by a blank
        line. A block always starts with the session ID, and the remaining
        lines appear only when the corresponding value is non-empty:

            Session ID: <session id>
            Session Date: <date from date_lookup>
            Summary: <session summary>
            User Facts: <facts joined by single spaces>

        Args:
            sess_ids: Session IDs to include, in output order.
            date_lookup: Optional mapping from session ID to a date string.

        Returns:
            The formatted prompt text, or an empty string if ``sess_ids``
            is empty.
        """
        lines = []
        for sid in sess_ids:
            date_str = date_lookup.get(sid, "") if date_lookup else ""
            summary = self.get_summary(sid)
            facts_text = self.get_facts_text(sid)

            block = f"Session ID: {sid}"
            if date_str:
                block += f"\nSession Date: {date_str}"
            if summary:
                block += f"\nSummary: {summary}"
            if facts_text:
                block += f"\nUser Facts: {facts_text}"
            lines.append(block)
        return "\n\n".join(lines)