| """ |
| semantic_store.py |
| |
| Wrapper around all_session_summary.json and all_session_user_facts.json. |
| Provides: |
| - keyword_search(): find sessions whose semantic text contains given keywords |
| - to_prompt(): format semantic context for LLM consumption |
| - get_text(): return raw semantic text for a session (for embedding/search) |
| """ |
|
|
| import json |
| from typing import Dict, List, Optional |
|
|
|
|
class SemanticMemoryStore:
    """In-memory lookup over per-session summaries and user facts.

    Both JSON files are read once at construction time and kept in memory.
    The accessors expect the summary file to map a session ID to an object
    with a ``"session_summary"`` string, and the facts file to map a session
    ID to a list of objects with a ``"user-info"`` value. Entries that do not
    match these shapes are treated as empty rather than raising.
    """

    def __init__(self, summary_path: str, facts_path: str):
        """Load the summary and facts JSON files.

        Args:
            summary_path: Path to the JSON file holding session summaries.
            facts_path: Path to the JSON file holding per-session user facts.

        Raises:
            OSError: If either file cannot be opened.
            ValueError: If either file is not valid UTF-8 encoded JSON
                (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
        """
        print(f"[SemanticMemoryStore] Loading {summary_path} ...")
        # Decode explicitly as UTF-8 so loading does not depend on the
        # platform's locale encoding.
        with open(summary_path, encoding="utf-8") as f:
            self._summaries: Dict[str, dict] = json.load(f)

        print(f"[SemanticMemoryStore] Loading {facts_path} ...")
        with open(facts_path, encoding="utf-8") as f:
            self._facts: Dict[str, list] = json.load(f)

        print(f"[SemanticMemoryStore] Loaded {len(self._summaries)} summaries, "
              f"{len(self._facts)} fact entries.")

    def get_summary(self, sess_id: str) -> str:
        """Return the stripped session-level summary, or an empty string.

        An empty string is returned when the session is unknown, when its
        entry is not a JSON object, or when ``"session_summary"`` is missing
        or not a string (e.g. JSON ``null``).
        """
        entry = self._summaries.get(sess_id)
        if not isinstance(entry, dict):
            return ""
        summary = entry.get("session_summary")
        # dict.get's default only covers a missing key, not an explicit null.
        return summary.strip() if isinstance(summary, str) else ""

    def get_facts_text(self, sess_id: str) -> str:
        """Return the session's user facts joined by single spaces.

        Only list items that are dicts with a truthy ``"user-info"`` value
        contribute; everything else is skipped. Returns an empty string when
        the session is unknown or has no usable facts.
        """
        fact_list = self._facts.get(sess_id)
        if not fact_list or not isinstance(fact_list, list):
            return ""
        return " ".join(
            # str() keeps join() from failing on a non-string fact value.
            str(fact["user-info"])
            for fact in fact_list
            if isinstance(fact, dict) and fact.get("user-info")
        ).strip()

    def get_text(self, sess_id: str) -> str:
        """Return summary and facts combined into one space-separated string.

        Empty parts are dropped, so the result has no leading or trailing
        separator. Used by keyword_search() as the text to match against.
        """
        parts = [self.get_summary(sess_id), self.get_facts_text(sess_id)]
        return " ".join(p for p in parts if p)

    def keyword_search(self, keywords: List[str], haystack_sess_ids: List[str]) -> List[str]:
        """Find sessions whose semantic text contains any of the keywords.

        Matching is a case-insensitive substring test against get_text().
        Empty keywords are ignored.

        Args:
            keywords: Keywords to look for.
            haystack_sess_ids: Session IDs to search, in the desired order.

        Returns:
            List of matching session IDs (preserving haystack order).
        """
        kws_lower = [kw.lower() for kw in keywords if kw]
        if not kws_lower:
            # Nothing to match against; skip building text for every session.
            return []

        matched = []
        for sid in haystack_sess_ids:
            text = self.get_text(sid).lower()
            if any(kw in text for kw in kws_lower):
                matched.append(sid)
        return matched

    def to_prompt(self, sess_ids: List[str], date_lookup: Optional[Dict[str, str]] = None) -> str:
        """Format semantic context for these sessions as a prompt string.

        Each session block (blocks are separated by a blank line):
            Session ID: <sess_id>
            Session Date: <date>            (only if found in date_lookup)
            Summary: <session_summary>      (only if non-empty)
            User Facts: <fact1> <fact2> ... (only if non-empty)

        Args:
            sess_ids: Session IDs to include, in output order.
            date_lookup: Optional mapping from session ID to a date string.

        Returns:
            The formatted blocks, or an empty string when sess_ids is empty.
        """
        blocks = []
        for sid in sess_ids:
            date_str = date_lookup.get(sid, "") if date_lookup else ""
            summary = self.get_summary(sid)
            facts_text = self.get_facts_text(sid)

            # The ID line is always present; the other lines are optional.
            block_lines = [f"Session ID: {sid}"]
            if date_str:
                block_lines.append(f"Session Date: {date_str}")
            if summary:
                block_lines.append(f"Summary: {summary}")
            if facts_text:
                block_lines.append(f"User Facts: {facts_text}")
            blocks.append("\n".join(block_lines))

        return "\n\n".join(blocks)
|
|