"""Concept graph stored in a FAISS flat L2 index with JSON node metadata."""

import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import faiss
import numpy as np


class ConceptNode:
    """A single concept: id, label, embedding vector, confidence and edges."""

    def __init__(
        self,
        cid: int,
        label: str,
        embedding: np.ndarray,
        confidence: float,
        edges: Optional[List[Tuple[Any, ...]]] = None,
    ) -> None:
        """Store the given fields; ``edges`` defaults to a fresh empty list.

        The element layout of ``edges`` is not defined in this module;
        ``from_dict`` only guarantees that each element is a tuple.
        """
        self.cid = cid
        self.label = label
        self.embedding = embedding
        self.confidence = confidence
        self.edges = edges or []

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serialisable mapping describing this node.

        ``cid`` and ``confidence`` are coerced to built-in ``int``/``float``
        so that NumPy scalars (e.g. ``np.float32``) do not make
        ``json.dump`` raise ``TypeError``. The embedding is converted to a
        nested Python list.
        """
        return {
            "cid": int(self.cid),
            "label": self.label,
            # np.asarray lets a plain list embedding serialise as well.
            "embedding": np.asarray(self.embedding).tolist(),
            "confidence": float(self.confidence),
            "edges": list(self.edges),
        }

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "ConceptNode":
        """Rebuild a node from a mapping shaped like ``to_dict`` output.

        The embedding is loaded as ``float32`` and every edge is converted
        to a tuple (JSON round-trips tuples as lists). A missing ``"edges"``
        key yields an empty edge list.
        """
        return ConceptNode(
            int(d["cid"]),
            str(d["label"]),
            np.array(d["embedding"], dtype=np.float32),
            float(d["confidence"]),
            [tuple(e) for e in d.get("edges", [])],
        )


class ConceptGraph:
    """Collection of ``ConceptNode`` objects indexed by a FAISS flat L2 index.

    Node ids are the insertion positions in the FAISS index, and the
    graph can be written to ``persist_dir`` with ``persist``.
    """

    def __init__(self, dim: int = 768, persist_dir: str = "data/concept_graph") -> None:
        """Create an empty graph and make sure ``persist_dir`` exists.

        Args:
            dim: Length of every embedding vector stored in the index.
            persist_dir: Directory that ``persist`` writes into; it is
                created (with parents) if missing.
        """
        self.dim = dim
        self.persist_dir = Path(persist_dir)
        self.persist_dir.mkdir(parents=True, exist_ok=True)
        self.index = faiss.IndexFlatL2(dim)
        self._nodes: Dict[int, ConceptNode] = {}

    def add_node(
        self,
        label: str,
        embedding: Any,
        confidence: float,
        edges: Optional[List[Tuple[Any, ...]]] = None,
    ) -> int:
        """Normalise ``embedding``, add it to the index and register a node.

        Args:
            label: Label stored on the node.
            embedding: Array-like with exactly ``self.dim`` values. It is
                copied, so the caller's array is never modified.
            confidence: Confidence value stored on the node.
            edges: Optional edge list stored on the node.

        Returns:
            The id of the new node, which is its position in the index.

        Raises:
            ValueError: If the embedding does not hold ``self.dim`` values,
                or if its norm is zero or non-finite (normalising it would
                put NaN/inf values into the index).
        """
        # np.array copies, so the in-place division below is safe.
        vec = np.array(embedding, dtype=np.float32).ravel()
        if vec.shape[0] != self.dim:
            raise ValueError(
                f"embedding has {vec.shape[0]} values, expected {self.dim}"
            )

        norm = np.linalg.norm(vec)
        if norm == 0 or not np.isfinite(norm):
            raise ValueError("embedding must have a finite, non-zero norm")
        vec /= norm

        # Validate first, mutate second: the index and the node table are
        # only touched once the vector is known to be usable.
        self.index.add(vec[np.newaxis, :])
        cid = self.index.ntotal - 1
        self._nodes[cid] = ConceptNode(cid, label, vec, confidence, edges)
        return cid

    def persist(self) -> None:
        """Write node metadata and the FAISS index into ``persist_dir``.

        Nodes go to ``concepts.json`` (a JSON list of ``to_dict`` results)
        and the index goes to ``faiss.index``.
        """
        # The directory may have been removed since construction.
        self.persist_dir.mkdir(parents=True, exist_ok=True)
        records = [node.to_dict() for node in self._nodes.values()]
        with (self.persist_dir / "concepts.json").open("w", encoding="utf-8") as f:
            json.dump(records, f, indent=2)
        faiss.write_index(self.index, str(self.persist_dir / "faiss.index"))