ShutterSearch / caption_store.py
SwikarG's picture
Update caption_store.py
ac707f1 verified
Raw
History Blame Contribute Delete
6.32 kB
"""Persistent caption store backed by a local JSON file."""
import json
import os
import time
from typing import Optional
# Path of the JSON file that holds every caption entry; read by _load() and
# written by _save().
#
# Disabled alternative, kept for reference: store the file under "/data" when
# that directory exists, otherwise fall back to the local path used below.
# if os.path.exists("/data"):
# CAPTION_STORE_PATH = "/data/captions.json"
# else:
#
CAPTION_STORE_PATH = "./captions.json"
def _load() -> dict:
    """Read the caption store from disk.

    Returns:
        The decoded content of CAPTION_STORE_PATH, or an empty dict when the
        file does not exist yet.
    """
    if os.path.exists(CAPTION_STORE_PATH):
        with open(CAPTION_STORE_PATH, "r", encoding="utf-8") as handle:
            return json.load(handle)
    # Nothing has been saved so far: start from an empty store.
    return {}
def _save(store: dict) -> None:
    """Write the whole store to CAPTION_STORE_PATH as indented UTF-8 JSON.

    The store is serialised in memory first and written to a sibling
    temporary file that is then renamed over the real file. A failed
    serialisation or an interrupted write therefore never leaves a truncated
    store behind for the next ``_load`` to fail on.

    Args:
        store: Mapping of image path to entry dict.

    Raises:
        TypeError, ValueError: if ``store`` cannot be serialised by ``json``.
        OSError: if the temporary file cannot be written or renamed.
    """
    # Serialise before touching the disk: an error here leaves the file intact.
    payload = json.dumps(store, indent=2, ensure_ascii=False)
    tmp_path = f"{CAPTION_STORE_PATH}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(payload)
    # Swap the finished file into place in a single rename step.
    os.replace(tmp_path, CAPTION_STORE_PATH)
def _as_list(val) -> list:
"""Ensure a value is always a list — handles string/list/None from model output."""
if not val:
return []
if isinstance(val, list):
return val
return [val] # single string → wrap
def _try_parse_json(raw: str) -> dict | None:
"""Parse JSON, with a fallback that inserts missing commas between fields."""
try:
return json.loads(raw)
except json.JSONDecodeError:
pass
# Common model mistake: missing comma after a string value before next key
import re
fixed = re.sub(r'"\s*\n(\s*")', r'",\n\1', raw)
try:
return json.loads(fixed)
except json.JSONDecodeError:
return None
# (section, key, label) triples in the order they appear in the flattened
# text. A section of None means the key is read from the top level of the
# metadata dict.
_FLATTEN_FIELDS = (
    ("subjects", "attire", "Attire"),
    ("subjects", "primary_subjects", "Subjects"),
    ("subjects", "relationships", "Relationships"),
    ("scene", "location_type", "Location"),
    ("scene", "environment", "Environment"),
    ("scene", "setting_details", "Setting"),
    ("actions", "primary_action", "Action"),
    ("actions", "body_language", "Body language"),
    ("lighting", "lighting_style", "Lighting"),
    ("lighting", "time_of_day_estimate", "Time of day"),
    ("mood", "primary_emotions", "Emotions"),
    ("mood", "atmosphere", "Atmosphere"),
    ("composition", "shot_type", "Shot"),
    ("composition", "camera_angle", "Angle"),
    ("technical_cues", "color_palette", "Colors"),
    # Depth of field is included for portrait-photography searches.
    ("technical_cues", "depth_of_field", "Depth of field"),
    (None, "search_tags", "Tags"),
    (None, "archive_keywords", "Keywords"),
)


def _field_text(value) -> str:
    """Render one metadata value as text; return "" when there is nothing to show."""
    if not value:
        return ""
    items = value if isinstance(value, list) else [value]
    # str() keeps numbers and other non-string items from breaking the join;
    # None items carry no information and are dropped.
    return ", ".join(str(item) for item in items if item is not None)


def flatten_metadata(meta: dict) -> str:
    """Convert structured JSON metadata into a rich text string for embedding.

    The summary comes first, then the people count, then each known field as
    ``"Label: value"``; list values are joined with ``", "`` and the parts are
    joined with ``" | "``. Missing or empty fields are skipped.

    The function tolerates imperfect input: a section that is absent, null or
    not an object is ignored, a field may hold either a single value or a
    list, and non-string values are converted with ``str``.

    Args:
        meta: Decoded metadata object.

    Returns:
        The flattened text, or "" when ``meta`` is not a dict or holds none of
        the known fields.
    """
    if not isinstance(meta, dict):
        return ""

    parts = []

    summary = _field_text(meta.get("summary"))
    if summary:
        parts.append(summary)

    # Headcount is stated explicitly for numerical search queries
    # (e.g. "two people"); 0 is a meaningful value, so only None is skipped.
    subjects = meta.get("subjects")
    if isinstance(subjects, dict) and subjects.get("people_count") is not None:
        parts.append(f"People count: {subjects['people_count']}")

    for section, key, label in _FLATTEN_FIELDS:
        source = meta if section is None else meta.get(section)
        if not isinstance(source, dict):
            continue
        text = _field_text(source.get(key))
        if text:
            parts.append(f"{label}: {text}")

    return " | ".join(parts)
def get_entry(image_path: str) -> Optional[dict]:
    """Return the stored entry for ``image_path``, or None when there is none."""
    store = _load()
    return store.get(image_path)
def upsert_entry(image_path: str, caption: str, mtime: float, collection: str = "General") -> None:
    """Create or overwrite the entry for ``image_path`` and persist the store.

    ``caption`` is either a raw JSON string (structured format) or plain text
    (legacy format). The raw caption is always stored; ``search_text`` holds
    the flattened metadata when the caption parses to non-empty structured
    JSON, and the caption itself otherwise.
    """
    store = _load()

    # Structured captions (parsed with the comma-fix fallback) are flattened;
    # anything else is used verbatim as the search text.
    parsed = _try_parse_json(caption)
    flattened = flatten_metadata(parsed) if parsed else caption

    record = {
        "caption": caption,  # raw (JSON string or plain text)
        "search_text": flattened,  # flattened for embedding
        "mtime": mtime,
        "collection": collection,
        "ingested_at": time.time(),
    }
    store[image_path] = record
    _save(store)
def mark_error(image_path: str, error: str, collection: str = "General") -> None:
    """Store a failure record for ``image_path`` and persist the store.

    The record has ``caption`` set to None and carries the error message. Its
    ``mtime`` is the file's modification time when the file exists, else 0.
    """
    store = _load()

    if os.path.exists(image_path):
        modified = os.path.getmtime(image_path)
    else:
        modified = 0

    record = {
        "caption": None,
        "error": error,
        "mtime": modified,
        "collection": collection,
        "ingested_at": time.time(),
    }
    store[image_path] = record
    _save(store)
def all_entries() -> dict:
    """Return the stored entries whose ``caption`` is non-empty.

    Entries with a missing, None or empty caption (such as those written by
    ``mark_error``) are left out.
    """
    captioned = {}
    for image_path, record in _load().items():
        if not record.get("caption"):
            continue
        captioned[image_path] = record
    return captioned
def entry_count() -> int:
    """Return how many stored entries have a non-empty caption."""
    captioned = all_entries()
    return len(captioned)
def get_all_collections() -> list[str]:
    """Return the distinct collection names in the store, sorted.

    Entries without a collection name are ignored. When no entry names a
    collection, the result is ``["General"]``.
    """
    names = {
        name
        for record in _load().values()
        if (name := record.get("collection"))
    }
    return sorted(list(names)) if names else ["General"]
def get_entries_by_collection(collection: str) -> dict:
    """Return the captioned entries whose collection equals ``collection``."""
    matching = {}
    for image_path, record in all_entries().items():
        if record.get("collection") == collection:
            matching[image_path] = record
    return matching