Spaces:

build-small-hackathon
/

ShutterSearch

Sleeping

App Files Files Community

ShutterSearch / caption_store.py

SwikarG

Update caption_store.py

ac707f1 verified 18 days ago

Raw

History Blame Contribute Delete

6.32 kB

	"""Persistent caption store backed by a local JSON file."""

	import json
	import os
	import time
	from typing import Optional

	# Path of the JSON file that holds every caption entry. It is relative, so it
	# resolves against the process's current working directory.
	#
	# Disabled alternative, kept for reference: use "/data/captions.json" when a
	# "/data" directory exists, otherwise fall back to the default below.
	# if os.path.exists("/data"):
	#     CAPTION_STORE_PATH = "/data/captions.json"
	# else:
	#
	CAPTION_STORE_PATH = "./captions.json"


	def _load() -> dict:
	    """Read the caption store from ``CAPTION_STORE_PATH``.

	    Returns:
	        The decoded JSON content of the store file, or an empty dict when the
	        file does not exist or contains only whitespace.

	    Raises:
	        json.JSONDecodeError: If the file has content that is not valid JSON.
	            Corruption is deliberately not swallowed, so that a later save
	            cannot silently replace a damaged store with an empty one.
	    """
	    # Open directly instead of checking os.path.exists() first: the file can
	    # disappear between the check and the open.
	    try:
	        with open(CAPTION_STORE_PATH, "r", encoding="utf-8") as f:
	            raw = f.read()
	    except FileNotFoundError:
	        return {}
	    # A zero-length (or whitespace-only) file holds no entries; treat it as an
	    # empty store rather than failing every call with a parse error.
	    if not raw.strip():
	        return {}
	    return json.loads(raw)


	def _save(store: dict) -> None:
	    """Write ``store`` to ``CAPTION_STORE_PATH`` as indented UTF-8 JSON.

	    The store is serialised in memory first and then swapped into place via a
	    sibling temporary file, so a failure part-way through (for example a value
	    that is not JSON-serialisable) leaves the previous file untouched instead
	    of truncated.

	    Args:
	        store: Mapping to persist.

	    Raises:
	        TypeError: If ``store`` contains a value json cannot serialise.
	        OSError: If the temporary file cannot be written or moved into place.
	    """
	    # Serialise before touching the disk: opening the target with "w" would
	    # truncate it even if the JSON encoding then failed.
	    payload = json.dumps(store, indent=2, ensure_ascii=False)
	    tmp_path = f"{CAPTION_STORE_PATH}.tmp"
	    try:
	        with open(tmp_path, "w", encoding="utf-8") as f:
	            f.write(payload)
	        # Swap the finished file over the old one in a single step.
	        os.replace(tmp_path, CAPTION_STORE_PATH)
	    except OSError:
	        # Do not leave a half-written temporary file behind.
	        if os.path.exists(tmp_path):
	            os.remove(tmp_path)
	        raise


	def _as_list(val) -> list:
	"""Ensure a value is always a list — handles string/list/None from model output."""
	if not val:
	return []
	if isinstance(val, list):
	return val
	return [val] # single string → wrap


	def _try_parse_json(raw: str) -> dict \| None:
	"""Parse JSON, with a fallback that inserts missing commas between fields."""
	try:
	return json.loads(raw)
	except json.JSONDecodeError:
	pass
	# Common model mistake: missing comma after a string value before next key
	import re
	fixed = re.sub(r'"\s\n(\s")', r'",\n\1', raw)
	try:
	return json.loads(fixed)
	except json.JSONDecodeError:
	return None


	def _join_values(value) -> str:
	    """Render one metadata field as display text.

	    Falsy values (None, "", [], 0) yield an empty string. A list is joined
	    with ", " after converting each item with ``str()`` and dropping None and
	    empty-string items; any other value is converted with ``str()``.
	    """
	    if not value:
	        return ""
	    items = value if isinstance(value, list) else [value]
	    return ", ".join(str(item) for item in items if item is not None and item != "")


	def flatten_metadata(meta: dict) -> str:
	    """Convert structured JSON metadata into one text string for embedding.

	    Recognised fields are emitted in a fixed order as ``"Label: value"``
	    segments joined by ``" | "``; the summary, when present, comes first
	    without a label. Fields that are missing or empty are skipped.

	    The input comes from model output, so its shape is not trusted: a section
	    that is not a dict is treated as absent, and every field may be a string,
	    a list, or another scalar.

	    Args:
	        meta: Parsed metadata. A non-dict value is rendered as plain text.

	    Returns:
	        The flattened text, or an empty string if nothing usable was found.
	    """
	    if not isinstance(meta, dict):
	        return _join_values(meta)

	    parts = []

	    def section(name: str) -> dict:
	        # The model may emit null, a string or a list where an object belongs.
	        value = meta.get(name)
	        return value if isinstance(value, dict) else {}

	    def add(label: str, value) -> None:
	        text = _join_values(value)
	        if text:
	            parts.append(f"{label}: {text}")

	    summary = _join_values(meta.get("summary"))
	    if summary:
	        parts.append(summary)

	    subjects = section("subjects")
	    # Headcount is included explicitly for numerical queries ("two people");
	    # compared against None so that a count of 0 is still recorded.
	    if subjects.get("people_count") is not None:
	        parts.append(f"People count: {subjects['people_count']}")
	    add("Attire", subjects.get("attire"))
	    add("Subjects", subjects.get("primary_subjects"))
	    add("Relationships", subjects.get("relationships"))

	    scene = section("scene")
	    add("Location", scene.get("location_type"))
	    add("Environment", scene.get("environment"))
	    add("Setting", scene.get("setting_details"))

	    actions = section("actions")
	    add("Action", actions.get("primary_action"))
	    add("Body language", actions.get("body_language"))

	    lighting = section("lighting")
	    add("Lighting", lighting.get("lighting_style"))
	    add("Time of day", lighting.get("time_of_day_estimate"))

	    mood = section("mood")
	    add("Emotions", mood.get("primary_emotions"))
	    add("Atmosphere", mood.get("atmosphere"))

	    composition = section("composition")
	    add("Shot", composition.get("shot_type"))
	    add("Angle", composition.get("camera_angle"))

	    technical = section("technical_cues")
	    add("Colors", technical.get("color_palette"))
	    add("Depth of field", technical.get("depth_of_field"))

	    add("Tags", meta.get("search_tags"))
	    add("Keywords", meta.get("archive_keywords"))

	    return " | ".join(parts)


	def get_entry(image_path: str) -> Optional[dict]:
	    """Look up the stored entry for ``image_path``; None if there is none."""
	    store = _load()
	    return store.get(image_path)


	def upsert_entry(image_path: str, caption: str, mtime: float, collection: str = "General") -> None:
	    """Insert or replace the store entry for ``image_path``.

	    ``caption`` may be a raw JSON string (structured format) or plain text
	    (legacy). The raw string is stored as ``caption`` and a flattened version
	    is stored as ``search_text``.

	    Args:
	        image_path: Key of the entry in the store.
	        caption: Raw caption text or JSON produced for the image.
	        mtime: Modification time to record for the image.
	        collection: Collection name to file the entry under.
	    """
	    # Only a JSON *object* counts as structured metadata. A plain-text caption
	    # such as "2019" also parses as JSON (to an int) and must stay plain text
	    # instead of being passed to flatten_metadata.
	    meta = _try_parse_json(caption)
	    if isinstance(meta, dict):
	        # If no recognised field produced any text, keep the raw caption so
	        # the entry does not end up with an empty search string.
	        search_text = flatten_metadata(meta) or caption
	    else:
	        search_text = caption

	    store = _load()
	    store[image_path] = {
	        "caption": caption,  # raw (JSON string or plain text)
	        "search_text": search_text,  # flattened for embedding
	        "mtime": mtime,
	        "collection": collection,
	        "ingested_at": time.time(),
	    }
	    _save(store)


	def mark_error(image_path: str, error: str, collection: str = "General") -> None:
	    """Record a failure for ``image_path`` in the store.

	    The entry is written with ``caption`` set to None plus the error text. The
	    recorded ``mtime`` is the file's modification time, or 0 when the file no
	    longer exists.
	    """
	    entries = _load()
	    if os.path.exists(image_path):
	        modified = os.path.getmtime(image_path)
	    else:
	        modified = 0
	    failure = {
	        "caption": None,
	        "error": error,
	        "mtime": modified,
	        "collection": collection,
	        "ingested_at": time.time(),
	    }
	    entries[image_path] = failure
	    _save(entries)


	def all_entries() -> dict:
	    """Return the store entries whose ``caption`` is set and non-empty."""
	    captioned = {}
	    for path, data in _load().items():
	        # Entries with a None or empty caption are left out.
	        if data.get("caption"):
	            captioned[path] = data
	    return captioned


	def entry_count() -> int:
	    """Return how many entries ``all_entries()`` reports."""
	    entries = all_entries()
	    return len(entries)


	def get_all_collections() -> list[str]:
	    """Return the distinct collection names in the store, sorted.

	    Every stored entry is considered, with or without a caption. When no
	    entry names a collection the result is ``["General"]``.
	    """
	    names = {
	        name
	        for entry in _load().values()
	        if (name := entry.get("collection"))
	    }
	    return sorted(names) if names else ["General"]


	def get_entries_by_collection(collection: str) -> dict:
	    """Return the captioned entries whose ``collection`` equals ``collection``."""
	    matching = {}
	    for path, data in all_entries().items():
	        if data.get("collection") == collection:
	            matching[path] = data
	    return matching