# visual-search-api/scripts/calibrate_threshold.py
"""
Threshold calibration tool.
Use this to find the FACE_MATCH_THRESHOLD that gives you the best
precision/recall tradeoff for YOUR specific data. Default 0.28 is an
industry-average — your data may differ.
Usage:
1. Build two test sets:
- POSITIVE_PAIRS: pairs of (query_image, gallery_image) of the SAME person
- NEGATIVE_PAIRS: pairs of DIFFERENT people (hard negatives help most)
2. Populate TEST_PAIRS below with local image paths
3. Run: python scripts/calibrate_threshold.py
Output: table of thresholds with TP/FP/FN/precision/recall/F1.
"""
import sys
import os
from pathlib import Path
# Add project root to path so `src.*` imports work when running from scripts/
sys.path.insert(0, str(Path(__file__).parent.parent))
import numpy as np
from PIL import Image
# ── EDIT THESE ──────────────────────────────────────────────
# Each tuple: (path_to_query_image, path_to_gallery_image, is_same_person)
TEST_PAIRS: list[tuple[str, str, bool]] = [
    # Example positives (same person, different photos)
    # ("test_data/alice_1.jpg", "test_data/alice_2.jpg", True),
    # ("test_data/alice_1.jpg", "test_data/alice_3.jpg", True),
    # ("test_data/bob_1.jpg", "test_data/bob_2.jpg", True),
    # Example hard negatives (different people, similar looking)
    # ("test_data/alice_1.jpg", "test_data/carol_1.jpg", False),
    # ("test_data/bob_1.jpg", "test_data/dave_1.jpg", False),
]
# ────────────────────────────────────────────────────────────
def cosine(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity of two vectors; tiny epsilon keeps the division safe."""
    denom = np.linalg.norm(a) * np.linalg.norm(b) + 1e-9
    return float(np.dot(a, b) / denom)
def compute_pair_scores():
    """Score every pair in TEST_PAIRS.

    Returns one dict per usable pair with the query/gallery paths, the
    ground-truth label, and the arcface / adaface / fused similarity scores.
    Pairs with a missing file or no detectable face are skipped with a notice.
    """
    from src.services.ai_manager import AIModelManager
    print("Loading models...")
    manager = AIModelManager()

    def _largest_face(image_path):
        # Embed one image and return its widest detected face dict, or None.
        with open(image_path, "rb") as fh:
            vectors = manager.process_image_bytes(fh.read(), detect_faces=True)
        faces = [v for v in vectors if v["type"] == "face"]
        if not faces:
            return None
        return max(faces, key=lambda face: face.get("face_width_px", 0))

    scored = []
    for query_path, gallery_path, is_positive in TEST_PAIRS:
        if not (os.path.exists(query_path) and os.path.exists(gallery_path)):
            print(f" Skipping missing: {query_path} or {gallery_path}")
            continue
        qf = _largest_face(query_path)
        gf = _largest_face(gallery_path)
        if qf is None or gf is None:
            print(f" No face in: {query_path} or {gallery_path}")
            continue
        arc_score = cosine(qf["arcface_vector"], gf["arcface_vector"])
        # Neutral fallback when either embedding lacks an AdaFace vector.
        if qf.get("has_adaface") and gf.get("has_adaface"):
            ada_score = cosine(qf["adaface_vector"], gf["adaface_vector"])
        else:
            ada_score = 0.15
        fused = 0.6 * arc_score + 0.4 * ada_score
        scored.append({
            "query": query_path,
            "gallery": gallery_path,
            "is_positive": is_positive,
            "arcface": arc_score,
            "adaface": ada_score,
            "fused": fused,
        })
        tag = "SAME" if is_positive else "DIFF"
        print(f" [{tag}] arc={arc_score:.3f} ada={ada_score:.3f} fused={fused:.3f}")
    return scored
def evaluate_thresholds(results):
    """Sweep ArcFace/fused threshold combinations and print P/R/F1 for each.

    Args:
        results: list of dicts from compute_pair_scores(); each must have the
            keys "is_positive" (bool), "arcface" (float) and "fused" (float).

    Returns:
        None — the per-threshold table and best-F1 summary go to stdout.
    """
    if not results:
        print("\nNo results to evaluate. Add pairs to TEST_PAIRS above.")
        return
    print("\n" + "=" * 78)
    print(f"{'arcface_thr':<14}{'fused_thr':<14}{'TP':>6}{'FP':>6}{'FN':>6}"
          f"{'Precision':>12}{'Recall':>10}{'F1':>8}")
    print("=" * 78)
    n_positive = sum(1 for r in results if r["is_positive"])
    # Seed `best` with every key the summary below reads. Previously only
    # f1/arc_thr/fused_thr were present, so a sweep in which no combination
    # achieved F1 > 0 (e.g. zero true positives in the test set) raised
    # KeyError on best["precision"]. With no predicted matches, all positives
    # are misses, hence fn defaults to n_positive.
    best = {"f1": 0, "arc_thr": 0, "fused_thr": 0,
            "tp": 0, "fp": 0, "fn": n_positive,
            "precision": 0, "recall": 0}
    for arc_thr in [0.20, 0.24, 0.28, 0.32, 0.36, 0.40, 0.45]:
        for fused_thr in [0.22, 0.26, 0.30, 0.34, 0.38]:
            tp = fp = fn = 0
            for r in results:
                # A match must clear both thresholds
                predicted_match = (r["arcface"] >= arc_thr and r["fused"] >= fused_thr)
                if r["is_positive"]:
                    if predicted_match:
                        tp += 1
                    else:
                        fn += 1
                else:
                    if predicted_match:
                        fp += 1
            precision = tp / (tp + fp) if (tp + fp) else 0
            recall = tp / (tp + fn) if (tp + fn) else 0
            f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0
            if f1 > best["f1"]:
                best = {"f1": f1, "arc_thr": arc_thr, "fused_thr": fused_thr,
                        "tp": tp, "fp": fp, "fn": fn,
                        "precision": precision, "recall": recall}
            print(f"{arc_thr:<14.2f}{fused_thr:<14.2f}{tp:>6}{fp:>6}{fn:>6}"
                  f"{precision:>12.3f}{recall:>10.3f}{f1:>8.3f}")
    print("=" * 78)
    print(f"\nBest F1: {best['f1']:.3f}")
    print(f" FACE_MATCH_THRESHOLD = {best['arc_thr']}")
    print(f" FUSED_MATCH_THRESHOLD = {best['fused_thr']}")
    print(f" Precision = {best['precision']:.3f}, Recall = {best['recall']:.3f}")
    print("\nUpdate these in your HF Space env vars.")
if __name__ == "__main__":
    if not TEST_PAIRS:
        # Nothing to calibrate against yet — tell the user how to set up.
        print("Edit scripts/calibrate_threshold.py and populate TEST_PAIRS with")
        print("10-30 positive pairs and 10-30 hard-negative pairs, then re-run.")
        print("\nTip: export ~50 face photos from your own gallery, hand-label")
        print("the same-person pairs, and use those for calibration.")
        sys.exit(1)
    evaluate_thresholds(compute_pair_scores())