Spaces:

NeerajCodz
/

aiBatteryLifeCycle

Sleeping

App Files Files Community

aiBatteryLifeCycle / api /routers /visualize.py

NeerajCodz

Add versioned figures manifest support

770f92a about 1 month ago

raw

history blame contribute delete

25.4 kB

	"""
	api.routers.visualize
	=====================
	Endpoints that serve pre-computed or on-demand visualisation data
	consumed by the React frontend.
	"""

	from __future__ import annotations

	import json
	import os
	from io import StringIO
	from pathlib import Path
	from typing import Any
	from urllib.parse import quote, urlparse
	from urllib.error import HTTPError, URLError
	from urllib.request import Request, urlopen

	import pandas as pd
	from fastapi import APIRouter, HTTPException
	from fastapi.responses import FileResponse, RedirectResponse

	from api.model_registry import registry, classify_degradation, soh_to_color
	from api.schemas import BatteryVizData, DashboardData

	# Router for this module; every route declared below is mounted under /api.
	router = APIRouter(prefix="/api", tags=["visualization"])

	# parents[2] of this file's resolved path; with the file at
	# api/routers/visualize.py this is presumably the repository root.
	_PROJECT = Path(__file__).resolve().parents[2]
	# Local artifact tree (per-version sub-directories are resolved below it).
	_ARTIFACTS = _PROJECT / "artifacts"
	# Un-versioned figures directory served by /api/figures.
	_FIGURES = _ARTIFACTS / "figures"
	# Directory holding metadata.csv, read by the battery endpoints.
	_DATASET = _PROJECT / "cleaned_dataset"
	# Base URL for raw artifact downloads; overridable via HF_ARTIFACTS_RAW_BASE.
	_HF_RAW_BASE = os.getenv(
	"HF_ARTIFACTS_RAW_BASE",
	"https://huggingface.co/NeerajCodz/aiBatteryLifeCycle/resolve/main",
	)
	# Base URL of the tree-listing API; overridable via HF_ARTIFACTS_TREE_API_BASE.
	_HF_TREE_API_BASE = os.getenv(
	"HF_ARTIFACTS_TREE_API_BASE",
	"https://huggingface.co/api/models/NeerajCodz/aiBatteryLifeCycle/tree/main",
	)

	# Artifact versions accepted by the /{version}/... routes.
	_SUPPORTED_VERSIONS = {"v1", "v2", "v3"}


	# ── Dashboard aggregate ──────────────────────────────────────────────────────
	@router.get("/dashboard", response_model=DashboardData)
	async def dashboard():
	    """Return the full dashboard payload for the frontend.

	    Reads ``metadata.csv`` from the dataset directory (when present) and
	    builds, for every battery that has at least one numeric capacity value,
	    a summary entry plus its capacity series ordered by ``start_time``. The
	    registry's model metrics are attached together with the name of the
	    model that has the highest ``R2``.

	    Returns:
	        DashboardData: ``batteries``, ``capacity_fade``, ``model_metrics``
	        and ``best_model`` (``"none"`` when no model has a usable R2).
	    """
	    metadata_path = _DATASET / "metadata.csv"
	    batteries: list[BatteryVizData] = []
	    capacity_fade: dict[str, list[float]] = {}

	    if metadata_path.exists():
	        meta = pd.read_csv(metadata_path)
	        for bid in meta["battery_id"].unique():
	            sub = meta[meta["battery_id"] == bid].sort_values("start_time")
	            caps_s = pd.to_numeric(sub["Capacity"], errors="coerce").dropna()
	            if caps_s.empty:
	                # No usable capacity measurement for this battery.
	                continue
	            caps = caps_s.tolist()
	            last_cap = float(caps[-1])
	            # 2.0 is the reference capacity used throughout this module
	            # (presumably the rated Ah of the cells; confirm with dataset docs).
	            soh = (last_cap / 2.0) * 100
	            avg_temp = float(sub["ambient_temperature"].mean())
	            batteries.append(BatteryVizData(
	                battery_id=bid,
	                soh_pct=round(soh, 1),
	                temperature=round(avg_temp, 1),
	                cycle_number=len(sub),
	                degradation_state=classify_degradation(soh),
	                color_hex=soh_to_color(soh),
	            ))
	            capacity_fade[bid] = caps

	    model_metrics = registry.get_metrics()
	    best_model = "none"
	    best_r2 = -999
	    for name, m in model_metrics.items():
	        try:
	            r2 = float(m.get("R2", -999))
	        except (AttributeError, TypeError, ValueError):
	            # A None / non-numeric R2 (or a malformed entry) must not abort
	            # the whole dashboard; such a model simply cannot be "best".
	            continue
	        if r2 > best_r2:
	            best_r2 = r2
	            best_model = name

	    return DashboardData(
	        batteries=batteries,
	        capacity_fade=capacity_fade,
	        model_metrics=model_metrics,
	        best_model=best_model,
	    )


	# ── Capacity fade for a specific battery ─────────────────────────────────────
	@router.get("/battery/{battery_id}/capacity")
	async def battery_capacity(battery_id: str):
	    """Return the per-cycle capacity and SoH series of a single battery.

	    Raises:
	        HTTPException: 404 when metadata.csv is missing or when no row
	            matches ``battery_id``.
	    """
	    csv_path = _DATASET / "metadata.csv"
	    if not csv_path.exists():
	        raise HTTPException(404, "Metadata not found")

	    frame = pd.read_csv(csv_path)
	    rows = frame[frame["battery_id"] == battery_id].sort_values("start_time")
	    if rows.empty:
	        raise HTTPException(404, f"Battery {battery_id} not found")

	    # Non-numeric capacity cells are coerced to NaN and dropped.
	    capacity_ah = pd.to_numeric(rows["Capacity"], errors="coerce").dropna().tolist()
	    return {
	        "battery_id": battery_id,
	        "cycles": [index for index, _ in enumerate(capacity_ah, start=1)],
	        "capacity_ah": capacity_ah,
	        "soh_pct": [(float(value) / 2.0) * 100 for value in capacity_ah],
	    }


	# ── Serve saved figures ──────────────────────────────────────────────────────
	@router.get("/figures/{filename}")
	async def get_figure(filename: str):
	    """Serve a saved matplotlib/plotly figure from artifacts/figures.

	    Args:
	        filename: Bare file name of the figure (no directory components).

	    Raises:
	        HTTPException: 400 when ``filename`` is not a plain file name,
	            404 when no regular file with that name exists.
	    """
	    # Only a plain file name is acceptable: reject traversal components and
	    # separators so the resolved path can never leave the figures directory.
	    if (
	        not filename
	        or filename in (".", "..")
	        or "\\" in filename
	        or Path(filename).name != filename
	    ):
	        raise HTTPException(400, "Invalid figure filename")

	    path = _FIGURES / filename
	    # is_file() (not exists()) so a directory is reported as "not found"
	    # instead of failing later inside FileResponse.
	    if not path.is_file():
	        raise HTTPException(404, f"Figure {filename} not found")

	    media_types = {
	        ".html": "text/html",
	        ".svg": "image/svg+xml",
	        ".jpg": "image/jpeg",
	        ".jpeg": "image/jpeg",
	        ".webp": "image/webp",
	    }
	    # PNG stays the default for any other suffix, as before.
	    content_type = media_types.get(path.suffix.lower(), "image/png")
	    return FileResponse(path, media_type=content_type)


	# ── Figures listing ──────────────────────────────────────────────────────────
	@router.get("/figures")
	async def list_figures():
	    """Return the sorted names of all regular files in artifacts/figures.

	    An empty list is returned when the directory does not exist.
	    """
	    if not _FIGURES.exists():
	        return []
	    names = [entry.name for entry in _FIGURES.iterdir() if entry.is_file()]
	    names.sort()
	    return names


	# ── Battery list ─────────────────────────────────────────────────────────────
	@router.get("/batteries")
	async def list_batteries():
	    """Return all battery IDs with basic per-battery statistics.

	    Returns:
	        A list (sorted by battery id) of dicts with ``battery_id``,
	        ``n_cycles``, ``last_capacity``, ``soh_pct`` and
	        ``ambient_temperature``; an empty list when metadata.csv is missing.
	        ``last_capacity``/``soh_pct`` are ``None`` when the battery has no
	        numeric capacity value, ``ambient_temperature`` is ``None`` when its
	        mean is undefined.
	    """
	    meta_path = _DATASET / "metadata.csv"
	    if not meta_path.exists():
	        return []
	    meta = pd.read_csv(meta_path)
	    out = []
	    for bid in sorted(meta["battery_id"].unique()):
	        sub = meta[meta["battery_id"] == bid]
	        # Order chronologically so "last" means the same thing here as in
	        # the dashboard and per-battery capacity endpoints.
	        ordered = sub.sort_values("start_time") if "start_time" in sub.columns else sub
	        caps = pd.to_numeric(ordered["Capacity"], errors="coerce").dropna()
	        last_cap = float(caps.iloc[-1]) if len(caps) else None
	        mean_temp = sub["ambient_temperature"].mean()
	        out.append({
	            "battery_id": bid,
	            "n_cycles": len(sub),
	            "last_capacity": round(last_cap, 4) if last_cap is not None else None,
	            "soh_pct": round((last_cap / 2.0) * 100, 1) if last_cap is not None else None,
	            # NaN cannot be encoded as JSON by the response class; emit null.
	            "ambient_temperature": None if pd.isna(mean_temp) else round(float(mean_temp), 1),
	        })
	    return out


	# ── Artifact-loading helpers and comprehensive metrics endpoint ──────────────
	def _hf_url(rel_path: str) -> str:
	    """Join ``rel_path`` onto the raw-artifact base URL with exactly one slash."""
	    base = _HF_RAW_BASE.rstrip("/")
	    tail = rel_path.lstrip("/")
	    return base + "/" + tail


	def _hf_version_url(version: str, rel_path: str) -> str:
	    """Return the raw-artifact URL of ``rel_path`` inside the ``version`` folder."""
	    tail = rel_path.lstrip("/")
	    return _hf_url(version + "/" + tail)


	def _hf_tree_api_url(rel_path: str = "") -> str:
	    """Return the tree-API URL for ``rel_path`` (the API base itself when empty)."""
	    root = _HF_TREE_API_BASE.rstrip("/")
	    suffix = rel_path.lstrip("/")
	    if not suffix:
	        return root
	    return f"{root}/{suffix}"


	def _read_remote_text(url: str) -> str | None:
	    """Fetch ``url`` and return its body decoded as text, or ``None`` on failure.

	    The body is decoded with the charset announced by the response headers
	    (UTF-8 when none is given); undecodable bytes are replaced.

	    Args:
	        url: Absolute URL to fetch.

	    Returns:
	        The decoded body, or ``None`` when the URL is malformed, the request
	        fails or times out, or the announced charset is unknown.
	    """
	    # Imported locally under an alias: the module-level name HTTPException
	    # already refers to FastAPI's exception class.
	    from http.client import HTTPException as _HTTPClientError

	    try:
	        # Request() itself raises ValueError for a URL without a scheme, so
	        # it has to live inside the try block as well.
	        req = Request(url, headers={"User-Agent": "aiBatteryLifecycle/metrics"})
	        with urlopen(req, timeout=15) as resp:
	            charset = resp.headers.get_content_charset() or "utf-8"
	            return resp.read().decode(charset, errors="replace")
	    except (OSError, _HTTPClientError, ValueError, LookupError):
	        # OSError covers HTTPError, URLError, timeouts and connection resets;
	        # LookupError covers an unknown charset name passed to decode().
	        return None


	def _read_remote_json(url: str) -> dict:
	    """Fetch ``url`` and parse it as a JSON object.

	    Returns an empty dict when the download yields nothing, the body is not
	    valid JSON, or the top-level JSON value is not an object.
	    """
	    body = _read_remote_text(url)
	    if body:
	        try:
	            parsed = json.loads(body)
	        except Exception:
	            return {}
	        if isinstance(parsed, dict):
	            return parsed
	    return {}


	def _extract_figure_names_from_datamap(datamap: dict) -> list[str]:
	    """Return the sorted, unique image file names listed under ``figures/``.

	    Args:
	        datamap: Parsed datamap; its ``files`` entry is expected to be a list
	            of dicts carrying a ``path`` key. Anything else is ignored.

	    Returns:
	        File names (without directory) of entries whose path starts with
	        ``figures/`` and whose suffix is a known image type; ``[]`` when the
	        datamap is malformed or has no such entries.
	    """
	    files = datamap.get("files") if isinstance(datamap, dict) else None
	    if not isinstance(files, (list, tuple)):
	        # Covers a missing key as well as "files": null in the JSON, which
	        # would otherwise blow up when iterated.
	        return []

	    image_suffixes = (".png", ".svg", ".jpg", ".jpeg", ".webp")
	    names: set[str] = set()
	    for item in files:
	        if not isinstance(item, dict):
	            continue
	        rel = str(item.get("path", ""))
	        if not rel.startswith("figures/"):
	            continue
	        name = Path(rel).name
	        if Path(name).suffix.lower() in image_suffixes:
	            names.add(name)
	    return sorted(names)


	def _list_tree_api_figures(version: str) -> list[str]:
	    """List image file names under ``<version>/figures`` via the tree API.

	    Returns the sorted, de-duplicated names; an empty list when the request
	    fails, the body is not valid JSON, or the JSON value is not a list.
	    """
	    body = _read_remote_text(_hf_tree_api_url(f"{version}/figures"))
	    if not body:
	        return []
	    try:
	        listing = json.loads(body)
	    except Exception:
	        return []
	    if not isinstance(listing, list):
	        return []

	    image_suffixes = (".png", ".svg", ".jpg", ".jpeg", ".webp")
	    found: set[str] = set()
	    for entry in listing:
	        if isinstance(entry, dict):
	            # Entries may expose the file under "path" or under "name".
	            leaf = Path(str(entry.get("path") or entry.get("name") or "")).name
	            if Path(leaf).suffix.lower() in image_suffixes:
	                found.add(leaf)
	    return sorted(found)


	def _safe_read_json_any(path: Path, remote_url: str | None = None) -> Any:
	    """Read a JSON value (dict, list, ...) from disk, then from a remote URL.

	    Args:
	        path: Local file tried first.
	        remote_url: Optional URL tried when the local file is missing or
	            cannot be read/parsed.

	    Returns:
	        The parsed JSON value, or ``None`` when neither source yields valid
	        JSON.
	    """
	    if path.exists():
	        try:
	            return json.loads(path.read_text(encoding="utf-8"))
	        except Exception:
	            # Best effort: an unreadable or corrupt local copy falls through
	            # to the remote source instead of failing the request.
	            pass
	    if remote_url:
	        text = _read_remote_text(remote_url)
	        if text:
	            try:
	                return json.loads(text)
	            except Exception:
	                # Remote body is not JSON (e.g. an HTML error page).
	                pass
	    return None


	def _looks_like_url(value: str) -> bool:
	    """Return True when ``value`` starts with an ``http://`` or ``https://`` scheme."""
	    return value.startswith(("http://", "https://"))


	def _location_filename(location: str) -> str:
	    """Return the final path component of a figure location.

	    For http(s) URLs only the URL's path part is considered; for anything
	    else the location itself is treated as a path. Blank input gives "".
	    """
	    trimmed = location.strip()
	    if not trimmed:
	        return ""
	    source = urlparse(trimmed).path if _looks_like_url(trimmed) else trimmed
	    return Path(source).name


	def _prettify_figure_name(raw: str) -> str:
	    """Turn a figure file name into a display label.

	    The suffix (if any) and directory are dropped, underscores and hyphens
	    become spaces, and surrounding whitespace is trimmed.
	    """
	    if not raw:
	        return ""
	    candidate = Path(raw)
	    label = candidate.stem if candidate.suffix else raw
	    return label.translate(str.maketrans("_-", "  ")).strip()


	def _tags_from_label(label: str) -> list[str]:
	    """Derive lower-case search tags from a figure label or file name.

	    The label (minus its suffix, if it has one) is split on hyphens, spaces,
	    dots and underscores. Tokens shorter than two characters, stop words and
	    repeated tokens are dropped; first-occurrence order is kept.
	    """
	    as_path = Path(label)
	    text = as_path.stem if as_path.suffix else label
	    for separator in ("-", " ", "."):
	        text = text.replace(separator, "_")

	    ignored = {"and", "the", "all", "by", "vs", "with", "for"}
	    words = (piece.strip() for piece in text.lower().split("_"))
	    kept = [word for word in words if len(word) >= 2 and word not in ignored]
	    # dict.fromkeys removes duplicates while preserving insertion order.
	    return list(dict.fromkeys(kept))


	def _location_to_serve_url(version: str, location: str) -> str:
	    """Map a manifest location to the URL the frontend should request.

	    Absolute URLs and locations starting with "/" are returned unchanged;
	    anything else is reduced to its file name and pointed at this API's
	    versioned figure route. Blank input (or no file name) gives "".
	    """
	    target = location.strip()
	    if not target:
	        return ""
	    if target.startswith("/") or _looks_like_url(target):
	        return target
	    leaf = _location_filename(target)
	    return f"/api/{version}/figures/{quote(leaf)}" if leaf else ""


	def _normalize_figure_manifest_item(version: str, raw: Any) -> dict[str, Any] | None:
	    """Normalise one manifest entry to ``{name, tags, location, url}``.

	    Args:
	        version: Artifact version used to build the serve URL.
	        raw: Either a location string, or a dict that may carry
	            ``location``/``url``/``path``/``file``/``src``, ``name`` and
	            ``tags`` (comma-separated string or list).

	    Returns:
	        The normalised entry, or ``None`` when ``raw`` has an unsupported
	        type or no location can be determined.
	    """
	    if isinstance(raw, str):
	        loc = raw.strip()
	        if not loc:
	            return None
	        label = _prettify_figure_name(_location_filename(loc) or loc) or loc
	        return {
	            "name": label,
	            "tags": _tags_from_label(label),
	            "location": loc,
	            "url": _location_to_serve_url(version, loc),
	        }

	    if not isinstance(raw, dict):
	        return None

	    # First non-empty of the accepted location keys wins.
	    location = str(
	        raw.get("location")
	        or raw.get("url")
	        or raw.get("path")
	        or raw.get("file")
	        or raw.get("src")
	        or ""
	    ).strip()
	    name = str(raw.get("name") or "").strip()

	    # A name that looks like a file name doubles as the location.
	    if not location and name and Path(name).suffix:
	        location = name
	    if not location:
	        return None

	    if not name:
	        name = _prettify_figure_name(_location_filename(location) or location) or location

	    raw_tags = raw.get("tags", [])
	    if isinstance(raw_tags, str):
	        tags = [t.strip().lower() for t in raw_tags.split(",") if t.strip()]
	    elif isinstance(raw_tags, list):
	        tags = [str(t).strip().lower() for t in raw_tags if str(t).strip()]
	    else:
	        tags = []
	    if not tags:
	        # No explicit tags: derive them from the display name.
	        tags = _tags_from_label(name)

	    return {
	        "name": name,
	        # dict.fromkeys de-duplicates while preserving order.
	        "tags": list(dict.fromkeys(tags)),
	        "location": location,
	        "url": _location_to_serve_url(version, location),
	    }


	def _version_figures_manifest(version: str) -> list[dict[str, Any]]:
	    """Return the normalised figure manifest of ``version``.

	    ``figures.json`` is read locally or remotely. It may be a list, or a dict
	    holding the list under ``figures`` or ``items``. Entries are normalised
	    and de-duplicated on (lower-cased name, location). When that yields
	    nothing, a manifest is synthesised from the discovered figure file names.
	    """
	    payload = _safe_read_json_any(
	        _version_root(version) / "figures.json",
	        _hf_version_url(version, "figures.json"),
	    )

	    entries: list = []
	    if isinstance(payload, list):
	        entries = payload
	    elif isinstance(payload, dict):
	        # "figures" takes precedence over "items".
	        for key in ("figures", "items"):
	            candidate = payload.get(key)
	            if isinstance(candidate, list):
	                entries = candidate
	                break

	    collected: list[dict[str, Any]] = []
	    known: set[tuple[str, str]] = set()
	    for entry in entries:
	        record = _normalize_figure_manifest_item(version, entry)
	        if not record:
	            continue
	        identity = (record["name"].lower(), record["location"])
	        if identity not in known:
	            known.add(identity)
	            collected.append(record)

	    if collected:
	        return collected

	    # Fallback manifest generated from discovered figure filenames.
	    synthesized = (
	        _normalize_figure_manifest_item(version, filename)
	        for filename in _version_figures(version)
	    )
	    return [record for record in synthesized if record]


	def _safe_read_csv(path: Path, remote_url: str | None = None) -> list[dict]:
	    """Read a CSV as a list of row dicts from disk, then from a remote URL.

	    Args:
	        path: Local CSV tried first.
	        remote_url: Optional URL tried when the local file is missing or
	            cannot be parsed.

	    Returns:
	        One dict per row with JSON-compatible values; ``[]`` when no source
	        could be read. A readable local file with no rows also yields ``[]``
	        without consulting the remote URL.
	    """
	    try:
	        if path.exists():
	            df = pd.read_csv(path)
	            # Round-trip through to_json so values become plain JSON types.
	            return json.loads(df.to_json(orient="records"))
	    except Exception:
	        # Best effort: a broken local copy falls through to the remote one.
	        pass
	    if not remote_url:
	        return []
	    text = _read_remote_text(remote_url)
	    if text is None:
	        return []
	    try:
	        df = pd.read_csv(StringIO(text))
	        return json.loads(df.to_json(orient="records"))
	    except Exception:
	        return []


	def _safe_read_json(path: Path, remote_url: str | None = None) -> dict:
	    """Read a JSON object from disk, then from a remote URL.

	    Args:
	        path: Local file tried first.
	        remote_url: Optional URL tried when the local file is missing or
	            cannot be read/parsed.

	    Returns:
	        The parsed object; ``{}`` when no source yields valid JSON or the
	        parsed value is not a dict. A local file that parses to a non-dict
	        yields ``{}`` without consulting the remote URL.
	    """
	    if path.exists():
	        try:
	            data = json.loads(path.read_text(encoding="utf-8"))
	            return data if isinstance(data, dict) else {}
	        except Exception:
	            # Best effort: a broken local copy falls through to the remote one.
	            pass
	    if not remote_url:
	        return {}
	    text = _read_remote_text(remote_url)
	    if not text:
	        return {}
	    try:
	        data = json.loads(text)
	        return data if isinstance(data, dict) else {}
	    except Exception:
	        return {}


	def _load_dataset_info() -> dict:
	    """Load the un-versioned dataset.json (local artifacts first, then remote)."""
	    local_copy = _ARTIFACTS / "dataset.json"
	    remote_copy = _hf_url("dataset.json")
	    return _safe_read_json(local_copy, remote_copy)


	def _safe_read_csv_first(version: str, rel_paths: list[str]) -> list[dict]:
	    """Return the rows of the first candidate CSV that yields any rows.

	    Candidates are tried in order, each locally and then remotely; ``[]`` is
	    returned when none of them produces rows.
	    """
	    # Lazy generator: later candidates are only read if earlier ones are empty.
	    loaded = (
	        _safe_read_csv(_version_root(version) / rel, _hf_version_url(version, rel))
	        for rel in rel_paths
	    )
	    return next((rows for rows in loaded if rows), [])


	def _safe_read_json_first(version: str, rel_paths: list[str]) -> dict:
	    """Return the first candidate JSON object that is non-empty.

	    Candidates are tried in order, each locally and then remotely; ``{}`` is
	    returned when none of them produces data.
	    """
	    # Lazy generator: later candidates are only read if earlier ones are empty.
	    loaded = (
	        _safe_read_json(_version_root(version) / rel, _hf_version_url(version, rel))
	        for rel in rel_paths
	    )
	    return next((data for data in loaded if data), {})


	def _version_root(version: str) -> Path:
	    """Return the local artifacts directory of ``version``."""
	    return _ARTIFACTS.joinpath(version)


	def _ensure_version(version: str) -> None:
	    """Raise HTTP 400 unless ``version`` is one of the supported versions."""
	    if version in _SUPPORTED_VERSIONS:
	        return
	    raise HTTPException(400, f"Unknown version '{version}'")


	def _version_figures(version: str) -> list[str]:
	    """Return the image file names available for ``version``.

	    Sources are consulted in order and the first non-empty result wins:
	    the local figures directory, the local datamap.json, the remote
	    datamap.json, and finally the remote tree API listing.
	    """
	    image_suffixes = (".png", ".svg", ".jpg", ".jpeg", ".webp")
	    fig_dir = _version_root(version) / "figures"
	    if fig_dir.exists():
	        on_disk = sorted(
	            entry.name
	            for entry in fig_dir.iterdir()
	            if entry.is_file() and entry.suffix.lower() in image_suffixes
	        )
	        if on_disk:
	            return on_disk

	    # Local datamap may only list downloaded subsets on Spaces.
	    # If it does not contain figure entries, explicitly query HF raw datamap.
	    from_local_map = _extract_figure_names_from_datamap(
	        _safe_read_json(_version_root(version) / "datamap.json", None)
	    )
	    if from_local_map:
	        return from_local_map

	    from_remote_map = _extract_figure_names_from_datamap(
	        _read_remote_json(_hf_version_url(version, "datamap.json"))
	    )
	    if from_remote_map:
	        return from_remote_map

	    # Last-resort fallback: Hugging Face tree API listing for /<version>/figures
	    return _list_tree_api_figures(version)


	def _battery_stats_for_version(version: str) -> dict:
	    """Summarise the battery feature table of ``version``.

	    Looks for ``battery_features.csv`` under ``features/`` and then
	    ``results/`` of the version's local artifact directory.

	    Returns:
	        A dict with ``total_samples``, ``batteries`` and ``feature_columns``
	        plus, when the matching columns exist, SoH/RUL aggregates,
	        ``degradation_distribution`` and ``temp_groups``. ``{}`` when no
	        feature file exists or it cannot be parsed.
	    """
	    root = _version_root(version)
	    features_candidates = [
	        root / "features" / "battery_features.csv",
	        root / "results" / "battery_features.csv",
	    ]
	    features_path = next((p for p in features_candidates if p.exists()), None)
	    if not features_path:
	        return {}
	    try:
	        df = pd.read_csv(features_path)
	    except (OSError, ValueError):
	        # pandas' EmptyDataError/ParserError derive from ValueError. Like the
	        # other artifact loaders, a broken file yields "no data" rather than
	        # failing the whole metrics payload.
	        return {}

	    # Identifier/target columns are not reported as features.
	    non_feature_columns = ["battery_id", "datetime", "SoH", "RUL", "degradation_state"]
	    stats = {
	        "total_samples": len(df),
	        "batteries": int(df["battery_id"].nunique()) if "battery_id" in df.columns else 0,
	        "feature_columns": [
	            c for c in df.columns.tolist()
	            if c not in non_feature_columns
	        ],
	    }
	    if "SoH" in df.columns:
	        stats.update({
	            "avg_soh": round(float(df["SoH"].mean()), 2),
	            "min_soh": round(float(df["SoH"].min()), 2),
	            "max_soh": round(float(df["SoH"].max()), 2),
	        })
	    if "RUL" in df.columns:
	        stats["avg_rul"] = round(float(df["RUL"].mean()), 1)
	    if "degradation_state" in df.columns:
	        stats["degradation_distribution"] = json.loads(df["degradation_state"].value_counts().to_json())
	    if "ambient_temperature" in df.columns:
	        stats["temp_groups"] = sorted(df["ambient_temperature"].dropna().unique().tolist())
	    return stats


	def _rows_from_models_meta(models_meta: dict) -> list[dict]:
	    """Build unified-results-style rows from the ``models`` mapping of models.json."""
	    models = models_meta.get("models")
	    if not isinstance(models, dict):
	        # Missing or malformed section: nothing to derive.
	        return []
	    return [
	        {
	            "model": name,
	            "family": info.get("family"),
	            "R2": info.get("r2"),
	            "MAE": info.get("mae"),
	            "RMSE": info.get("rmse"),
	            "MAPE": info.get("mape"),
	            "within_5pct": info.get("within_5pct"),
	            "f1_macro": info.get("f1_macro"),
	            "f1_weighted": info.get("f1_weighted"),
	        }
	        for name, info in models.items()
	        if isinstance(info, dict)
	    ]


	def _training_summary_from_rows(unified: list[dict]) -> dict:
	    """Derive a training summary from non-empty unified result rows."""
	    valid_r2 = [r.get("R2") for r in unified if isinstance(r.get("R2"), (int, float))]
	    valid_w5 = [r.get("within_5pct") for r in unified if isinstance(r.get("within_5pct"), (int, float))]
	    # Rows without a numeric R2 rank below every real score.
	    best = max(unified, key=lambda r: r.get("R2") if isinstance(r.get("R2"), (int, float)) else -999)
	    # A model "passes" when at least 95% of its predictions are within 5%.
	    passed = sum(1 for v in valid_w5 if v >= 95.0)
	    return {
	        "best_model": best.get("model"),
	        "best_r2": best.get("R2"),
	        "best_within_5pct": best.get("within_5pct"),
	        "total_models": len(unified),
	        "mean_within_5pct": (sum(valid_w5) / len(valid_w5)) if valid_w5 else None,
	        "passed_models": passed,
	        "pass_rate_pct": (passed / len(valid_w5) * 100.0) if valid_w5 else 0.0,
	        "mean_r2": (sum(valid_r2) / len(valid_r2)) if valid_r2 else None,
	    }


	def _manifest_figure_names(figures_manifest: list[dict]) -> list[str]:
	    """Return the unique figure file names of a manifest, in manifest order."""
	    names: list[str] = []
	    seen_names: set[str] = set()
	    for item in figures_manifest:
	        loc = str(item.get("location", ""))
	        # Fall back to the display name when the location has no file name.
	        name = _location_filename(loc) or str(item.get("name", "")).strip()
	        if not name or name in seen_names:
	            continue
	        seen_names.add(name)
	        names.append(name)
	    return names


	def _build_metrics_payload(version: str) -> dict:
	    """Assemble the complete metrics payload for one artifact version.

	    Every artifact is read best-effort (local file first, remote fallback).
	    When the unified results CSV is unavailable, rows are derived from
	    models.json; when no training summary exists, one is derived from the
	    unified rows.

	    Args:
	        version: Artifact version; must be one of the supported versions.

	    Returns:
	        A dict with the result tables, summaries, figure names/manifest,
	        battery statistics and dataset info for ``version``.

	    Raises:
	        HTTPException: 400 when ``version`` is not supported.
	    """
	    _ensure_version(version)
	    root = _version_root(version)

	    models_meta = _safe_read_json(root / "models.json", _hf_version_url(version, "models.json"))
	    datamap = _safe_read_json(root / "datamap.json", _hf_version_url(version, "datamap.json"))
	    dataset_info = _load_dataset_info()

	    unified = _safe_read_csv_first(version, ["results/unified_results.csv"])
	    classical_results = _safe_read_csv_first(version, [
	        "results/classical_results.csv",
	        "results/classical_soh_results.csv",
	    ])
	    classical_soh = _safe_read_csv_first(version, ["results/classical_soh_results.csv"])
	    lstm_results = _safe_read_csv_first(version, ["results/lstm_soh_results.csv"])
	    ensemble_results = _safe_read_csv_first(version, ["results/ensemble_results.csv"])
	    transformer_results = _safe_read_csv_first(version, ["results/transformer_soh_results.csv"])
	    validation = _safe_read_csv_first(version, [
	        "results/model_validation.csv",
	        "reports/model_validation.csv",
	    ])
	    rankings = _safe_read_csv_first(version, ["results/final_rankings.csv"])
	    classical_rul = _safe_read_csv_first(version, ["results/classical_rul_results.csv"])

	    training_summary = _safe_read_json_first(version, [
	        "results/training_summary.json",
	        "reports/training_summary.json",
	    ])
	    validation_summary = _safe_read_json_first(version, [
	        "results/validation_summary.json",
	        "reports/validation_summary.json",
	    ])
	    intra_battery = _safe_read_json_first(version, [
	        "results/intra_battery.json",
	        "reports/intra_battery.json",
	    ])
	    vae_lstm = _safe_read_json_first(version, ["results/vae_lstm_results.json"])
	    dg_itransformer = _safe_read_json_first(version, ["results/dg_itransformer_results.json"])

	    # Fallback: build unified/classical-like rows directly from models.json when
	    # result CSVs are not yet downloaded for a version.
	    if not unified and isinstance(models_meta, dict):
	        model_rows = _rows_from_models_meta(models_meta)
	        unified = model_rows
	        if not classical_results:
	            classical_results = [
	                r for r in model_rows
	                if str(r.get("family") or "").startswith("classical")
	            ]

	    # Fallback summaries derived from unified rows
	    if not training_summary and unified:
	        training_summary = _training_summary_from_rows(unified)

	    figures_manifest = _version_figures_manifest(version)
	    figures = _manifest_figure_names(figures_manifest)

	    return {
	        "version": version,
	        "models_meta": models_meta,
	        "datamap": datamap,
	        "unified_results": unified,
	        "classical_results": classical_results,
	        "classical_soh": classical_soh,
	        "lstm_results": lstm_results,
	        "ensemble_results": ensemble_results,
	        "transformer_results": transformer_results,
	        "validation": validation,
	        "rankings": rankings,
	        "classical_rul": classical_rul,
	        "training_summary": training_summary,
	        "validation_summary": validation_summary,
	        "intra_battery": intra_battery,
	        "vae_lstm": vae_lstm,
	        "dg_itransformer": dg_itransformer,
	        "figures": figures,
	        "figures_manifest": figures_manifest,
	        "battery_stats": _battery_stats_for_version(version),
	        "dataset_info": dataset_info,
	    }


	@router.get("/metrics")
	async def get_metrics():
	    """Return the metrics payload of the default (latest) version, v3."""
	    latest_version = "v3"
	    return _build_metrics_payload(latest_version)


	@router.get("/{version}/metrics")
	async def get_metrics_for_version(version: str):
	    """Return the metrics payload built from artifacts/{version}.

	    An unsupported ``version`` results in HTTP 400, raised while the payload
	    is being built.
	    """
	    payload = _build_metrics_payload(version)
	    return payload


	@router.get("/{version}/figures")
	async def list_version_figures(version: str):
	    """Return the unique figure file names of ``version`` in manifest order.

	    Raises HTTP 400 for an unsupported version.
	    """
	    _ensure_version(version)
	    # A dict keeps insertion order, giving ordered de-duplication.
	    ordered: dict[str, None] = {}
	    for entry in _version_figures_manifest(version):
	        location = str(entry.get("location", ""))
	        # Fall back to the display name when the location has no file name.
	        filename = _location_filename(location) or str(entry.get("name", "")).strip()
	        if filename:
	            ordered.setdefault(filename, None)
	    return list(ordered)


	@router.get("/{version}/figures.json")
	async def get_version_figures_manifest(version: str):
	    """Return the figure manifest (name/tags/location/url) of ``version``.

	    Raises HTTP 400 for an unsupported version.
	    """
	    _ensure_version(version)
	    manifest = _version_figures_manifest(version)
	    return manifest


	@router.get("/{version}/figures/{filename}")
	async def get_version_figure(version: str, filename: str):
	    """Serve a saved figure from artifacts/{version}/figures.

	    When the file is not available locally but is known for the version,
	    the client is redirected (307) to the remote copy.

	    Args:
	        version: Artifact version; must be supported.
	        filename: Bare file name of the figure (no directory components).

	    Raises:
	        HTTPException: 400 for an unsupported version or a filename that is
	            not a plain file name; 404 when the figure is unknown.
	    """
	    _ensure_version(version)
	    # Validate before touching the filesystem: only a plain file name is
	    # acceptable, so the path can never leave the figures directory.
	    if (
	        not filename
	        or filename in (".", "..")
	        or "\\" in filename
	        or Path(filename).name != filename
	    ):
	        raise HTTPException(400, "Invalid figure filename")

	    path = _version_root(version) / "figures" / filename
	    # is_file() (not exists()) so a directory is never handed to FileResponse.
	    if not path.is_file():
	        if filename not in _version_figures(version):
	            raise HTTPException(404, f"Figure {filename} not found for {version}")
	        return RedirectResponse(
	            # Percent-encode so names with spaces etc. form a valid URL.
	            url=_hf_version_url(version, f"figures/{quote(filename)}"),
	            status_code=307,
	        )

	    media_types = {
	        ".html": "text/html",
	        ".svg": "image/svg+xml",
	        ".jpg": "image/jpeg",
	        ".jpeg": "image/jpeg",
	        ".webp": "image/webp",
	    }
	    # PNG stays the default for any other suffix, as before.
	    content_type = media_types.get(path.suffix.lower(), "image/png")
	    return FileResponse(path, media_type=content_type)


	@router.get("/v2/figures/{filename}")
	async def get_v2_figure(filename: str):
	    """Backward-compatible alias that serves ``filename`` from the v2 figures."""
	    response = await get_version_figure("v2", filename)
	    return response