| """Evaluate a trained checkpoint on PTB-XL AF + downstream probes. |
| |
| Loads the model from `--ckpt`, fetches PTB-XL via HF, extracts pooled latents |
| from the ECG encoder, runs a logistic-regression linear probe, and writes |
| results JSON. |
| |
| Used at epoch 25 (K-gate eval) and epoch 100 (final eval). |
| """ |
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import os |
| from pathlib import Path |
|
|
| import numpy as np |
| import torch |
| from dotenv import load_dotenv |
|
|
load_dotenv()
# Mirror HUGGINGFACE_API_KEY into HF_TOKEN (empty string if unset) so the
# Hugging Face stack can authenticate; a pre-existing HF_TOKEN wins.
os.environ.setdefault("HF_TOKEN", os.environ.get("HUGGINGFACE_API_KEY", ""))


# Make the repo's `src/` package importable when this file is run as a
# script; the physiojepa imports below depend on this path insertion.
import sys
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
|
|
| from physiojepa.models import MODEL_REGISTRY, ModelConfig |
| from physiojepa.probe import linear_probe_auroc, pooled_features |
|
|
|
|
def get_ecg_encoder(model_letter: str, model: torch.nn.Module) -> torch.nn.Module:
    """Return the ECG encoder submodule for the given model variant.

    Variants "A" and "C" expose the encoder directly as ``model.ecg``;
    all other variants nest it under a backbone as ``model.bb.ecg``.
    """
    if model_letter in ("A", "C"):
        return model.ecg
    return model.bb.ecg
|
|
|
|
def main() -> None:
    """CLI entry point: evaluate a checkpoint's ECG encoder on PTB-XL AF.

    Loads the checkpoint, rebuilds the model from the saved config (falling
    back to defaults for missing fields), extracts pooled ECG features from
    a cached PTB-XL npz, fits a linear probe on a fixed 80/20 split, and
    writes an AUROC summary JSON to --out.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt", required=True)
    ap.add_argument("--model", required=True, choices=["A", "B", "C", "F"])
    ap.add_argument("--ptbxl_npz", default="/workspace/cache/ptbxl_af.npz")
    ap.add_argument("--out", required=True)
    args = ap.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # NOTE(security): weights_only=False unpickles arbitrary objects from the
    # checkpoint; only load checkpoints produced by this project's own runs.
    sd = torch.load(args.ckpt, map_location=device, weights_only=False)
    saved_cfg = sd.get("cfg", {})

    # Rebuild the training-time config; defaults cover older checkpoints
    # whose saved cfg lacks these keys.
    cfg = ModelConfig(
        pred_depth=saved_cfg.get("pred_depth", 4),
        query_mode=saved_cfg.get("query_mode", "learned"),
        mask_ratio=saved_cfg.get("mask_ratio", 0.50),
    )
    print(f"[eval] model cfg: pred_depth={cfg.pred_depth} query_mode={cfg.query_mode} mask_ratio={cfg.mask_ratio}")
    model = MODEL_REGISTRY[args.model](cfg)
    model.load_state_dict(sd["model"])
    model.to(device)
    model.eval()  # idiomatic equivalent of model.train(False)
    enc = get_ecg_encoder(args.model, model)

    print(f"[eval] loading PTB-XL cache from {args.ptbxl_npz}")
    arr = np.load(args.ptbxl_npz)
    X, y = arr["X"], arr["y"]
    print(f"[eval] X={X.shape} y_pos={int(y.sum())} y_neg={int((1 - y).sum())}")
    X_t = torch.from_numpy(X)
    feats = pooled_features(enc, X_t, device=device, batch_size=64)

    # Fixed-seed 80/20 split so the epoch-25 (K-gate) and epoch-100 (final)
    # evaluations are comparable across runs.
    rng = np.random.default_rng(0)
    idx = rng.permutation(len(y))
    cut = int(len(idx) * 0.8)
    train_idx, test_idx = idx[:cut], idx[cut:]
    auroc = linear_probe_auroc(feats[train_idx], y[train_idx], feats[test_idx], y[test_idx])
    print(f"[eval] AF AUROC = {auroc:.4f}")
    Path(args.out).parent.mkdir(parents=True, exist_ok=True)
    Path(args.out).write_text(json.dumps({
        # float()/int() casts guard against numpy scalars (e.g. np.float32),
        # which json.dumps cannot serialize.
        "ckpt": args.ckpt, "model": args.model, "auroc": float(auroc),
        "n_train": int(cut), "n_test": int(len(idx) - cut),
        "n_pos": int(y.sum()), "n_neg": int((1 - y).sum()),
    }, indent=2))
|
|
|
|
# Script entry point; no side effects when imported as a module.
if __name__ == "__main__":
    main()
|
|