| """Model diagnostics and validation utilities."""
|
|
|
| from typing import Dict, Optional, Tuple
|
|
|
| import torch
|
| from torch import Tensor
|
| import numpy as np
|
|
|
| from physics_informed_bo.models.base import SurrogateModel
|
|
|
|
|
def model_diagnostics(
    surrogate: "SurrogateModel",
    X_test: Tensor,
    y_test: Tensor,
) -> Dict:
    """Compute diagnostic metrics for the surrogate model.

    The surrogate's ``predict`` method is called once on ``X_test`` and is
    expected to return a ``(mean, variance)`` pair of tensors. Predictions
    and targets are flattened to 1-D before any metric is computed.

    Args:
        surrogate: Fitted surrogate model exposing ``predict(X)``.
        X_test: Test inputs (n, d).
        y_test: Test targets (n, 1) or (n,).

    Returns:
        Dict with the keys ``rmse``, ``mae``, ``r2``, ``nlpd`` (mean Gaussian
        negative log predictive density), ``calibration_1sigma`` and
        ``calibration_2sigma`` (fraction of targets inside the 1- and
        2-standard-deviation predictive bands), ``mean_predicted_std`` and
        ``n_test``.

    Raises:
        ValueError: If the number of predicted means differs from the
            number of targets.
    """
    # Floor applied to the variance inside the NLPD only, so that log(var)
    # and residual**2 / var stay finite when the model reports a variance
    # of zero (or a slightly negative one from round-off).
    min_variance = 1e-12

    mean, var = surrogate.predict(X_test)

    # Flatten explicitly instead of squeeze(): squeeze() leaves unexpected
    # extra dimensions in place and lets them broadcast silently.
    mean = mean.detach().reshape(-1)
    var = var.detach().reshape(-1)
    y_true = y_test.detach().reshape(-1)

    if mean.numel() != y_true.numel():
        raise ValueError(
            f"Prediction/target size mismatch: got {mean.numel()} predictions "
            f"for {y_true.numel()} targets."
        )

    residuals = y_true - mean

    # Point-prediction accuracy.
    rmse = float((residuals**2).mean().sqrt())
    mae = float(residuals.abs().mean())
    ss_res = float((residuals**2).sum())
    ss_tot = float(((y_true - y_true.mean()) ** 2).sum())
    # The small constant keeps R2 defined when all targets are identical.
    r2 = 1 - ss_res / (ss_tot + 1e-12)

    # Gaussian negative log predictive density, averaged over test points.
    nlpd_var = var.clamp_min(min_variance)
    nlpd = float(
        0.5 * (torch.log(2 * torch.pi * nlpd_var) + residuals**2 / nlpd_var).mean()
    )

    # Calibration: empirical coverage of the +/-1 and +/-2 sigma bands.
    # Negative variances are treated as zero so sqrt() cannot produce NaN,
    # which would otherwise make every coverage comparison False.
    std = var.clamp_min(0).sqrt()
    abs_err = residuals.abs()
    in_1sigma = float((abs_err <= std).float().mean())
    in_2sigma = float((abs_err <= 2 * std).float().mean())

    return {
        "rmse": rmse,
        "mae": mae,
        "r2": r2,
        "nlpd": nlpd,
        "calibration_1sigma": in_1sigma,
        "calibration_2sigma": in_2sigma,
        "mean_predicted_std": float(std.mean()),
        "n_test": len(X_test),
    }
|
|
|
|
|
def leave_one_out_cv(
    surrogate_class,
    surrogate_kwargs: Dict,
    X: Tensor,
    y: Tensor,
) -> Dict:
    """Perform leave-one-out cross-validation for the surrogate model.

    For every sample ``i`` a fresh ``surrogate_class(**surrogate_kwargs)`` is
    constructed, fitted on all other samples via ``fit(X_train, y_train)``
    and asked to ``predict`` the held-out input ``X[i:i+1]``. The model is
    therefore refitted ``n`` times.

    Args:
        surrogate_class: Class of the surrogate model to evaluate. Instances
            must provide ``fit(X, y)`` and ``predict(X) -> (mean, variance)``.
        surrogate_kwargs: Keyword arguments for the surrogate constructor.
            ``None`` is treated as an empty dict.
        X: Full dataset inputs (n, d).
        y: Full dataset targets (n, 1) or (n,).

    Returns:
        Dict with the LOO-CV metrics ``loo_rmse``, ``loo_mae``, ``loo_r2``
        and ``loo_nlpd``.

    Raises:
        ValueError: If the number of targets differs from ``len(X)``.
    """
    # Floor applied to the variance inside the NLPD so that log(var) and
    # residual**2 / var stay finite when a model reports zero variance.
    min_variance = 1e-12

    n = len(X)
    kwargs = surrogate_kwargs or {}

    y_flat = y.detach().reshape(-1)
    if y_flat.numel() != n:
        raise ValueError(
            f"Input/target size mismatch: got {n} inputs "
            f"for {y_flat.numel()} targets."
        )

    # Allocate the accumulators with the targets' dtype and device; a bare
    # torch.zeros(n) is float32 on CPU, which truncates float64 predictions
    # and cannot be combined with targets living on another device.
    if y_flat.is_floating_point():
        out_dtype = y_flat.dtype
    else:
        out_dtype = torch.get_default_dtype()
    predictions = torch.zeros(n, dtype=out_dtype, device=y_flat.device)
    variances = torch.zeros_like(predictions)

    sample_ids = torch.arange(n, device=X.device)
    for i in range(n):
        # Boolean mask selecting every sample except the held-out one.
        keep = sample_ids != i

        model = surrogate_class(**kwargs)
        model.fit(X[keep], y[keep])

        mean_i, var_i = model.predict(X[i:i + 1])
        predictions[i] = mean_i.detach().squeeze()
        variances[i] = var_i.detach().squeeze()

    residuals = y_flat - predictions
    ss_res = float((residuals**2).sum())
    ss_tot = float(((y_flat - y_flat.mean()) ** 2).sum())
    nlpd_var = variances.clamp_min(min_variance)

    return {
        "loo_rmse": float((residuals**2).mean().sqrt()),
        "loo_mae": float(residuals.abs().mean()),
        # Same denominator guard as model_diagnostics: keeps R2 defined when
        # all targets are identical.
        "loo_r2": 1 - ss_res / (ss_tot + 1e-12),
        "loo_nlpd": float(
            0.5 * (torch.log(2 * torch.pi * nlpd_var) + residuals**2 / nlpd_var).mean()
        ),
    }
|
|
|