{
  "model_name": "difficulty_model",
  "model_version": "difficulty_model_v2_baseline_001",
  "dataset_version": "2.0.0",
  "trained_at": "2026-05-21T05:59:09.943332+00:00",
  "seed": 42,
  "split_counts": {
    "train": 3912,
    "validation": 1033,
    "test": 875
  },
  "metrics": {
    "validation": {
      "mae": 0.3475,
      "r_squared": 0.5003,
      "per_bucket_mae": {
        "easy": 0.3058,
        "medium": 0.2934,
        "hard": 0.6563
      }
    },
    "test": {
      "mae": 0.3519,
      "r_squared": 0.4685,
      "per_bucket_mae": {
        "easy": 0.325,
        "medium": 0.2885,
        "hard": 0.6797
      }
    }
  },
  "limitations": [
    "Trained on synthetic data only.",
    "difficulty_score distribution may not reflect real-world difficulty.",
    "OrdinalEncoder assumes an ordering that may not be meaningful for subject/question_type.",
    "Per-bucket MAE depends on the quality of the difficulty string labels."
  ]
}