Spaces:

Rthur2003
/

crowncode-backend

Sleeping

App Files Files Community

Rthur2003 committed on Mar 31

Commit

36a8a94

1 Parent(s): 5446f0d

feat: add full training and evaluation pipeline for AURIS

Browse files

Files changed (2) hide show

app/training/run_full_pipeline.py +177 -0
app/training/visualize_results.py +632 -0

app/training/run_full_pipeline.py ADDED Viewed

	@@ -0,0 +1,177 @@

+"""
+AURIS Full Training & Evaluation Pipeline.
+Orchestrates the complete ML pipeline end-to-end:
+  1. Feature extraction (49 audio features + 14 vocal features)
+  2. Heuristic baseline evaluation
+  3. Multi-model training with 5-fold CV
+  4. Publication-quality figure generation
+  5. Results summary
+Usage:
+    python -m app.training.run_full_pipeline
+    # Or with custom paths:
+    python -m app.training.run_full_pipeline \\
+        --manifest data/training/manifest.csv \\
+        --models-dir models \\
+        --figures-dir figures
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+import time
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
def main() -> None:
    """Run the full AURIS training and evaluation pipeline from the CLI.

    Steps, in order: optional feature extraction, heuristic baseline
    evaluation, multi-model training, figure generation, and a printed
    summary. Training and baseline results are merged and written to
    ``<models-dir>/full_training_results.json``, which the figure step reads.

    Exits with status 1 when ``--skip-extract`` is given but no
    ``features.csv`` exists next to the manifest.
    """
    parser = argparse.ArgumentParser(description="AURIS Full Pipeline")
    parser.add_argument(
        "--manifest", default="data/training/manifest.csv",
        help="Path to training manifest CSV",
    )
    parser.add_argument(
        "--models-dir", default="models",
        help="Directory for saved models",
    )
    parser.add_argument(
        "--figures-dir", default="figures",
        help="Directory for output figures",
    )
    parser.add_argument(
        "--skip-extract", action="store_true",
        help="Skip feature extraction (use existing features.csv)",
    )
    args = parser.parse_args()

    manifest_path = Path(args.manifest)
    features_path = manifest_path.parent / "features.csv"
    models_dir = Path(args.models_dir)
    figures_dir = Path(args.figures_dir)
    total_start = time.time()

    # ── Step 1: Feature Extraction ─────────────────────────────
    if not args.skip_extract:
        _print_banner("STEP 1 / 5 — Feature Extraction")
        # Project modules are imported inside the step that uses them
        # (presumably to defer heavy dependencies until they are needed).
        from app.training.extract_features_batch import extract_batch

        t0 = time.time()
        features_path = extract_batch(manifest_path)
        print(f"\n  Extraction time: {time.time() - t0:.1f}s")
    else:
        print("\n  [Skipping extraction — using existing features.csv]")
        if not features_path.exists():
            print(f"  ERROR: {features_path} not found!")
            sys.exit(1)

    # ── Step 2: Heuristic Baseline ─────────────────────────────
    _print_banner("STEP 2 / 5 — Heuristic Baseline Evaluation")
    from app.training.evaluate import evaluate_heuristic_baseline

    baseline_results = evaluate_heuristic_baseline(features_path)

    # ── Step 3: Multi-Model Training ───────────────────────────
    _print_banner("STEP 3 / 5 — Multi-Model Training (5-Fold CV)")
    from app.training.train_classifier import train

    train_results = train(features_path, models_dir)
    all_results = train_results["all_results"]
    best_model = train_results["best_model"]

    # Combine training and baseline results; keys starting with "_" carry
    # metadata and are skipped by the plotting code.
    combined_results = all_results.copy()
    if "heuristic_only" in baseline_results:
        combined_results["Heuristic (no vocals)"] = baseline_results["heuristic_only"]
    if "heuristic_vocals" in baseline_results:
        combined_results["Heuristic + Vocals"] = baseline_results["heuristic_vocals"]

    combined_results["_best_model"] = best_model
    combined_results["_n_samples"] = len(all_results[best_model]["y_true"])
    combined_results["_n_features"] = len(train_results["feature_cols"])
    # Hard-coded to match the "5-Fold CV" banner above — TODO confirm
    # against the fold count train() actually uses.
    combined_results["_n_folds"] = 5

    # Feature importance is taken from training_results.json in models_dir
    # when that file exists and contains the key.
    results_json_path = models_dir / "training_results.json"
    if results_json_path.exists():
        with open(results_json_path, "r", encoding="utf-8") as f:
            saved = json.load(f)
        if "_feature_importance" in saved:
            combined_results["_feature_importance"] = saved["_feature_importance"]

    # Save full results, including per-sample arrays, for visualization.
    models_dir.mkdir(parents=True, exist_ok=True)
    full_results_path = models_dir / "full_training_results.json"
    with open(full_results_path, "w", encoding="utf-8") as f:
        json.dump(_to_jsonable(combined_results), f, indent=2)

    # ── Step 4: Visualization ──────────────────────────────────
    _print_banner("STEP 4 / 5 — Publication-Quality Figures")
    from app.training.visualize_results import generate_all_figures

    generate_all_figures(full_results_path, features_path, figures_dir)

    # ── Step 5: Summary ────────────────────────────────────────
    _print_banner("STEP 5 / 5 — Final Summary")
    total_time = time.time() - total_start
    print(f"\n  Pipeline completed in {total_time:.1f}s ({total_time / 60:.1f}m)")
    print(f"\n  Best model: {best_model}")
    print(f"  Best AUC:   {train_results['best_auc']:.4f}")
    print("\n  Artifacts:")
    print(f"    Model:    {train_results['model_path']}")
    print(f"    Results:  {full_results_path}")
    print(f"    Figures:  {figures_dir}/")
    print(f"    Features: {features_path}")

    # One row per trained model (heuristic baselines are not listed here).
    print(f"\n  {'Model':<25} {'Acc':>7} {'Prec':>7} {'Rec':>7} {'F1':>7} {'AUC':>7}")
    print(f"  {'─' * 25} {'─' * 7} {'─' * 7} {'─' * 7} {'─' * 7} {'─' * 7}")
    for name in sorted(all_results):
        data = all_results[name]
        print(
            f"  {name:<25} "
            f"{data.get('accuracy', 0):>7.4f} "
            f"{data.get('precision', 0):>7.4f} "
            f"{data.get('recall', 0):>7.4f} "
            f"{data.get('f1', 0):>7.4f} "
            f"{data.get('roc_auc', 0):>7.4f}"
        )
    _print_banner("AURIS Training Pipeline Complete!")


def _print_banner(title: str) -> None:
    """Print *title* between two 70-character ``=`` rules, after a blank line."""
    print("\n" + "=" * 70)
    print(f"  {title}")
    print("=" * 70)


def _to_jsonable(value):
    """Recursively convert *value* into something ``json.dump`` accepts.

    Objects exposing ``tolist()`` (as NumPy arrays and scalars do) are
    converted through it; dicts, lists and tuples are walked so nested
    arrays are converted as well. Anything else is returned unchanged.
    """
    if hasattr(value, "tolist"):
        return value.tolist()
    if isinstance(value, dict):
        return {key: _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    return value


if __name__ == "__main__":
    main()

app/training/visualize_results.py ADDED Viewed

	@@ -0,0 +1,632 @@

+"""
+Publication-quality visualization pipeline for AURIS.
+Generates all figures required for an academic paper / conference
+submission on AI-generated music detection:
+  1. ROC curves (per-model overlay)
+  2. Precision-Recall curves (per-model overlay)
+  3. Confusion matrices (heatmap per model)
+  4. Model comparison bar chart (Accuracy, F1, AUC side-by-side)
+  5. Feature importance (top-N horizontal bar)
+  6. Feature correlation heatmap
+  7. Feature distribution violin plots (AI vs Human)
+  8. Training summary table (LaTeX-ready)
+Usage:
+    python -m app.training.visualize_results \\
+        --results models/training_results.json \\
+        --features data/training/features.csv \\
+        --output figures/
+All figures are saved at 300 DPI in both PNG and PDF formats
+for direct inclusion in LaTeX / Word documents.
+"""
+from __future__ import annotations
+import argparse
+import csv
+import json
+import sys
+from pathlib import Path
+from typing import Any
+import numpy as np
+try:
+    import matplotlib
+    matplotlib.use("Agg")  # Non-interactive backend for server/CI
+    import matplotlib.pyplot as plt
+    import matplotlib.ticker as mticker
+    from matplotlib.colors import LinearSegmentedColormap
+except ImportError:
+    print("ERROR: matplotlib required. pip install matplotlib")
+    sys.exit(1)
+try:
+    import seaborn as sns
+    HAS_SEABORN = True
+except ImportError:
+    HAS_SEABORN = False
+from sklearn.metrics import (
+    auc,
+    confusion_matrix,
+    precision_recall_curve,
+    roc_curve,
+)
+# ═══════════════════════════════════════════════════════════════════════
+# Style configuration — academic paper quality
+# ═══════════════════════════════════════════════════════════════════════
+# Color palette — distinct, colorblind-friendly
# Fixed colour per model display name, so a model keeps the same colour in
# every figure. Names missing from this table fall back to a default colour
# in _get_color().
MODEL_COLORS = {
    # Learned classifiers
    "Logistic Regression": "#4363d8",
    "Random Forest": "#3cb44b",
    "Gradient Boosting": "#e6194b",
    "SVM (RBF)": "#f58231",
    "MLP Neural Network": "#911eb4",
    "XGBoost": "#42d4f4",
    "LightGBM": "#f032e6",
    # Heuristic baselines (greys)
    "Heuristic (no vocals)": "#808080",
    "Heuristic + Vocals": "#a9a9a9",
}

# Two-colour scheme used for the Human (blue) vs AI (red) class plots and
# for the confusion-matrix colormap.
AURIS_BLUE = "#1a73e8"
AURIS_RED = "#e8431a"

# Global matplotlib defaults applied at import time: serif text, light grid,
# no top/right spines, and 300 DPI tightly-cropped output files.
_PAPER_STYLE = {
    # Typography
    "font.family": "serif",
    "font.size": 11,
    "axes.titlesize": 13,
    "axes.labelsize": 12,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    "legend.fontsize": 9,
    # Resolution and saving
    "figure.dpi": 150,
    "savefig.dpi": 300,
    "savefig.bbox": "tight",
    "savefig.pad_inches": 0.1,
    # Axes decoration
    "axes.grid": True,
    "grid.alpha": 0.3,
    "axes.spines.top": False,
    "axes.spines.right": False,
}
plt.rcParams.update(_PAPER_STYLE)
def _save_fig(fig: plt.Figure, output_dir: Path, name: str) -> None:
    """Save *fig* as ``<name>.png`` and ``<name>.pdf`` in *output_dir*, then close it.

    The directory is created if missing, so the plotting functions can be
    called on their own. The figure is closed even when a save fails, so a
    failed write does not leave the figure open.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    try:
        for ext in ("png", "pdf"):
            fig.savefig(output_dir / f"{name}.{ext}", format=ext)
    finally:
        plt.close(fig)
    print(f"  Saved: {name}.png / .pdf")
def _get_color(name: str) -> str:
    """Return the palette colour for model *name*, or ``#333333`` if it has none."""
    try:
        return MODEL_COLORS[name]
    except KeyError:
        return "#333333"
+# ═══════════════════════════════════════════════════════════════════════
+# Figure 1: ROC Curves
+# ═══════════════════════════════════════════════════════════════════════
def plot_roc_curves(
    results: dict[str, Any],
    output_dir: Path,
) -> None:
    """Plot ROC curves for all models on the same axes.

    Args:
        results: Mapping of model name to a result dict. Keys starting with
            ``_`` are metadata and are ignored. A model is drawn only if its
            dict has both ``y_true`` and ``y_prob``; others are skipped with
            a message instead of raising ``KeyError``.
        output_dir: Directory that receives ``fig1_roc_curves.png`` / ``.pdf``.
    """
    fig, ax = plt.subplots(figsize=(7, 6))
    for name, data in results.items():
        if name.startswith("_"):
            continue
        if not isinstance(data, dict) or "y_true" not in data or "y_prob" not in data:
            # e.g. a baseline stored with summary metrics only
            print(f"  Skipping ROC curve for {name} — no per-sample predictions stored")
            continue
        y_true = np.array(data["y_true"])
        y_prob = np.array(data["y_prob"])
        fpr, tpr, _ = roc_curve(y_true, y_prob)
        roc_auc = auc(fpr, tpr)
        ax.plot(
            fpr, tpr,
            color=_get_color(name),
            linewidth=2,
            label=f"{name} (AUC = {roc_auc:.3f})",
        )
    # Diagonal reference: a classifier that guesses at random
    ax.plot([0, 1], [0, 1], "k--", linewidth=1, alpha=0.5, label="Random (AUC = 0.500)")
    ax.set_xlim([-0.02, 1.02])
    ax.set_ylim([-0.02, 1.02])
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC Curves — AURIS Model Comparison")
    ax.legend(loc="lower right", framealpha=0.9)
    ax.set_aspect("equal")
    _save_fig(fig, output_dir, "fig1_roc_curves")
+# ═══════════════════════════════════════════════════════════════════════
+# Figure 2: Precision-Recall Curves
+# ═══════════════════════════════════════════════════════════════════════
def plot_pr_curves(
    results: dict[str, Any],
    output_dir: Path,
) -> None:
    """Plot Precision-Recall curves for all models on the same axes.

    Args:
        results: Mapping of model name to a result dict. Keys starting with
            ``_`` are metadata and are ignored. A model is drawn only if its
            dict has both ``y_true`` and ``y_prob``; others are skipped.
        output_dir: Directory that receives ``fig2_pr_curves.png`` / ``.pdf``.

    The legend reports average precision (AP) as the step-wise sum
    ``sum((R_n - R_{n-1}) * P_n)``. Trapezoidal area under the PR curve is
    a different quantity and should not be labelled "AP".
    """
    fig, ax = plt.subplots(figsize=(7, 6))
    positive_rate = None
    for name, data in results.items():
        if name.startswith("_"):
            continue
        if not isinstance(data, dict) or "y_true" not in data or "y_prob" not in data:
            print(f"  Skipping PR curve for {name} — no per-sample predictions stored")
            continue
        y_true = np.array(data["y_true"])
        y_prob = np.array(data["y_prob"])
        precision, recall, _ = precision_recall_curve(y_true, y_prob)
        # recall is returned in decreasing order, hence the leading minus.
        avg_precision = -np.sum(np.diff(recall) * precision[:-1])
        ax.plot(
            recall, precision,
            color=_get_color(name),
            linewidth=2,
            label=f"{name} (AP = {avg_precision:.3f})",
        )
        # Baseline comes from the first plotted model's labels.
        if positive_rate is None and y_true.size:
            positive_rate = float(np.mean(y_true))
    # Baseline: proportion of positives (0.5 when no labels are available)
    baseline = positive_rate if positive_rate is not None else 0.5
    ax.axhline(y=baseline, color="k", linestyle="--", linewidth=1, alpha=0.5,
               label=f"Baseline ({baseline:.2f})")
    ax.set_xlim([-0.02, 1.02])
    ax.set_ylim([-0.02, 1.05])
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.set_title("Precision-Recall Curves — AURIS Model Comparison")
    ax.legend(loc="lower left", framealpha=0.9)
    _save_fig(fig, output_dir, "fig2_pr_curves")
+# ═══════════════════════════════════════════════════════════════════════
+# Figure 3: Confusion Matrices
+# ═══════════════════════════════════════════════════════════════════════
def plot_confusion_matrices(
    results: dict[str, Any],
    output_dir: Path,
) -> None:
    """Plot a row-normalised confusion matrix heatmap for each model.

    Args:
        results: Mapping of model name to a result dict. Keys starting with
            ``_`` are metadata and are ignored. A model is drawn only if its
            dict has both ``y_true`` and ``y_pred``.
        output_dir: Directory that receives ``fig3_confusion_matrices.png``
            / ``.pdf``.

    Nothing is written when no model has per-sample predictions.
    """
    model_names = [
        key for key, value in results.items()
        if not key.startswith("_")
        and isinstance(value, dict)
        and "y_true" in value
        and "y_pred" in value
    ]
    if not model_names:
        # Also avoids a zero column count in the grid computation below.
        print("  Skipping confusion matrices — no per-sample predictions stored")
        return
    n_models = len(model_names)
    cols = min(3, n_models)
    rows = (n_models + cols - 1) // cols
    # squeeze=False always returns a 2-D array, so a single panel needs no
    # special-casing.
    fig, axes = plt.subplots(
        rows, cols, figsize=(5 * cols, 4.5 * rows), squeeze=False,
    )
    axes = axes.flatten()
    cmap = LinearSegmentedColormap.from_list("auris", ["#ffffff", AURIS_BLUE])
    for idx, name in enumerate(model_names):
        ax = axes[idx]
        data = results[name]
        y_true = np.array(data["y_true"])
        y_pred = np.array(data["y_pred"])
        # Pin both classes (0 = Human, 1 = AI, matching the tick labels) so
        # the matrix stays 2x2 even if one class never occurs.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples gets a zero row, not NaN.
        cm_norm = np.divide(
            cm, row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals > 0,
        )
        ax.imshow(cm_norm, interpolation="nearest", cmap=cmap, vmin=0, vmax=1)
        # Annotate cells with count and percentage
        for i in range(2):
            for j in range(2):
                color = "white" if cm_norm[i, j] > 0.6 else "black"
                ax.text(j, i, f"{cm[i, j]}\n({cm_norm[i, j]:.1%})",
                        ha="center", va="center", fontsize=12, color=color,
                        fontweight="bold")
        ax.set_xticks([0, 1])
        ax.set_yticks([0, 1])
        ax.set_xticklabels(["Human", "AI"])
        ax.set_yticklabels(["Human", "AI"])
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title(name, fontsize=11)
    # Hide unused axes in the last grid row
    for idx in range(n_models, len(axes)):
        axes[idx].set_visible(False)
    fig.suptitle("Confusion Matrices — AURIS Model Comparison", fontsize=14, y=1.02)
    fig.tight_layout()
    _save_fig(fig, output_dir, "fig3_confusion_matrices")
+# ═══════════════════════════════════════════════════════════════════════
+# Figure 4: Model Comparison Bar Chart
+# ═══════════════════════════════════════════════════════════════════════
def plot_model_comparison(
    results: dict[str, Any],
    output_dir: Path,
) -> None:
    """Draw a grouped bar chart of five metrics for every model.

    Each model (every key of *results* not starting with ``_``) gets one
    group of bars: accuracy, precision, recall, F1 and ROC-AUC. A metric
    that is missing or ``None`` is drawn as 0. The chart is saved as
    ``fig4_model_comparison`` in *output_dir*.
    """
    model_names = [key for key in results if not key.startswith("_")]
    # (result key, legend label, bar colour) for each metric, in bar order
    metric_specs = (
        ("accuracy", "Accuracy", "#4363d8"),
        ("precision", "Precision", "#3cb44b"),
        ("recall", "Recall", "#e6194b"),
        ("f1", "F1 Score", "#f58231"),
        ("roc_auc", "ROC-AUC", "#911eb4"),
    )
    bar_width = 0.15
    group_start = np.arange(len(model_names))
    fig, ax = plt.subplots(figsize=(max(10, len(model_names) * 2), 6))
    for slot, (metric, label, color) in enumerate(metric_specs):
        scores = []
        for model in model_names:
            score = results[model].get(metric, 0)
            scores.append(0 if score is None else score)
        bars = ax.bar(
            group_start + slot * bar_width, scores, bar_width,
            label=label, color=color, alpha=0.85,
        )
        # Print each value just above its bar
        for bar, score in zip(bars, scores):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
                    f"{score:.2f}", ha="center", va="bottom", fontsize=7)
    ax.set_xlabel("Model")
    ax.set_ylabel("Score")
    ax.set_title("Model Performance Comparison — AURIS")
    # Centre each tick under the middle (third) bar of its five-bar group
    ax.set_xticks(group_start + bar_width * 2)
    ax.set_xticklabels(model_names, rotation=25, ha="right")
    ax.set_ylim([0, 1.12])
    ax.legend(loc="upper right", ncol=5, framealpha=0.9)
    ax.yaxis.set_major_formatter(mticker.PercentFormatter(1.0))
    fig.tight_layout()
    _save_fig(fig, output_dir, "fig4_model_comparison")
+# ═══════════════════════════════════════════════════════════════════════
+# Figure 5: Feature Importance
+# ═══════════════════════════════════════════════════════════════════════
def plot_feature_importance(
    results: dict[str, Any],
    output_dir: Path,
    top_n: int = 20,
) -> None:
    """Draw a horizontal bar chart of the most important features.

    Args:
        results: Results mapping; importances are read from its
            ``_feature_importance`` entry (feature name -> importance) and
            the title uses ``_best_model`` when present.
        output_dir: Directory that receives ``fig5_feature_importance``.
        top_n: Maximum number of features to show.

    Nothing is drawn when no importance data is available. The title and
    figure height reflect the number of features actually shown, which can
    be smaller than *top_n*.
    """
    importance = results.get("_feature_importance")
    if not importance:
        print("  Skipping feature importance — no data available")
        return
    # Sort descending, keep the top N, then reverse so the largest bar ends
    # up at the top of the horizontal chart.
    ranked = sorted(importance.items(), key=lambda item: item[1], reverse=True)[:top_n]
    if not ranked:
        print("  Skipping feature importance — no data available")
        return
    names = [feature for feature, _ in reversed(ranked)]
    values = [score for _, score in reversed(ranked)]
    n_shown = len(names)
    fig, ax = plt.subplots(figsize=(8, max(6, n_shown * 0.35)))
    colors = plt.cm.Blues(np.linspace(0.3, 0.9, n_shown))
    bars = ax.barh(names, values, color=colors, edgecolor="white", linewidth=0.5)
    # Value labels to the right of each bar
    for bar, val in zip(bars, values):
        ax.text(bar.get_width() + 0.002, bar.get_y() + bar.get_height() / 2,
                f"{val:.4f}", ha="left", va="center", fontsize=9)
    ax.set_xlabel("Relative Importance")
    ax.set_title(f"Top {n_shown} Feature Importances — AURIS ({results.get('_best_model', '')})")
    upper = max(values) * 1.15
    if upper > 0:
        # Leave room for the value labels; skipped when every importance is
        # zero, since [0, 0] is not a usable axis range.
        ax.set_xlim([0, upper])
    fig.tight_layout()
    _save_fig(fig, output_dir, "fig5_feature_importance")
+# ═══════════════════════════════════════════════════════════════════════
+# Figure 6: Feature Correlation Heatmap
+# ═══════════════════════════════════════════════════════════════════════
def plot_correlation_heatmap(
    features_csv: Path,
    output_dir: Path,
) -> None:
    """Draw the Pearson correlation matrix of every feature column.

    Features are loaded from *features_csv*; the heatmap is rendered with
    seaborn when it is installed and with plain matplotlib otherwise, then
    saved as ``fig6_correlation_heatmap`` in *output_dir*.
    """
    X, y, feature_cols = _load_features_with_names(features_csv)
    # corrcoef treats rows as variables, hence the transpose.
    corr = np.corrcoef(X.T)
    n_cols = len(feature_cols)
    # The figure grows with the feature count so tick labels stay legible.
    fig_size = (max(12, n_cols * 0.4), max(10, n_cols * 0.35))
    fig, ax = plt.subplots(figsize=fig_size)
    if not HAS_SEABORN:
        cmap = "coolwarm"
        im = ax.imshow(corr, cmap=cmap, vmin=-1, vmax=1, aspect="auto")
        tick_positions = range(n_cols)
        ax.set_xticks(tick_positions)
        ax.set_yticks(tick_positions)
        ax.set_xticklabels(feature_cols, rotation=90, fontsize=7)
        ax.set_yticklabels(feature_cols, fontsize=7)
        fig.colorbar(im, ax=ax, shrink=0.8, label="Pearson Correlation")
    else:
        cmap = "RdBu_r"
        colorbar_options = {"shrink": 0.8, "label": "Pearson Correlation"}
        sns.heatmap(
            corr, xticklabels=feature_cols, yticklabels=feature_cols,
            cmap=cmap, center=0, vmin=-1, vmax=1,
            square=True, linewidths=0.5, ax=ax,
            cbar_kws=colorbar_options,
        )
    ax.set_title("Feature Correlation Matrix — AURIS", fontsize=14, pad=20)
    fig.tight_layout()
    _save_fig(fig, output_dir, "fig6_correlation_heatmap")
+# ═══════════════════════════════════════════════════════════════════════
+# Figure 7: Feature Distribution (Violin / Box Plots)
+# ═══════════════════════════════════════════════════════════════════════
def plot_feature_distributions(
    features_csv: Path,
    output_dir: Path,
    results: dict[str, Any] | None = None,
    top_n: int = 12,
) -> None:
    """Draw per-feature violin plots comparing Human (label 0) and AI (label 1).

    Args:
        features_csv: CSV of extracted features.
        output_dir: Directory that receives ``fig7_feature_distributions``.
        results: Optional results mapping. When it has ``_feature_importance``
            the *top_n* most important features present in the CSV are
            plotted; otherwise the first *top_n* CSV columns are used.
        top_n: Maximum number of features to plot.

    Nothing is drawn when the CSV has no feature columns.
    """
    X, y, feature_cols = _load_features_with_names(features_csv)
    # Select top features by importance, or first N
    selected: list[str] = []
    if results and "_feature_importance" in results:
        ranked = sorted(
            results["_feature_importance"].items(),
            key=lambda item: item[1], reverse=True,
        )
        selected = [feat for feat, _ in ranked[:top_n] if feat in feature_cols]
    if not selected:
        # No importance data, or none of its names match the CSV columns.
        selected = feature_cols[:top_n]
    if not selected:
        # A zero-row subplot grid would raise; bail out cleanly.
        print("  Skipping feature distributions — no features available")
        return

    if HAS_SEABORN:
        # Needed for the seaborn branch only; imported once, not per feature.
        import pandas as pd

    n_features = len(selected)
    cols = 3
    rows = (n_features + cols - 1) // cols
    fig, axes = plt.subplots(
        rows, cols, figsize=(5 * cols, 3.5 * rows), squeeze=False,
    )
    axes = axes.flatten()
    for idx, feat_name in enumerate(selected):
        ax = axes[idx]
        col_idx = feature_cols.index(feat_name)
        human_vals = X[y == 0, col_idx]
        ai_vals = X[y == 1, col_idx]
        if HAS_SEABORN:
            df = pd.DataFrame({
                "Value": np.concatenate([human_vals, ai_vals]),
                "Class": ["Human"] * len(human_vals) + ["AI"] * len(ai_vals),
            })
            sns.violinplot(
                data=df, x="Class", y="Value",
                palette={"Human": AURIS_BLUE, "AI": AURIS_RED},
                ax=ax, inner="quartile", linewidth=1,
            )
        else:
            # matplotlib cannot draw a violin for an empty sample, so only
            # classes that have data are passed on.
            groups = [
                (pos, vals)
                for pos, vals in ((0, human_vals), (1, ai_vals))
                if len(vals)
            ]
            if groups:
                ax.violinplot(
                    [vals for _, vals in groups],
                    positions=[pos for pos, _ in groups],
                    showmeans=True, showmedians=True,
                )
            ax.set_xticks([0, 1])
            ax.set_xticklabels(["Human", "AI"])
        ax.set_title(feat_name, fontsize=10)
        ax.set_xlabel("")
    # Hide unused axes in the last grid row
    for idx in range(n_features, len(axes)):
        axes[idx].set_visible(False)
    fig.suptitle("Feature Distributions — AI vs Human", fontsize=14, y=1.01)
    fig.tight_layout()
    _save_fig(fig, output_dir, "fig7_feature_distributions")
+# ═══════════════════════════════════════════════════════════════════════
+# Figure 8: Training Summary Table (LaTeX)
+# ═══════════════════════════════════════════════════════════════════════
def generate_latex_table(
    results: dict[str, Any],
    output_dir: Path,
) -> None:
    """Write the model comparison table as LaTeX and as Markdown.

    Args:
        results: Mapping of model name to a dict of metrics (``accuracy``,
            ``precision``, ``recall``, ``f1``, ``roc_auc``). Keys starting
            with ``_`` are metadata; ``_best_model`` names the row to bold.
            A metric that is missing or ``None`` is written as ``0.0000``.
        output_dir: Directory that receives ``table1_model_comparison.tex``
            and ``table1_model_comparison.md``; created if missing.

    Model names are escaped for LaTeX, so a name such as ``rf_v2`` does not
    break compilation of the generated table.
    """
    metric_keys = ("accuracy", "precision", "recall", "f1", "roc_auc")
    model_names = [
        key for key, value in results.items()
        if not key.startswith("_") and isinstance(value, dict)
    ]
    best_model = results.get("_best_model", "")
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    lines = [
        r"\begin{table}[htbp]",
        r"\centering",
        r"\caption{AURIS Model Performance Comparison}",
        r"\label{tab:model-comparison}",
        r"\begin{tabular}{lccccc}",
        r"\toprule",
        r"Model & Accuracy & Precision & Recall & F1 & ROC-AUC \\",
        r"\midrule",
    ]
    for name in model_names:
        data = results[name]
        cells = [_latex_escape(name)]
        cells.extend(f"{_metric_value(data, key):.4f}" for key in metric_keys)
        if name == best_model:
            # Bold every cell of the best model's row
            cells = [rf"\textbf{{{cell}}}" for cell in cells]
        lines.append("  " + " & ".join(cells) + r" \\")
    lines.extend([
        r"\bottomrule",
        r"\end{tabular}",
        r"\end{table}",
    ])
    table_path = output_dir / "table1_model_comparison.tex"
    table_path.write_text("\n".join(lines), encoding="utf-8")
    print("  Saved: table1_model_comparison.tex")

    # Also save as markdown for README
    md_lines = [
        "| Model | Accuracy | Precision | Recall | F1 | ROC-AUC |",
        "|-------|----------|-----------|--------|-----|---------|",
    ]
    for name in model_names:
        data = results[name]
        bold = "**" if name == best_model else ""
        metric_cells = " | ".join(
            f"{_metric_value(data, key):.4f}" for key in metric_keys
        )
        md_lines.append(f"| {bold}{name}{bold} | {metric_cells} |")
    md_path = output_dir / "table1_model_comparison.md"
    md_path.write_text("\n".join(md_lines), encoding="utf-8")
    print("  Saved: table1_model_comparison.md")


# Characters with special meaning in LaTeX text mode and their escaped
# forms. Applied in a single pass, so the braces introduced by a
# replacement are not escaped again.
_LATEX_SPECIALS = str.maketrans({
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
})


def _latex_escape(text: str) -> str:
    """Return *text* with LaTeX special characters escaped."""
    return str(text).translate(_LATEX_SPECIALS)


def _metric_value(data: dict[str, Any], key: str) -> float:
    """Return ``data[key]``, or 0 when the metric is missing or ``None``."""
    value = data.get(key, 0)
    return 0 if value is None else value
+# ═══════════════════════════════════════════════════════════════════════
+# Utilities
+# ═══════════════════════════════════════════════════════════════════════
def _load_features_with_names(
    features_csv: Path,
) -> tuple[np.ndarray, np.ndarray, list[str]]:
    """Load a features CSV, returning ``(X, y, feature_cols)``.

    Every column except ``file_path`` and ``label_int`` is treated as a
    feature. A cell that is missing (short row) or cannot be parsed as a
    float becomes ``0.0``; NaN values are also replaced by ``0.0``.

    Returns:
        X: ``float32`` array of shape ``(n_rows, n_features)``, including
            when the file has a header but no data rows.
        y: ``int32`` array of the ``label_int`` column.
        feature_cols: Feature column names in file order.

    Raises:
        ValueError: If the file has no header row, or a label is not an
            integer string.
        KeyError: If the ``label_int`` column is absent.
    """
    rows: list[list[float]] = []
    labels: list[int] = []
    # newline="" lets the csv module handle line endings itself.
    with open(features_csv, "r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        if reader.fieldnames is None:
            raise ValueError(f"{features_csv} is empty: no CSV header found")
        feature_cols = [
            c for c in reader.fieldnames
            if c not in ("file_path", "label_int")
        ]
        for row in reader:
            feat_values = []
            for col in feature_cols:
                try:
                    feat_values.append(float(row[col]))
                except (TypeError, ValueError, KeyError):
                    # TypeError: DictReader fills cells missing from a short
                    # row with None, and float(None) raises TypeError.
                    feat_values.append(0.0)
            rows.append(feat_values)
            labels.append(int(row["label_int"]))
    # The explicit reshape keeps X two-dimensional when there are no rows.
    X = np.nan_to_num(
        np.array(rows, dtype=np.float32).reshape(len(rows), len(feature_cols)),
        nan=0.0,
    )
    y = np.array(labels, dtype=np.int32)
    return X, y, feature_cols
+# ═══════════════════════════════════════════════════════════════════════
+# Main entry point
+# ═══════════════════════════════════════════════════════════════════════
def generate_all_figures(
    results_path: str | Path,
    features_csv: str | Path,
    output_dir: str | Path = "figures",
) -> None:
    """Generate all publication-quality figures and tables.

    Args:
        results_path: JSON file of per-model results. Keys starting with
            ``_`` are metadata.
        features_csv: CSV of extracted features, used for the correlation
            and distribution figures.
        output_dir: Directory for all outputs; created if missing.

    ROC/PR curves need ``y_true`` and ``y_prob``; confusion matrices need
    ``y_true`` and ``y_pred``. Each of these plots receives only the models
    that have the arrays it needs, so a model stored with summary metrics
    only (for example a baseline) no longer causes a ``KeyError``.
    """
    results_path = Path(results_path)
    features_csv = Path(features_csv)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"\nLoading results from {results_path}...")
    with open(results_path, "r", encoding="utf-8") as f:
        saved_results = json.load(f)

    curve_results = _models_with_keys(saved_results, ("y_true", "y_prob"))
    matrix_results = _models_with_keys(saved_results, ("y_true", "y_pred"))
    has_curves = any(not key.startswith("_") for key in curve_results)
    has_matrices = any(not key.startswith("_") for key in matrix_results)

    print(f"\nGenerating figures in {output_dir}/...\n")
    if has_curves:
        print("[1/8] ROC Curves")
        plot_roc_curves(curve_results, output_dir)
        print("[2/8] Precision-Recall Curves")
        plot_pr_curves(curve_results, output_dir)
    else:
        print("[1-2/8] Skipping ROC/PR — no per-sample probabilities stored")
    if has_matrices:
        print("[3/8] Confusion Matrices")
        plot_confusion_matrices(matrix_results, output_dir)
    else:
        print("[3/8] Skipping CM — no per-sample predictions stored")
    print("[4/8] Model Comparison Bar Chart")
    plot_model_comparison(saved_results, output_dir)
    print("[5/8] Feature Importance")
    plot_feature_importance(saved_results, output_dir)
    print("[6/8] Feature Correlation Heatmap")
    plot_correlation_heatmap(features_csv, output_dir)
    print("[7/8] Feature Distributions (AI vs Human)")
    plot_feature_distributions(features_csv, output_dir, saved_results)
    print("[8/8] LaTeX / Markdown Tables")
    generate_latex_table(saved_results, output_dir)
    print(f"\nAll figures saved to {output_dir}/")


def _models_with_keys(
    results: dict[str, Any],
    required: tuple[str, ...],
) -> dict[str, Any]:
    """Return *results* without the model entries that lack a *required* key.

    Metadata entries (keys starting with ``_``) are always kept.
    """
    return {
        key: value
        for key, value in results.items()
        if key.startswith("_")
        or (isinstance(value, dict) and all(name in value for name in required))
    }
def _run_cli() -> None:
    """Parse the command-line options and generate every figure."""
    cli = argparse.ArgumentParser(description="AURIS Visualization Pipeline")
    # (flag, default value, help text)
    options = (
        ("--results", "models/training_results.json", "Path to training_results.json"),
        ("--features", "data/training/features.csv", "Path to features.csv"),
        ("--output", "figures", "Output directory for figures"),
    )
    for flag, default, help_text in options:
        cli.add_argument(flag, default=default, help=help_text)
    opts = cli.parse_args()
    generate_all_figures(opts.results, opts.features, opts.output)


if __name__ == "__main__":
    _run_cli()