AbstractPhil committed on
Commit
c5e6ce6
Β·
verified Β·
1 Parent(s): 9f3395f

Create 13_projective_rehaul_probe_battery_testing.py

Browse files
13_projective_rehaul_probe_battery_testing.py ADDED
@@ -0,0 +1,556 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ implicit_solver/A2_projective_reprobe_h2_64_singles.py
3
+ =======================================================
4
+
5
+ Apply the A0/A1 projective probe to all 16 single-noise h2-64 batteries
6
+ (indices 0-15, phase='final'). Tests whether the projective-axis
7
+ interpretation holds across:
8
+ - 16 different training distributions (one per noise type)
9
+ - Full 10-epoch convergence (not just 1000 batches)
10
+ - Production-grade sphere-solver batteries
11
+
12
+ For each battery:
13
+ 1. Collect M tensor from gaussian test inputs (512 samples)
14
+ 2. Identify antipodal pairs (mutual-strongest, cos < -0.9)
15
+ 3. Collapse to projective axes
16
+ 4. Run projective probe metrics:
17
+ - mean pairwise angle on ℝPΒ³
18
+ - deviation from uniform ℝPΒ³ baseline
19
+ - cluster silhouette (structure above uniform?)
20
+ - effective rank (dimension utilization)
21
+ - secondary antipodal count (further collapse?)
22
+
23
+ Expected if projective-reading hypothesis holds:
24
+ - All 16 batteries: |deviation| < 0.05
25
+ - All 16 batteries: effective rank 3.9+ of 4
26
+ - Axis count varies per battery (noise-type-dependent codebook size)
27
+ - Cluster silhouette low across all (no residual structure)
28
+
29
+ Output
30
+ ------
31
+ /content/implicit_solver_reports/A2_projective_h2_64_singles.json
32
+ /content/implicit_solver_reports/A2_projective_h2_64_singles.png
33
+ """
34
+
35
+ import json
36
+ import math
37
+ from pathlib import Path
38
+
39
+ import numpy as np
40
+ import torch
41
+ import matplotlib.pyplot as plt
42
+ from sklearn.cluster import KMeans
43
+ from sklearn.metrics import silhouette_score
44
+
45
+
46
# Output locations for the probe artefacts (Colab-style absolute path).
OUTPUT_DIR = Path("/content/implicit_solver_reports")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_PLOT = OUTPUT_DIR / "A2_projective_h2_64_singles.png"
OUTPUT_JSON = OUTPUT_DIR / "A2_projective_h2_64_singles.json"

# Battery index -> noise type it was trained on. Order matters: main()
# asserts that battery i is the single-noise battery for noise type i.
NOISE_TYPE_NAMES = [
    'gaussian', 'uniform', 'uniform_scaled', 'block',
    'gradient', 'checker', 'salt_pepper', 'cauchy',
    'laplace', 'periodic', 'exponential', 'mixed',
    'poisson', 'structural', 'rayleigh', 'lognormal',
]
57
+
58
+
59
+ # ════════════════════════════════════════════════════════════════════
60
+ # Loading
61
+ # ════════════════════════════════════════════════════════════════════
62
+
63
def load_h2_64_array():
    """Return the h2-64 array model.

    Prefers a `loaded` object already present in the session globals;
    otherwise downloads the model from the Hugging Face hub.
    """
    cached = globals().get('loaded')
    if cached is not None:
        print(" Using `loaded` from global session")
        return cached
    # Importing geolip_svae.arrays registers BatteryArrayConfig so that
    # AutoModel can resolve the custom architecture.
    import geolip_svae.arrays  # noqa
    from transformers import AutoModel
    print(" `loaded` not in globals, fetching h2-64 from HF...")
    return AutoModel.from_pretrained("AbstractPhil/geolip-svae-h2-64")
75
+
76
+
77
def collect_M_from_bank(bank, img_size=64, n_batches=8, batch_size=64):
    """Collect per-sample M from one battery bank on gaussian test input.

    Args:
        bank: battery bank module; called as bank(imgs) and expected to
            return a dict where out['svd']['M'] carries the patch axis at
            dim 1 (patch 0 is [B, V, D] — per the original inline note).
        img_size: side length of the square test images.
        n_batches: number of batches to run through the bank.
        batch_size: samples per batch.

    Returns:
        numpy array of shape [n_batches * batch_size, V, D].

    Raises:
        RuntimeError: if OmegaNoiseDataset is not available in the session.
    """
    # OmegaNoiseDataset is defined in the notebook session (like `loaded`),
    # not in this file — fail with a clear message instead of a NameError.
    dataset_cls = globals().get('OmegaNoiseDataset')
    if dataset_cls is None:
        raise RuntimeError(
            "OmegaNoiseDataset not found in globals; "
            "run the dataset-definition cell before this probe")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    bank = bank.to(device)
    # allowed_types=[0] -> gaussian-only inputs, the common test condition
    # used for every battery regardless of its training noise.
    ds = dataset_cls(
        size=n_batches * batch_size, img_size=img_size, allowed_types=[0])
    loader = torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=False)

    all_M = []
    with torch.no_grad():
        for imgs, _ in loader:
            imgs = imgs.to(device)
            out = bank(imgs)
            M_patch0 = out['svd']['M'][:, 0]  # [B, V, D] — patch 0 only
            all_M.append(M_patch0.cpu())
    return torch.cat(all_M, dim=0).numpy()
93
+
94
+
95
+ # ════════════════════════════════════════════════════════════════════
96
+ # Projective probe (carry from A0/A1)
97
+ # ════════════════════════════════════════════════════════════════════
98
+
99
def identify_antipodal_pairs(M_avg, threshold=-0.9):
    """Greedily pair rows of M_avg whose unit directions are near-antipodal.

    Each row is normalised; a row becomes a candidate when the cosine to
    its most-opposite partner falls below `threshold`. Candidates are then
    claimed greedily from most antipodal to least.

    Returns:
        (pairs, unpaired): pairs as (min_idx, max_idx) tuples, plus the
        indices of rows left unclaimed.
    """
    row_norms = np.linalg.norm(M_avg, axis=1, keepdims=True)
    directions = M_avg / np.clip(row_norms, 1e-12, None)
    cos = directions @ directions.T
    np.fill_diagonal(cos, 1.0)

    n_rows = M_avg.shape[0]
    taken = [False] * n_rows

    # (cosine, i, j) for every row whose strongest opposite partner is
    # below the antipodal threshold; sorting puts the most antipodal first.
    strongest = ((i, int(cos[i].argmin())) for i in range(n_rows))
    candidates = sorted(
        (float(cos[i, j]), i, j)
        for i, j in strongest
        if float(cos[i, j]) < threshold
    )

    pairs = []
    for _, i, j in candidates:
        if taken[i] or taken[j]:
            continue
        # Accept when the match is mutual, or when j is itself
        # sufficiently antipodal to i.
        if cos[j].argmin() == i or cos[j, i] < threshold:
            pairs.append((min(i, j), max(i, j)))
            taken[i] = taken[j] = True

    leftovers = [idx for idx, used in enumerate(taken) if not used]
    return pairs, leftovers
124
+
125
+
126
def collapse_to_axes(M_avg, pairs, unpaired):
    """Collapse antipodal pairs and lone rows into canonical unit axes.

    Each pair (i, j) is merged as unit[i] - unit[j] and renormalised; each
    unpaired row is kept as-is. Every representative is sign-flipped so
    that its first component with magnitude above 1e-6 is positive.
    """
    row_norms = np.linalg.norm(M_avg, axis=1, keepdims=True)
    unit = M_avg / np.clip(row_norms, 1e-12, None)

    def _canonical(vec):
        # Flip sign so the first non-negligible component is positive.
        for component in vec:
            if abs(component) > 1e-6:
                return -vec if component < 0 else vec
        return vec

    reps = []
    for i, j in pairs:
        axis = unit[i] - unit[j]
        axis = axis / max(np.linalg.norm(axis), 1e-12)
        reps.append(_canonical(axis))
    reps.extend(_canonical(unit[k].copy()) for k in unpaired)
    return np.array(reps)
148
+
149
+
150
def projective_pairwise_angles(axes):
    """Return all upper-triangle pairwise angles on projective space.

    Angles are folded via min(theta, pi - theta), so each value lies in
    [0, pi/2] and is invariant to flipping the sign of either axis.
    """
    gram = np.clip(axes @ axes.T, -1, 1)
    theta = np.arccos(gram)
    folded = np.minimum(theta, np.pi - theta)
    iu, ju = np.triu_indices(axes.shape[0], k=1)
    return folded[iu, ju]
156
+
157
+
158
def uniform_rp_baseline(D, n_axes, n_trials=10):
    """Mean pairwise projective angle for uniform random axes on RP^(D-1).

    Draws `n_trials` independent sets of `n_axes` random unit vectors in
    R^D (fixed seed, so the baseline is deterministic) and averages their
    mean pairwise projective angle.

    Note: the original version sign-canonicalised each vector (first
    nonzero coordinate made positive) before measuring. That work is dead:
    projective_pairwise_angles folds angles via min(theta, pi - theta),
    which is invariant to per-row sign flips, so the canonicalisation has
    been removed (result identical up to floating-point rounding).
    """
    rng = np.random.RandomState(0)
    trial_means = []
    for _ in range(n_trials):
        x = rng.randn(n_axes, D)
        x /= np.linalg.norm(x, axis=1, keepdims=True)
        trial_means.append(projective_pairwise_angles(x).mean())
    return float(np.mean(trial_means))
173
+
174
+
175
def probe_battery(M_avg):
    """Run the full projective probe on one battery's mean M tensor.

    Pipeline: find antipodal row pairs, collapse them to projective axes,
    then measure uniformity (vs. a random-RP baseline), residual cluster
    structure, dimension utilization, and secondary antipodal collapse.

    Args:
        M_avg: [V, D] array of mean M rows for one battery.

    Returns:
        JSON-serialisable dict of probe metrics.
    """
    pairs, unpaired = identify_antipodal_pairs(M_avg, threshold=-0.9)
    axes = collapse_to_axes(M_avg, pairs, unpaired)
    D = axes.shape[1]
    n = axes.shape[0]

    proj_angles = projective_pairwise_angles(axes)
    baseline = uniform_rp_baseline(D, n)
    deviation = float(proj_angles.mean() - baseline)

    # Cluster silhouette: does any grouping structure remain on the axes?
    sils = []
    for k in range(2, min(8, n)):
        try:
            km = KMeans(n_clusters=k, n_init=5, random_state=42)
            labels = km.fit_predict(axes)
            if len(set(labels)) >= 2:
                sils.append((k, silhouette_score(axes, labels)))
        except Exception:
            # Degenerate clusterings (too few points, etc.) are skipped.
            pass
    if sils:
        best_k, best_sil = max(sils, key=lambda kv: kv[1])
    else:
        best_k, best_sil = None, None

    # Effective rank via entropy of the normalised singular-value spectrum.
    sv = np.linalg.svd(axes, compute_uv=False)
    sv_norm = sv / sv.sum()
    erank = math.exp(-(sv_norm * np.log(sv_norm + 1e-12)).sum())

    # Secondary antipodal: do the collapsed axes themselves still pair up?
    cos_axes = axes @ axes.T
    np.fill_diagonal(cos_axes, 1.0)
    secondary = (cos_axes.min(axis=1) < -0.9).sum() // 2

    return {
        'pairs': len(pairs),
        'unpaired': len(unpaired),
        'n_axes': n,
        'proj_angle_mean': float(proj_angles.mean()),
        'uniform_baseline': baseline,
        'deviation': deviation,
        'best_cluster_k': best_k,
        # BUG FIX: the original `if best_sil` treated a legitimate
        # silhouette of exactly 0.0 as missing (falsy) even though
        # best_cluster_k was set; compare against None explicitly.
        'best_silhouette': float(best_sil) if best_sil is not None else None,
        'effective_rank': float(erank),
        'utilization': float(erank / D),
        'secondary_antipodal': int(secondary),
        'D': int(D),
        'proj_angles_subset': proj_angles[:100].tolist(),
    }
223
+
224
+
225
def classify_projective_fit(probe):
    """Classify how well a battery matches the projective-axis reading.

    Returns one of 'PROJECTIVE-CLEAN', 'PROJECTIVE-MOSTLY', 'STRUCTURED',
    or 'DEGENERATE', based on the probe metrics dict.
    """
    is_uniform = abs(probe['deviation']) < 0.05
    is_full_rank = probe['utilization'] > 0.95
    # `or 0` mirrors the probe's None-for-missing silhouette convention.
    silhouette = probe['best_silhouette'] or 0
    lacks_clusters = silhouette < 0.4
    few_secondary = probe['secondary_antipodal'] <= 3

    # Guard clauses from worst fit to best.
    if not is_full_rank:
        return 'DEGENERATE'
    if not is_uniform:
        return 'STRUCTURED'
    if lacks_clusters and few_secondary:
        return 'PROJECTIVE-CLEAN'
    return 'PROJECTIVE-MOSTLY'
240
+
241
+
242
+ # ════════════════════════════════════════════════════════════════════
243
+ # Main
244
+ # ════════════════════════════════════════════════════════════════════
245
+
246
def main():
    """Probe all 16 single-noise h2-64 batteries and summarise the results.

    For each battery: collect M on gaussian inputs, run the projective
    probe, classify the fit, then print aggregate statistics, write a JSON
    report, and render a 6-panel summary figure.

    Returns:
        list of per-battery probe dicts (entries carry an 'error' key when
        a battery failed to probe).
    """
    print("=" * 70)
    print("A2 β€” projective re-probe of h2-64 single-noise batteries 0-15")
    print("Tests whether projective-axis reading holds across training")
    print("distributions (16 noise types, all 10-epoch converged)")
    print("=" * 70)

    print("\nLoading h2-64 array...")
    array_model = load_h2_64_array()

    print("\nProbing each single-noise battery on gaussian inputs:\n")
    print(f" {'Idx':>3} {'Noise type':<18} {'Pairs':>5} {'Axes':>5} "
          f"{'Dev':>8} {'Sil':>6} {'Erank':>5} {'2Β°':>3} Verdict")
    print(" " + "-" * 85)

    results = []
    for batt_idx in range(16):
        cfg_dict = array_model.config.batteries[batt_idx]
        noise_name = NOISE_TYPE_NAMES[batt_idx]
        # Sanity check: battery i must be the single-noise battery for
        # noise type i, matching NOISE_TYPE_NAMES ordering.
        # NOTE(review): assert is stripped under -O; acceptable for a
        # notebook-style script.
        assert cfg_dict.get('noise_types') == [batt_idx], \
            f"Expected battery {batt_idx} to be single noise {batt_idx}, " \
            f"got {cfg_dict.get('noise_types')}"

        bank = array_model.bank(batt_idx, 'final')
        bank.eval()

        try:
            all_M = collect_M_from_bank(bank)
            # Average over samples -> [V, D] mean M for this battery.
            M_avg = all_M.mean(axis=0)
            probe = probe_battery(M_avg)
            probe['battery_idx'] = batt_idx
            probe['noise_name'] = noise_name
            probe['verdict'] = classify_projective_fit(probe)

            print(f" {batt_idx:>3} {noise_name:<18} "
                  f"{probe['pairs']:>5} {probe['n_axes']:>5} "
                  f"{probe['deviation']:>+.3f} "
                  f"{probe['best_silhouette'] or 0:>6.3f} "
                  f"{probe['effective_rank']:>5.2f} "
                  f"{probe['secondary_antipodal']:>3} {probe['verdict']}")

        except Exception as e:
            # Best-effort: record the failure and keep probing the rest.
            print(f" {batt_idx:>3} {noise_name:<18} ERROR: "
                  f"{type(e).__name__}: {str(e)[:40]}")
            probe = {'battery_idx': batt_idx, 'noise_name': noise_name,
                     'error': str(e)}

        results.append(probe)

    # ════════════════════════════════════════════════════════════════
    # Aggregate summary
    # ════════════════════════════════════════════════════════════════

    ok_results = [r for r in results if 'error' not in r]

    print("\n" + "=" * 70)
    print("AGGREGATE RESULTS")
    print("=" * 70)

    verdicts = {}
    for r in ok_results:
        verdicts[r['verdict']] = verdicts.get(r['verdict'], 0) + 1

    print("\nVerdict distribution:")
    for v, n in sorted(verdicts.items(), key=lambda x: -x[1]):
        print(f" {v}: {n}/{len(ok_results)}")

    # Axis count statistics (over successfully probed batteries only).
    axis_counts = [r['n_axes'] for r in ok_results]
    pairs_counts = [r['pairs'] for r in ok_results]
    deviations = [r['deviation'] for r in ok_results]
    silhouettes = [r['best_silhouette'] or 0 for r in ok_results]
    eranks = [r['effective_rank'] for r in ok_results]

    print(f"\nAxis count across 16 batteries:")
    print(f" min: {min(axis_counts)}, max: {max(axis_counts)}, "
          f"mean: {np.mean(axis_counts):.1f}, std: {np.std(axis_counts):.1f}")
    print(f"\nAntipodal pairs across 16 batteries:")
    print(f" min: {min(pairs_counts)}, max: {max(pairs_counts)}, "
          f"mean: {np.mean(pairs_counts):.1f}, std: {np.std(pairs_counts):.1f}")
    print(f"\nDeviation from uniform ℝPΒ³:")
    print(f" min: {min(deviations):+.4f}, max: {max(deviations):+.4f}, "
          f"mean: {np.mean(deviations):+.4f}, std: {np.std(deviations):.4f}")
    print(f"\nCluster silhouette:")
    print(f" min: {min(silhouettes):.3f}, max: {max(silhouettes):.3f}, "
          f"mean: {np.mean(silhouettes):.3f}")
    print(f"\nEffective rank (max 4.0):")
    print(f" min: {min(eranks):.3f}, max: {max(eranks):.3f}, "
          f"mean: {np.mean(eranks):.3f}")

    # ════════════════════════════════════════════════════════════════
    # Save JSON
    # ════════════════════════════════════════════════════════════════

    # default=str keeps numpy scalars and other oddballs serialisable.
    with open(OUTPUT_JSON, 'w') as f:
        json.dump({
            'results_per_battery': results,
            'aggregate': {
                'n_batteries': len(ok_results),
                'verdict_counts': verdicts,
                'axis_count_stats': {
                    'min': int(min(axis_counts)),
                    'max': int(max(axis_counts)),
                    'mean': float(np.mean(axis_counts)),
                    'std': float(np.std(axis_counts)),
                },
                'deviation_stats': {
                    'min': float(min(deviations)),
                    'max': float(max(deviations)),
                    'mean': float(np.mean(deviations)),
                    'std': float(np.std(deviations)),
                },
                'silhouette_stats': {
                    'min': float(min(silhouettes)),
                    'max': float(max(silhouettes)),
                    'mean': float(np.mean(silhouettes)),
                },
                'erank_stats': {
                    'min': float(min(eranks)),
                    'max': float(max(eranks)),
                    'mean': float(np.mean(eranks)),
                },
            },
        }, f, indent=2, default=str)
    print(f"\nSaved: {OUTPUT_JSON}")

    # ════════════════════════════════════════════════════════════════
    # Plot: 6 panels summarizing the cross-battery picture
    # ════════════════════════════════════════════════════════════════

    fig = plt.figure(figsize=(18, 12))

    # Panel 1: per-battery deviation from uniform
    ax1 = fig.add_subplot(2, 3, 1)
    x = np.arange(len(ok_results))
    devs = [r['deviation'] for r in ok_results]
    colors = ['green' if abs(d) < 0.05 else 'orange' if abs(d) < 0.1 else 'red'
              for d in devs]
    ax1.bar(x, devs, color=colors)
    ax1.axhline(0.05, color='red', linestyle='--', alpha=0.5,
                label='Β±0.05 threshold')
    ax1.axhline(-0.05, color='red', linestyle='--', alpha=0.5)
    ax1.axhline(0, color='black', linestyle='-', alpha=0.3)
    ax1.set_xticks(x)
    ax1.set_xticklabels([r['noise_name'][:6] for r in ok_results],
                        rotation=60, ha='right', fontsize=8)
    ax1.set_ylabel('Deviation from uniform ℝPΒ³')
    ax1.set_title('Projective uniformity per battery')
    ax1.legend(fontsize=8)
    ax1.grid(alpha=0.3, axis='y')

    # Panel 2: axis count per battery
    ax2 = fig.add_subplot(2, 3, 2)
    axes_n = [r['n_axes'] for r in ok_results]
    pairs_n = [r['pairs'] for r in ok_results]
    ax2.bar(x, axes_n, color='steelblue', label='Total axes')
    ax2.bar(x, pairs_n, color='darkorange', label='Antipodal pairs collapsed')
    ax2.set_xticks(x)
    ax2.set_xticklabels([r['noise_name'][:6] for r in ok_results],
                        rotation=60, ha='right', fontsize=8)
    ax2.set_ylabel('Count')
    ax2.set_title('Axis codebook size per battery\n'
                  '(V=32 rows β†’ N axes after collapse)')
    ax2.legend(fontsize=8)
    ax2.grid(alpha=0.3, axis='y')

    # Panel 3: cluster silhouette
    ax3 = fig.add_subplot(2, 3, 3)
    sils = [r['best_silhouette'] or 0 for r in ok_results]
    colors = ['green' if s < 0.4 else 'orange' if s < 0.5 else 'red' for s in sils]
    ax3.bar(x, sils, color=colors)
    ax3.axhline(0.4, color='orange', linestyle='--', alpha=0.5,
                label='weak structure')
    ax3.axhline(0.5, color='red', linestyle='--', alpha=0.5,
                label='strong structure')
    ax3.set_xticks(x)
    ax3.set_xticklabels([r['noise_name'][:6] for r in ok_results],
                        rotation=60, ha='right', fontsize=8)
    ax3.set_ylabel('Best cluster silhouette')
    ax3.set_title('Residual structure on ℝPΒ³\n(low = clean projective)')
    ax3.legend(fontsize=8)
    ax3.grid(alpha=0.3, axis='y')

    # Panel 4: effective rank
    ax4 = fig.add_subplot(2, 3, 4)
    eranks_arr = [r['effective_rank'] for r in ok_results]
    ax4.bar(x, eranks_arr, color='purple')
    ax4.axhline(4.0, color='green', linestyle='--', alpha=0.5,
                label='max (full rank 4)')
    ax4.axhline(3.8, color='orange', linestyle='--', alpha=0.5,
                label='0.95 Γ— max')
    ax4.set_xticks(x)
    ax4.set_xticklabels([r['noise_name'][:6] for r in ok_results],
                        rotation=60, ha='right', fontsize=8)
    ax4.set_ylabel('Effective rank')
    ax4.set_title('Dimension utilization on ℝPΒ³')
    ax4.set_ylim([3.0, 4.05])
    ax4.legend(fontsize=8)
    ax4.grid(alpha=0.3, axis='y')

    # Panel 5: aggregate angle distribution
    ax5 = fig.add_subplot(2, 3, 5)
    all_angles = []
    for r in ok_results:
        all_angles.extend(r['proj_angles_subset'])
    ax5.hist(all_angles, bins=40, density=True, alpha=0.7, color='steelblue')

    # Empirical uniform baseline for ℝPΒ³
    avg_baseline = np.mean([r['uniform_baseline'] for r in ok_results])
    ax5.axvline(avg_baseline, color='red', linestyle='--',
                label=f'uniform ℝPΒ³ baseline ({avg_baseline:.3f})')
    ax5.set_xlabel('Projective pairwise angle (radians)')
    ax5.set_ylabel('Density')
    ax5.set_title(f'Aggregate angle distribution\n'
                  f'(all 16 batteries pooled)')
    ax5.legend(fontsize=8)

    # Panel 6: verdict summary text
    ax6 = fig.add_subplot(2, 3, 6)
    ax6.axis('off')

    n_clean = verdicts.get('PROJECTIVE-CLEAN', 0)
    n_mostly = verdicts.get('PROJECTIVE-MOSTLY', 0)
    n_struct = verdicts.get('STRUCTURED', 0)
    n_degen = verdicts.get('DEGENERATE', 0)
    total = len(ok_results)

    # max(total, 1) guards the all-batteries-errored case.
    clean_frac = (n_clean + n_mostly) / max(total, 1)

    if clean_frac >= 0.9:
        headline = "βœ“ HYPOTHESIS SUPPORTED"
        color = 'lightgreen'
    elif clean_frac >= 0.7:
        headline = "~ MOSTLY SUPPORTED"
        color = 'palegreen'
    elif clean_frac >= 0.5:
        headline = "~ MIXED"
        color = 'lightyellow'
    else:
        headline = "βœ— HYPOTHESIS NOT SUPPORTED"
        color = 'mistyrose'

    summary_text = (
        f"16 single-noise batteries probed.\n"
        f"All h2-64 architecture (V=32, D=4, H2_linear_matched).\n"
        f"All 10-epoch fully converged.\n\n"
        f"PROJECTIVE-CLEAN: {n_clean}/{total}\n"
        f"PROJECTIVE-MOSTLY: {n_mostly}/{total}\n"
        f"STRUCTURED: {n_struct}/{total}\n"
        f"DEGENERATE: {n_degen}/{total}\n\n"
        f"Axis count range: {min(axis_counts)}-{max(axis_counts)}\n"
        f"Mean deviation: {np.mean(deviations):+.4f}\n"
        f"Mean silhouette: {np.mean(silhouettes):.3f}\n"
        f"Mean effective rank: {np.mean(eranks):.2f} / 4\n\n"
        f"Interpretation:\n"
    )

    if clean_frac >= 0.9:
        summary_text += (
            "Projective-axis reading is GENERAL β€” holds across\n"
            "16 different training distributions at full convergence.\n"
            "h2-64 is a library of per-noise-type axis codebooks."
        )
    elif clean_frac >= 0.7:
        summary_text += (
            "Most batteries fit the projective reading.\n"
            "Outliers suggest noise-type-specific geometry variations.\n"
            "Worth investigating which noise types deviate and why."
        )
    else:
        summary_text += (
            "Projective reading doesn't generalize cleanly.\n"
            "Either the threshold (|dev|<0.05) is too strict,\n"
            "or h2-64 batteries have noise-specific non-projective\n"
            "geometry that only the D=3 and Q-rank02 cases shared."
        )

    ax6.text(0.5, 0.95, headline, ha='center', va='top',
             fontsize=16, fontweight='bold',
             bbox=dict(boxstyle='round', facecolor=color, alpha=0.8))
    ax6.text(0.05, 0.78, summary_text, ha='left', va='top',
             fontsize=9, family='monospace')

    plt.tight_layout()
    plt.savefig(OUTPUT_PLOT, dpi=120, bbox_inches='tight')
    plt.show()
    print(f"Saved: {OUTPUT_PLOT}")

    # Conclusion
    print("\n" + "=" * 70)
    print("CONCLUSION")
    print("=" * 70)
    print(f"\n {n_clean + n_mostly}/{total} batteries fit the projective "
          f"reading (clean or mostly).")
    print(f" Mean deviation from uniform ℝPΒ³: {np.mean(deviations):+.4f}")
    print(f" Mean cluster silhouette: {np.mean(silhouettes):.3f}")
    print()
    if clean_frac >= 0.9:
        print(" The projective-axis hypothesis is SUPPORTED across 16 different")
        print(" trained sphere-solvers. This is strong evidence that the")
        print(" ℝP^(D-1) reading is general, not D=3-specific or Q-sweep-specific.")
        print()
        print(" h2-64 is effectively a library of 16 trained axis codebooks,")
        print(" each with its own cardinality and orientation on ℝPΒ³,")
        print(" each trained for a specific noise discrimination task.")

    return results
553
+
554
+
555
# Script entry point: run the probe; `results` stays available for
# interactive inspection in a notebook session.
if __name__ == '__main__':
    results = main()