#!/usr/bin/env python3
"""Compute theoretical maximum top-1 accuracy for random chess play.
Two ceilings computed via Monte Carlo rollouts in the Rust engine:
1. Unconditional: E[1/N_legal] — best accuracy without knowing the outcome.
2. Outcome-conditioned: E[max_m P(m|outcome, history)] — best accuracy when
the outcome token is known. Estimated by playing out random continuations
from each legal move and measuring which outcomes result.
The "adjusted accuracy" normalizes model accuracy against these ceilings:
adjusted = model_accuracy / ceiling
Usage:
uv run python scripts/compute_theoretical_ceiling.py
uv run python scripts/compute_theoretical_ceiling.py --n-games 5000 --rollouts 64
uv run python scripts/compute_theoretical_ceiling.py --model-accuracy 0.070
"""
from __future__ import annotations
import argparse
import json
import time
from pathlib import Path
import numpy as np
import chess_engine as engine
def main():
    """Estimate accuracy ceilings for random chess play and save them as JSON.

    Parses CLI options, invokes the Rust engine's Monte Carlo estimator
    (``engine.compute_accuracy_ceiling``), prints overall, per-outcome, and
    per-distance-from-end breakdowns, optionally reports the model's
    "adjusted accuracy" relative to each ceiling, and writes all results to
    the ``--output`` JSON file.
    """
    parser = argparse.ArgumentParser(
        description="Compute theoretical accuracy ceilings for random chess"
    )
    parser.add_argument("--n-games", type=int, default=2000,
                        help="Number of random games to generate")
    parser.add_argument("--rollouts", type=int, default=32,
                        help="Monte Carlo rollouts per legal move")
    parser.add_argument("--sample-rate", type=float, default=0.02,
                        # %% is required: argparse applies %-formatting to help text.
                        help="Fraction of positions to sample (1.0=all, 0.02=2%%)")
    parser.add_argument("--seed", type=int, default=77777)
    parser.add_argument("--output", type=str, default="data/theoretical_ceiling.json")
    parser.add_argument("--model-accuracy", type=float, default=None,
                        help="Model top-1 accuracy to compute adjusted score")
    args = parser.parse_args()

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Fixed: was an f-string with no placeholders (ruff F541).
    print("Computing theoretical accuracy ceilings")
    print(f"  Games: {args.n_games:,}")
    print(f"  Rollouts/move: {args.rollouts}")
    print(f"  Sample rate: {args.sample_rate:.0%}")
    print(f"  Seed: {args.seed}")
    print()

    # All heavy computation happens inside the Rust engine; max_ply=255 caps
    # game length so rollouts always terminate.
    t0 = time.time()
    result = engine.compute_accuracy_ceiling(
        n_games=args.n_games,
        max_ply=255,
        n_rollouts=args.rollouts,
        sample_rate=args.sample_rate,
        seed=args.seed,
    )
    elapsed = time.time() - t0

    # Aggregate ceilings; "boost" is how much knowing the outcome helps.
    uncond = result["unconditional_ceiling"]
    naive_cond = result["naive_conditional_ceiling"]
    cond = result["conditional_ceiling"]
    boost_naive = naive_cond / uncond if uncond > 0 else 0
    boost = cond / uncond if uncond > 0 else 0

    print(f"Positions sampled: {result['n_positions']:,}")
    print(f"Unconditional ceiling: {uncond:.4f} ({uncond*100:.2f}%)")
    print(f"Naive conditional ceiling: {naive_cond:.4f} ({naive_cond*100:.2f}%) {boost_naive:.2f}x")
    print(f"MCTS conditional ceiling: {cond:.4f} ({cond*100:.2f}%) {boost:.2f}x")
    print(f"Time: {elapsed:.0f}s")
    print()

    # Per-outcome breakdown: per-position arrays, masked by terminal outcome.
    outcomes = np.asarray(result["outcome"])
    conditionals = np.asarray(result["conditional"])
    naive_conditionals = np.asarray(result["naive_conditional"])
    unconditionals = np.asarray(result["unconditional"])
    # Index i corresponds to outcome code i emitted by the engine.
    outcome_names = [
        "W checkmated", "B checkmated", "Stalemate", "75-move",
        "5-fold rep", "Insuff mat", "Ply limit",
    ]
    print("Per-outcome breakdown:")
    outcome_data = {}
    for oi in range(7):
        mask = outcomes == oi
        n = int(mask.sum())
        if n > 0:  # skip outcomes never observed in the sample
            uc = float(unconditionals[mask].mean())
            nc = float(naive_conditionals[mask].mean())
            cc = float(conditionals[mask].mean())
            print(f"  {outcome_names[oi]:>12}: uncond={uc:.4f} naive={nc:.4f} "
                  f"mcts={cc:.4f} (n={n})")
            outcome_data[outcome_names[oi]] = {
                "unconditional": uc, "naive_conditional": nc,
                "conditional": cc, "n_positions": n,
            }
    print()

    # Per-ply-from-end breakdown: conditioning should matter most near the end.
    plies = np.asarray(result["ply"])
    game_lengths = np.asarray(result["game_length"])
    plies_from_end = game_lengths - plies
    print("Ceiling by distance from game end:")
    distance_data = {}
    for dist in range(1, 21):
        mask = plies_from_end == dist
        n = int(mask.sum())
        if n > 10:  # require a minimal sample before reporting a mean
            uc = float(unconditionals[mask].mean())
            nc = float(naive_conditionals[mask].mean())
            cc = float(conditionals[mask].mean())
            bar = "#" * int(cc * 200)  # crude ASCII bar chart
            print(f"  {dist:>3} plies from end: uncond={uc:.4f} naive={nc:.4f} mcts={cc:.4f} {bar}")
            distance_data[dist] = {"unconditional": uc, "naive_conditional": nc, "conditional": cc, "n": n}
    print()

    # Optional: normalize a model's measured accuracy against each ceiling.
    if args.model_accuracy is not None:
        ma = args.model_accuracy
        adj_uncond = ma / uncond if uncond > 0 else 0
        adj_naive = ma / naive_cond if naive_cond > 0 else 0
        adj_cond = ma / cond if cond > 0 else 0
        print(f"Model accuracy: {ma:.4f} ({ma*100:.2f}%)")
        print(f"  vs unconditional ceiling: {adj_uncond:.1%} of theoretical max")
        print(f"  vs naive conditional ceiling: {adj_naive:.1%} of theoretical max")
        print(f"  vs MCTS conditional ceiling: {adj_cond:.1%} of theoretical max")
        print()

    # Persist everything (cast to plain Python types so json can serialize).
    data = {
        "unconditional_ceiling": float(uncond),
        "naive_conditional_ceiling": float(naive_cond),
        "conditional_ceiling": float(cond),
        "naive_conditioning_boost": float(boost_naive),
        "mcts_conditioning_boost": float(boost),
        "n_positions": int(result["n_positions"]),
        "n_games": args.n_games,
        "n_rollouts": args.rollouts,
        "sample_rate": args.sample_rate,
        "seed": args.seed,
        "elapsed_seconds": elapsed,
        "per_outcome": outcome_data,
        # JSON object keys must be strings; distance keys are ints above.
        "per_distance_from_end": {str(k): v for k, v in distance_data.items()},
    }
    if args.model_accuracy is not None:
        data["model_accuracy"] = args.model_accuracy
        data["adjusted_vs_unconditional"] = adj_uncond
        data["adjusted_vs_naive_conditional"] = adj_naive
        data["adjusted_vs_conditional"] = adj_cond
    with open(output_path, "w") as f:
        json.dump(data, f, indent=2)
    print(f"Saved to {output_path}")
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|