File size: 6,470 Bytes
44311e2
 
 
f9aaa57
 
44311e2
 
f9aaa57
 
44311e2
 
 
 
 
f9aaa57
 
 
44311e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9aaa57
 
 
 
 
 
44311e2
 
 
 
 
 
 
 
 
f9aaa57
 
 
 
 
44311e2
 
 
f9aaa57
 
 
 
 
 
 
 
44311e2
f9aaa57
75cf8a6
f9aaa57
75cf8a6
f9aaa57
44311e2
f9aaa57
75cf8a6
 
 
f9aaa57
44311e2
 
f9aaa57
75cf8a6
 
 
 
f9aaa57
75cf8a6
 
f9aaa57
 
 
 
75cf8a6
f9aaa57
 
 
 
75cf8a6
f9aaa57
75cf8a6
 
f9aaa57
75cf8a6
 
f9aaa57
44311e2
 
f9aaa57
75cf8a6
 
f9aaa57
 
 
 
 
 
 
 
 
75cf8a6
f9aaa57
 
75cf8a6
 
f9aaa57
44311e2
f9aaa57
44311e2
 
f9aaa57
75cf8a6
f9aaa57
44311e2
75cf8a6
 
 
f9aaa57
 
 
 
 
75cf8a6
f9aaa57
75cf8a6
 
f9aaa57
 
 
 
 
 
 
 
 
 
 
 
75cf8a6
f9aaa57
44311e2
 
f9aaa57
 
44311e2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env python3
"""Compute theoretical maximum top-1 accuracy for random chess play.

Two ceilings computed via Monte Carlo rollouts in the Rust engine:

1. Unconditional: E[1/N_legal] — best accuracy without knowing the outcome.
2. Outcome-conditioned: E[max_m P(m|outcome, history)] — best accuracy when
   the outcome token is known. Estimated by playing out random continuations
   from each legal move and measuring which outcomes result.

The "adjusted accuracy" normalizes model accuracy against these ceilings:
    adjusted = model_accuracy / ceiling

Usage:
    uv run python scripts/compute_theoretical_ceiling.py
    uv run python scripts/compute_theoretical_ceiling.py --n-games 5000 --rollouts 64
    uv run python scripts/compute_theoretical_ceiling.py --model-accuracy 0.070
"""

from __future__ import annotations

import argparse
import json
import time
from pathlib import Path

import numpy as np

import chess_engine as engine


def _parse_args() -> argparse.Namespace:
    """Parse command-line options for the ceiling computation."""
    parser = argparse.ArgumentParser(
        description="Compute theoretical accuracy ceilings for random chess"
    )
    parser.add_argument("--n-games", type=int, default=2000,
                        help="Number of random games to generate")
    parser.add_argument("--rollouts", type=int, default=32,
                        help="Monte Carlo rollouts per legal move")
    # argparse applies %-formatting to help strings, so '%%' renders as '%'.
    parser.add_argument("--sample-rate", type=float, default=0.02,
                        help="Fraction of positions to sample (1.0=all, 0.02=2%%)")
    parser.add_argument("--seed", type=int, default=77777)
    parser.add_argument("--output", type=str, default="data/theoretical_ceiling.json")
    parser.add_argument("--model-accuracy", type=float, default=None,
                        help="Model top-1 accuracy to compute adjusted score")
    return parser.parse_args()


def _outcome_breakdown(outcomes, unconditionals, naive_conditionals, conditionals):
    """Print per-outcome mean ceilings and return them as a JSON-ready dict.

    Outcome indices 0..6 follow the Rust engine's encoding; outcomes never
    observed in the sample are skipped.
    """
    outcome_names = [
        "W checkmated", "B checkmated", "Stalemate", "75-move",
        "5-fold rep", "Insuff mat", "Ply limit",
    ]
    print("Per-outcome breakdown:")
    outcome_data = {}
    for oi, name in enumerate(outcome_names):
        mask = outcomes == oi
        n = int(mask.sum())
        if n > 0:
            uc = float(unconditionals[mask].mean())
            nc = float(naive_conditionals[mask].mean())
            cc = float(conditionals[mask].mean())
            print(f"  {name:>12}: uncond={uc:.4f}  naive={nc:.4f}  "
                  f"mcts={cc:.4f}  (n={n})")
            outcome_data[name] = {
                "unconditional": uc, "naive_conditional": nc,
                "conditional": cc, "n_positions": n,
            }
    print()
    return outcome_data


def _distance_breakdown(plies_from_end, unconditionals, naive_conditionals, conditionals):
    """Print mean ceilings bucketed by distance from game end (1..20 plies).

    Buckets with 10 or fewer samples are skipped as too noisy. Returns an
    int-keyed dict of per-bucket statistics.
    """
    print("Ceiling by distance from game end:")
    distance_data = {}
    for dist in range(1, 21):
        mask = plies_from_end == dist
        n = int(mask.sum())
        if n > 10:
            uc = float(unconditionals[mask].mean())
            nc = float(naive_conditionals[mask].mean())
            cc = float(conditionals[mask].mean())
            # ASCII bar scaled so that a 0.5 ceiling spans 100 chars.
            bar = "#" * int(cc * 200)
            print(f"  {dist:>3} plies from end: uncond={uc:.4f}  naive={nc:.4f}  mcts={cc:.4f}  {bar}")
            distance_data[dist] = {"unconditional": uc, "naive_conditional": nc, "conditional": cc, "n": n}
    print()
    return distance_data


def _adjusted_scores(ma, uncond, naive_cond, cond):
    """Print and return the model accuracy normalized by each ceiling.

    Returns (adj_vs_unconditional, adj_vs_naive_conditional, adj_vs_conditional);
    a zero ceiling yields an adjusted score of 0 rather than a ZeroDivisionError.
    """
    adj_uncond = ma / uncond if uncond > 0 else 0
    adj_naive = ma / naive_cond if naive_cond > 0 else 0
    adj_cond = ma / cond if cond > 0 else 0
    print(f"Model accuracy: {ma:.4f} ({ma*100:.2f}%)")
    print(f"  vs unconditional ceiling:     {adj_uncond:.1%} of theoretical max")
    print(f"  vs naive conditional ceiling: {adj_naive:.1%} of theoretical max")
    print(f"  vs MCTS conditional ceiling:  {adj_cond:.1%} of theoretical max")
    print()
    return adj_uncond, adj_naive, adj_cond


def main():
    """Run the Monte Carlo ceiling computation and report/save the results.

    Drives the Rust engine's ``compute_accuracy_ceiling``, prints aggregate and
    per-outcome / per-distance breakdowns, optionally normalizes a supplied
    model accuracy against each ceiling, and writes everything to a JSON file.
    """
    args = _parse_args()

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    print("Computing theoretical accuracy ceilings")
    print(f"  Games: {args.n_games:,}")
    print(f"  Rollouts/move: {args.rollouts}")
    print(f"  Sample rate: {args.sample_rate:.0%}")
    print(f"  Seed: {args.seed}")
    print()

    # perf_counter is monotonic — the right clock for elapsed-time measurement.
    t0 = time.perf_counter()
    result = engine.compute_accuracy_ceiling(
        n_games=args.n_games,
        max_ply=255,
        n_rollouts=args.rollouts,
        sample_rate=args.sample_rate,
        seed=args.seed,
    )
    elapsed = time.perf_counter() - t0

    uncond = result["unconditional_ceiling"]
    naive_cond = result["naive_conditional_ceiling"]
    cond = result["conditional_ceiling"]
    # Conditioning "boost": how much knowing the outcome token helps vs. not.
    boost_naive = naive_cond / uncond if uncond > 0 else 0
    boost = cond / uncond if uncond > 0 else 0

    print(f"Positions sampled: {result['n_positions']:,}")
    print(f"Unconditional ceiling:       {uncond:.4f} ({uncond*100:.2f}%)")
    print(f"Naive conditional ceiling:   {naive_cond:.4f} ({naive_cond*100:.2f}%)  {boost_naive:.2f}x")
    print(f"MCTS conditional ceiling:    {cond:.4f} ({cond*100:.2f}%)  {boost:.2f}x")
    print(f"Time: {elapsed:.0f}s")
    print()

    # Per-position arrays from the engine; converted once for masked reductions.
    outcomes = np.asarray(result["outcome"])
    conditionals = np.asarray(result["conditional"])
    naive_conditionals = np.asarray(result["naive_conditional"])
    unconditionals = np.asarray(result["unconditional"])

    outcome_data = _outcome_breakdown(
        outcomes, unconditionals, naive_conditionals, conditionals
    )

    plies = np.asarray(result["ply"])
    game_lengths = np.asarray(result["game_length"])
    plies_from_end = game_lengths - plies
    distance_data = _distance_breakdown(
        plies_from_end, unconditionals, naive_conditionals, conditionals
    )

    if args.model_accuracy is not None:
        adj_uncond, adj_naive, adj_cond = _adjusted_scores(
            args.model_accuracy, uncond, naive_cond, cond
        )

    # Save results
    data = {
        "unconditional_ceiling": float(uncond),
        "naive_conditional_ceiling": float(naive_cond),
        "conditional_ceiling": float(cond),
        "naive_conditioning_boost": float(boost_naive),
        "mcts_conditioning_boost": float(boost),
        "n_positions": int(result["n_positions"]),
        "n_games": args.n_games,
        "n_rollouts": args.rollouts,
        "sample_rate": args.sample_rate,
        "seed": args.seed,
        "elapsed_seconds": elapsed,
        "per_outcome": outcome_data,
        # JSON object keys must be strings; the in-memory dict is int-keyed.
        "per_distance_from_end": {str(k): v for k, v in distance_data.items()},
    }
    if args.model_accuracy is not None:
        data["model_accuracy"] = args.model_accuracy
        data["adjusted_vs_unconditional"] = adj_uncond
        data["adjusted_vs_naive_conditional"] = adj_naive
        data["adjusted_vs_conditional"] = adj_cond

    with open(output_path, "w") as f:
        json.dump(data, f, indent=2)
    print(f"Saved to {output_path}")


# Standard script entry guard: run only when executed directly, not on import.
if __name__ == "__main__":
    main()