#!/usr/bin/env python3 """Generate Stockfish self-play data as UCI move sequences. Runs a pool of single-threaded Stockfish engines in parallel, one per core. Output format: one game per line, space-separated UCI moves followed by the result. e2e4 e7e5 g1f3 b8c6 ... 1-0 Each tier uses MultiPV + softmax temperature sampling to produce diverse games from deterministic Stockfish search. Seeds are hardcoded per tier so runs are reproducible. Worker seeds are derived as tier_seed + worker_id. Tiers (by node count): nodes_0001: 128K games, 1 node (near-random) nodes_0032: 128K games, 32 nodes nodes_0128: 128K games, 128 nodes nodes_0256: 128K games, 256 nodes nodes_1024: 128K games, 1024 nodes (strongest) Usage: python scripts/generate_stockfish_data.py --stockfish ~/bin/stockfish --output data/stockfish/ python scripts/generate_stockfish_data.py --stockfish ~/bin/stockfish --output data/stockfish/ --tier nodes_0128 python scripts/generate_stockfish_data.py --stockfish ~/bin/stockfish --output data/stockfish/ --workers 8 """ from __future__ import annotations import argparse import math import os import random import subprocess import sys import time from concurrent.futures import ProcessPoolExecutor from multiprocessing import get_context from pathlib import Path # Hardcoded, non-conflicting seeds per tier. Worker i gets seed = tier_seed + i. TIERS = [ {"name": "nodes_0001", "nodes": 1, "games": 128_000, "seed": 10_000}, {"name": "nodes_0032", "nodes": 32, "games": 128_000, "seed": 20_000}, {"name": "nodes_0128", "nodes": 128, "games": 128_000, "seed": 30_000}, {"name": "nodes_0256", "nodes": 256, "games": 128_000, "seed": 40_000}, {"name": "nodes_1024", "nodes": 1024, "games": 128_000, "seed": 50_000}, ] MULTI_PV = 5 # candidates per move during opening TEMPERATURE = 1.0 # softmax temperature (higher = more random) SAMPLE_PLIES = 15 # use MultiPV+temperature for the first 15 plies, then top-1 class StockfishEngine: def __init__(self, path: str, hash_mb: int = 16, multi_pv: int = 1): self.last_terminal: str | None = None # "checkmate", "stalemate", or None self.proc = subprocess.Popen( [path], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, bufsize=1, ) self._send("uci") self._wait_for("uciok") self._send(f"setoption name Hash value {hash_mb}") self._send("setoption name Threads value 1") if multi_pv > 1: self._send(f"setoption name MultiPV value {multi_pv}") self._send("isready") self._wait_for("readyok") def _send(self, cmd: str): self.proc.stdin.write(cmd + "\n") self.proc.stdin.flush() def _wait_for(self, token: str) -> list[str]: lines = [] while True: line = self.proc.stdout.readline().strip() lines.append(line) if line.startswith(token): return lines def candidates(self, moves: list[str], nodes: int) -> list[tuple[str, float]]: """Return list of (uci_move, score_cp) from MultiPV search. Side effect: sets self.last_terminal to "checkmate" or "stalemate" when bestmove is (none), otherwise None. """ self.last_terminal = None pos = "position startpos" if moves: pos += " moves " + " ".join(moves) self._send(pos) self._send(f"go nodes {nodes}") lines = self._wait_for("bestmove") # Parse info lines for multipv results. Keep the last (deepest) line # for each multipv index. best_by_pv: dict[int, tuple[str, float]] = {} for line in lines: if not line.startswith("info") or " multipv " not in line: continue parts = line.split() try: pv_idx = int(parts[parts.index("multipv") + 1]) # Parse score si = parts.index("score") if parts[si + 1] == "cp": score = float(parts[si + 2]) elif parts[si + 1] == "mate": mate_in = int(parts[si + 2]) score = 30_000.0 if mate_in > 0 else -30_000.0 else: continue # First move of the PV pv_start = parts.index("pv") move = parts[pv_start + 1] best_by_pv[pv_idx] = (move, score) except (ValueError, IndexError): continue if not best_by_pv: # Fallback: parse bestmove directly for line in lines: if line.startswith("bestmove"): parts = line.split() m = parts[1] if len(parts) > 1 else None if m and m != "(none)": return [(m, 0.0)] # No legal moves — distinguish checkmate from stalemate # by checking the score in the info lines. self.last_terminal = "stalemate" # default for info_line in lines: if not info_line.startswith("info") or "score" not in info_line: continue info_parts = info_line.split() try: si = info_parts.index("score") if info_parts[si + 1] == "mate": self.last_terminal = "checkmate" except (ValueError, IndexError): pass return [] return [] # Return sorted by pv index return [best_by_pv[k] for k in sorted(best_by_pv)] def set_multi_pv(self, n: int): self._send(f"setoption name MultiPV value {n}") self._send("isready") self._wait_for("readyok") def new_game(self): self._send("ucinewgame") self._send("isready") self._wait_for("readyok") def close(self): try: self._send("quit") self.proc.wait(timeout=5) except Exception: self.proc.kill() def softmax_sample( candidates: list[tuple[str, float]], temperature: float, rng: random.Random ) -> str | None: """Pick a move from candidates using softmax over centipawn scores.""" if not candidates: return None if len(candidates) == 1: return candidates[0][0] # temperature=0 → argmax (pick the highest-scoring move) if temperature <= 0: return max(candidates, key=lambda c: c[1])[0] scores = [s for _, s in candidates] # Shift for numerical stability max_s = max(scores) # Scale: 100 cp ~ 1 pawn. temperature=1.0 means 1-pawn difference ≈ e fold. exps = [math.exp((s - max_s) / (100.0 * temperature)) for s in scores] total = sum(exps) probs = [e / total for e in exps] r = rng.random() cumulative = 0.0 for i, p in enumerate(probs): cumulative += p if r <= cumulative: return candidates[i][0] return candidates[-1][0] def play_game( engine: StockfishEngine, nodes: int, rng: random.Random, temperature: float, multi_pv: int, sample_plies: int, max_ply: int = 500, ) -> tuple[list[str], str]: """Play one self-play game with temperature sampling. Returns (moves_uci, result).""" engine.new_game() engine.set_multi_pv(multi_pv) moves: list[str] = [] switched = False for ply in range(max_ply): # Switch to top-1 after the opening phase if not switched and ply >= sample_plies: engine.set_multi_pv(1) switched = True cands = engine.candidates(moves, nodes) if switched: # Top-1 mode: just take the best move move = cands[0][0] if cands else None else: move = softmax_sample(cands, temperature, rng) if move is None: break moves.append(move) n = len(moves) if n == 0: return moves, "*" if n >= max_ply: result = "1/2-1/2" elif engine.last_terminal == "checkmate": # Side to move was checkmated result = "0-1" if n % 2 == 0 else "1-0" elif engine.last_terminal == "stalemate": result = "1/2-1/2" else: result = "*" return moves, result def worker_generate( stockfish_path: str, nodes: int, num_games: int, hash_mb: int, worker_id: int, seed: int, temperature: float, multi_pv: int, sample_plies: int, ) -> tuple[list[str], int, float]: """Worker: play num_games, return (lines, total_ply, elapsed).""" rng = random.Random(seed) engine = StockfishEngine(stockfish_path, hash_mb=hash_mb, multi_pv=multi_pv) lines: list[str] = [] total_ply = 0 t0 = time.perf_counter() for i in range(num_games): moves, result = play_game(engine, nodes, rng, temperature, multi_pv, sample_plies) total_ply += len(moves) line = " ".join(moves) + " " + result lines.append(line) if (i + 1) % 500 == 0: elapsed = time.perf_counter() - t0 rate = (i + 1) / elapsed print( f" [worker {worker_id:>2}] {i + 1:>6,}/{num_games:,} " f"{rate:.1f} games/s", flush=True, ) elapsed = time.perf_counter() - t0 engine.close() return lines, total_ply, elapsed def generate_tier( stockfish_path: str, output_dir: Path, tier: dict, num_workers: int, hash_mb: int, temperature: float, multi_pv: int, sample_plies: int, ): nodes = tier["nodes"] total_games = tier["games"] tier_seed = tier["seed"] name = tier["name"] out_path = output_dir / f"{name}.txt" # Distribute games across workers base = total_games // num_workers remainder = total_games % num_workers per_worker = [base + (1 if i < remainder else 0) for i in range(num_workers)] print(f"\n{'=' * 60}") print(f"Generating {total_games:,} games: {name} (nodes={nodes})") print(f"Workers: {num_workers}, games/worker: ~{base}") print(f"MultiPV: {multi_pv} for first {sample_plies} plies, then top-1; temperature: {temperature}") print(f"Seed base: {tier_seed} (workers {tier_seed}..{tier_seed + num_workers - 1})") print(f"Output: {out_path}") print(f"{'=' * 60}") ctx = get_context("spawn") wall_t0 = time.perf_counter() with ProcessPoolExecutor(max_workers=num_workers, mp_context=ctx) as pool: futures = [ pool.submit( worker_generate, stockfish_path, nodes, per_worker[i], hash_mb, i, tier_seed + i, temperature, multi_pv, sample_plies, ) for i in range(num_workers) ] results = [f.result() for f in futures] wall_elapsed = time.perf_counter() - wall_t0 # Collect and write total_ply = 0 total_written = 0 with open(out_path, "w") as f: for lines, ply, _ in results: for line in lines: f.write(line + "\n") total_ply += ply total_written += len(lines) avg_ply = total_ply / total_written if total_written else 0 rate = total_written / wall_elapsed print(f"\n Done: {total_written:,} games in {wall_elapsed / 60:.1f}m") print(f" Rate: {rate:.1f} games/s Avg ply: {avg_ply:.0f}") print(f" File: {out_path} ({out_path.stat().st_size / 1e6:.1f} MB)") return out_path def main(): parser = argparse.ArgumentParser( description="Generate Stockfish self-play UCI data (parallel)" ) parser.add_argument( "--stockfish", type=str, default=os.path.expanduser("~/bin/stockfish"), help="Path to stockfish binary", ) parser.add_argument( "--output", type=str, default="data/stockfish", help="Output directory" ) parser.add_argument( "--tier", type=str, default=None, help="Only generate this tier (e.g. nodes_0128)", ) parser.add_argument( "--workers", type=int, default=14, help="Number of parallel engines" ) parser.add_argument( "--games", type=int, default=None, help="Override number of games per tier" ) parser.add_argument( "--hash", type=int, default=16, help="Hash table MB per engine" ) parser.add_argument( "--temperature", type=float, default=TEMPERATURE, help="Softmax temperature" ) parser.add_argument( "--multi-pv", type=int, default=MULTI_PV, help="MultiPV candidates per move" ) parser.add_argument( "--sample-plies", type=int, default=SAMPLE_PLIES, help="Use MultiPV+temperature for the first N plies, then top-1" ) args = parser.parse_args() sf_path = os.path.expanduser(args.stockfish) if not os.path.isfile(sf_path): print(f"ERROR: Stockfish not found at {sf_path}") sys.exit(1) output_dir = Path(args.output) output_dir.mkdir(parents=True, exist_ok=True) tiers = TIERS if args.tier is not None: matched = [t for t in TIERS if t["name"] == args.tier] if not matched: valid = ", ".join(t["name"] for t in TIERS) print(f"ERROR: unknown tier '{args.tier}'. Valid: {valid}") sys.exit(1) tiers = matched for tier in tiers: if args.games is not None: tier = {**tier, "games": args.games} generate_tier( sf_path, output_dir, tier, args.workers, args.hash, args.temperature, args.multi_pv, args.sample_plies, ) print(f"\nAll done. Files in {output_dir}/") if __name__ == "__main__": main()