#!/usr/bin/env python3
"""Generate Lc0 self-play data as UCI move sequences.

Drives a single Lc0 process via UCI (GPU is the bottleneck, not CPU).

Output format: one game per line, space-separated UCI moves followed by
the result.

    e2e4 e7e5 g1f3 b8c6 ... 1-0

Two modes per network:
  - Policy-only (nodes=1, Temperature > 0): raw NN policy sampling, no search.
  - MCTS (nodes=N): full search with temperature for the opening.

Tiers:
  t1_policy:     128K games, T1-256x10,   nodes=1 (policy-only)
  t1_mcts_128:   128K games, T1-256x10,   nodes=128
  t3_policy:     128K games, T3-512x15,   nodes=1 (policy-only)
  t3_mcts_128:   128K games, T3-512x15,   nodes=128
  bt4_policy:    128K games, BT4-1024x15, nodes=1 (policy-only)
  bt4_mcts_128:  128K games, BT4-1024x15, nodes=128

Usage:
    python scripts/generate_lc0_data.py --output data/lc0/
    python scripts/generate_lc0_data.py --output data/lc0/ --tier bt4_policy
    python scripts/generate_lc0_data.py --output data/lc0/ --games 1000 --backend cuda-auto
"""

from __future__ import annotations

import argparse
import os
import shutil
import subprocess
import sys
import time
from pathlib import Path

DEFAULT_NET_DIR = "/opt/lc0_nets"

# Short network key -> weights file name inside the network directory.
NETS = {
    "t1": "t1-256x10.pb.gz",
    "t3": "t3-512x15.pb.gz",
    "bt4": "bt4-1024x15.pb.gz",
}

# Hardcoded, non-conflicting seeds per tier.
# Lc0 doesn't have per-game seeds — diversity comes from Temperature sampling.
# NOTE: nothing in this script reads "seed" yet; the values are reserved for
# Python-side RNG should any supplementary randomness be added.
TIERS = [
    {"name": "t1_policy", "net": "t1", "nodes": 1, "games": 128_000, "seed": 100_000},
    {"name": "t1_mcts_128", "net": "t1", "nodes": 128, "games": 128_000, "seed": 110_000},
    {"name": "t3_policy", "net": "t3", "nodes": 1, "games": 128_000, "seed": 200_000},
    {"name": "t3_mcts_128", "net": "t3", "nodes": 128, "games": 128_000, "seed": 210_000},
    {"name": "bt4_policy", "net": "bt4", "nodes": 1, "games": 128_000, "seed": 300_000},
    {"name": "bt4_mcts_128", "net": "bt4", "nodes": 128, "games": 128_000, "seed": 310_000},
]

# Temperature settings sent to Lc0: sample from the policy early in the game,
# play greedily later.
# NOTE(review): Lc0's option documentation describes TempDecayMoves as a
# *linear* decay of the temperature to 0 over that many moves, not a hard
# cutoff after N plies — confirm this is the intended sampling schedule.
OPENING_TEMP = 1.0  # value sent as Lc0's "Temperature" option
TEMP_DECAY_MOVES = 15  # value sent as Lc0's "TempDecayMoves" option


class Lc0Error(RuntimeError):
    """Raised when the Lc0 process stops answering on its UCI pipe."""


def _position_command(moves: list[str]) -> str:
    """Build the UCI ``position`` command for *moves* played from startpos."""
    if not moves:
        return "position startpos"
    return "position startpos moves " + " ".join(moves)


def _parse_bestmove(line: str) -> str | None:
    """Return the move from a ``bestmove`` line, or None if there is none.

    Both a bare ``bestmove`` and ``bestmove (none)`` count as "no move".
    """
    fields = line.split()
    if len(fields) < 2 or fields[1] == "(none)":
        return None
    return fields[1]


class Lc0Engine:
    """Thin line-based UCI wrapper around one Lc0 subprocess."""

    def __init__(self, path: str, weights: str, backend: str = "cuda-auto"):
        """Start Lc0, perform the UCI handshake and apply sampling options.

        Args:
            path: Path to the lc0 executable.
            weights: Path to the network weights file.
            backend: Value for Lc0's ``Backend`` option.

        Raises:
            Lc0Error: If the engine exits before completing the handshake.
        """
        self.proc = subprocess.Popen(
            [path, f"--weights={weights}"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            bufsize=1,
        )
        try:
            self._send("uci")
            self._wait_for("uciok")
            self._send(f"setoption name Backend value {backend}")
            self._send(f"setoption name Temperature value {OPENING_TEMP}")
            self._send(f"setoption name TempDecayMoves value {TEMP_DECAY_MOVES}")
            self._send("isready")
            self._wait_for("readyok")
        except BaseException:
            # Don't leave a half-initialised engine process behind.
            self.close()
            raise

    def _send(self, cmd: str):
        """Write one command line to the engine and flush it immediately."""
        self.proc.stdin.write(cmd + "\n")
        self.proc.stdin.flush()

    def _wait_for(self, token: str) -> list[str]:
        """Read engine output until a line starts with *token*.

        Returns:
            All stripped lines read, the last one being the matching line.

        Raises:
            Lc0Error: If the engine closes its stdout first. ``readline()``
                returns ``""`` forever at EOF, so without this check a
                crashed engine would make the loop spin indefinitely.
        """
        lines = []
        while True:
            raw = self.proc.stdout.readline()
            if not raw:
                raise Lc0Error(
                    f"lc0 closed its output while waiting for {token!r} "
                    f"(exit code: {self.proc.poll()})"
                )
            line = raw.strip()
            lines.append(line)
            if line.startswith(token):
                return lines

    def best_move(self, moves: list[str], nodes: int) -> str | None:
        """Search the position after *moves* and return the engine's move.

        Returns None when the engine reports no move.
        """
        self._send(_position_command(moves))
        self._send(f"go nodes {nodes}")
        # _wait_for stops on the bestmove line, so it is always the last one.
        return _parse_bestmove(self._wait_for("bestmove")[-1])

    def check_terminal(self, moves: list[str]) -> str | None:
        """Check if position after moves is terminal.

        Runs a one-node search; the position is treated as terminal when the
        engine reports no best move.

        Returns 'checkmate' or 'stalemate' or None.
        """
        self._send(_position_command(moves))
        self._send("go nodes 1")
        lines = self._wait_for("bestmove")
        if _parse_bestmove(lines[-1]) is not None:
            return None

        # Distinguish mate vs stalemate from info score
        for line in lines:
            if not line.startswith("info"):
                continue
            fields = line.split()
            if "score" in fields:
                after_score = fields.index("score") + 1
                if fields[after_score:after_score + 1] == ["mate"]:
                    return "checkmate"
        return "stalemate"

    def new_game(self):
        """Tell the engine a new game starts and wait until it is ready."""
        self._send("ucinewgame")
        self._send("isready")
        self._wait_for("readyok")

    def close(self):
        """Ask the engine to quit; kill it if it is gone or too slow."""
        try:
            self._send("quit")
            self.proc.wait(timeout=5)
        except (OSError, ValueError, subprocess.TimeoutExpired):
            # Broken/closed pipe or no exit within 5 s: force it down and
            # reap the process so no zombie is left behind.
            self.proc.kill()
            self.proc.wait()


def play_game(
    engine: Lc0Engine, nodes: int, max_ply: int = 500
) -> tuple[list[str], str]:
    """Play one self-play game.

    Moves are requested until the engine reports no move or *max_ply* plies
    have been played. Draws by repetition, the 50-move rule or insufficient
    material are not detected here, so such games run to the ply cap.

    Returns (moves_uci, result), where result is "1-0", "0-1", "1/2-1/2"
    or "*" (no moves / unknown ending).
    """
    engine.new_game()
    moves: list[str] = []

    for _ in range(max_ply):
        move = engine.best_move(moves, nodes)
        if move is None:
            break
        moves.append(move)

    n = len(moves)
    if n == 0:
        return moves, "*"

    # Also consulted when the ply cap was hit: the final allowed move may
    # itself have delivered mate, which must not be scored as a draw.
    terminal = engine.check_terminal(moves)
    if terminal == "checkmate":
        # An even ply count means White is to move and therefore mated.
        return moves, "0-1" if n % 2 == 0 else "1-0"
    if terminal == "stalemate" or n >= max_ply:
        return moves, "1/2-1/2"
    return moves, "*"


def generate_tier(
    lc0_path: str,
    net_dir: Path,
    output_dir: Path,
    tier: dict,
    backend: str,
):
    """Generate every game of one tier and write them to ``<name>.txt``.

    Args:
        lc0_path: Path to the lc0 executable.
        net_dir: Directory holding the network files listed in NETS.
        output_dir: Existing directory that receives the output file.
        tier: Tier description with "name", "net", "nodes" and "games".
        backend: Lc0 backend name.

    Returns:
        Path of the written file. Exits the process with status 1 when the
        tier's network file is missing.
    """
    nodes = tier["nodes"]
    total_games = tier["games"]
    name = tier["name"]
    net_file = net_dir / NETS[tier["net"]]
    out_path = output_dir / f"{name}.txt"

    print(f"\n{'=' * 60}")
    print(f"Generating {total_games:,} games: {name}")
    print(f"Network: {net_file.name}")
    print(f"Nodes: {nodes} ({'policy-only' if nodes == 1 else 'MCTS'})")
    print(f"Backend: {backend}")
    print(f"Temperature: {OPENING_TEMP} for {TEMP_DECAY_MOVES} plies, then greedy")
    print(f"Output: {out_path}")
    print(f"{'=' * 60}")

    if not net_file.exists():
        print(f"ERROR: Network not found: {net_file}")
        sys.exit(1)

    engine = Lc0Engine(lc0_path, str(net_file), backend=backend)
    try:
        t0 = time.perf_counter()
        with open(out_path, "w", encoding="utf-8") as f:
            for i in range(1, total_games + 1):
                moves, result = play_game(engine, nodes)
                # Joining moves and result together avoids a leading space
                # on the (rare) line for a game without any moves.
                f.write(" ".join([*moves, result]) + "\n")

                if i % 100 == 0 or i == total_games:
                    elapsed = time.perf_counter() - t0
                    rate = i / elapsed if elapsed > 0 else 0.0
                    eta = (total_games - i) / rate if rate > 0 else 0
                    # The line ends in "\r", so it must be flushed
                    # explicitly to show up on a line-buffered terminal.
                    print(
                        f" {i:>7,}/{total_games:,} ({i/total_games:.1%}) "
                        f"{rate:.1f} games/s ETA {eta/60:.0f}m "
                        f"last: {len(moves)} ply, {result}",
                        end="\r",
                        flush=True,
                    )
        elapsed = time.perf_counter() - t0
    finally:
        # Always stop the engine, including on errors and Ctrl-C.
        engine.close()

    rate = total_games / elapsed if elapsed > 0 else 0.0
    print(f"\n Done: {total_games:,} games in {elapsed / 60:.1f}m")
    print(f" Rate: {rate:.1f} games/s")
    print(f" File: {out_path} ({out_path.stat().st_size / 1e6:.1f} MB)")
    return out_path


def main():
    """Parse command-line arguments and generate the selected tier(s)."""
    parser = argparse.ArgumentParser(
        description="Generate Lc0 self-play UCI data (GPU)"
    )
    parser.add_argument(
        "--lc0",
        type=str,
        default="lc0",
        help="Path to lc0 binary (default: lc0 on PATH)",
    )
    parser.add_argument(
        "--net-dir",
        type=str,
        default=DEFAULT_NET_DIR,
        help="Directory containing .pb.gz network files",
    )
    parser.add_argument(
        "--output", type=str, default="data/lc0", help="Output directory"
    )
    parser.add_argument(
        "--tier",
        type=str,
        default=None,
        help="Only generate this tier (e.g. bt4_policy)",
    )
    parser.add_argument(
        "--games", type=int, default=None, help="Override number of games per tier"
    )
    parser.add_argument(
        "--backend",
        type=str,
        default="cuda-auto",
        help="Lc0 backend (cuda-auto, cudnn-fp16, opencl, eigen)",
    )
    args = parser.parse_args()

    # Resolve lc0 path: an existing file wins, otherwise search PATH.
    lc0_path = args.lc0
    if not os.path.isfile(lc0_path):
        found = shutil.which(lc0_path)
        if not found:
            print(f"ERROR: lc0 not found at '{lc0_path}' or on PATH")
            sys.exit(1)
        lc0_path = found

    net_dir = Path(args.net_dir)
    if not net_dir.is_dir():
        print(f"ERROR: Network directory not found: {net_dir}")
        sys.exit(1)

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    tiers = TIERS
    if args.tier is not None:
        matched = [t for t in TIERS if t["name"] == args.tier]
        if not matched:
            valid = ", ".join(t["name"] for t in TIERS)
            print(f"ERROR: unknown tier '{args.tier}'. Valid: {valid}")
            sys.exit(1)
        tiers = matched

    for tier in tiers:
        if args.games is not None:
            # Copy rather than mutate the module-level TIERS entries.
            tier = {**tier, "games": args.games}
        generate_tier(lc0_path, net_dir, output_dir, tier, args.backend)

    print(f"\nAll done. Files in {output_dir}/")


if __name__ == "__main__":
    main()