| |
| """Generate Lc0 self-play data as UCI move sequences. |
| |
| Drives a single Lc0 process via UCI (GPU is the bottleneck, not CPU). |
| Output format: one game per line, space-separated UCI moves followed by the result. |
| |
| e2e4 e7e5 g1f3 b8c6 ... 1-0 |
| |
| Two modes per network: |
| - Policy-only (nodes=1, Temperature > 0): raw NN policy sampling, no search. |
| - MCTS (nodes=N): full search with temperature for the opening. |
| |
| Tiers: |
| t1_policy: 128K games, T1-256x10, nodes=1 (policy-only) |
| t1_mcts_128: 128K games, T1-256x10, nodes=128 |
| t3_policy: 128K games, T3-512x15, nodes=1 (policy-only) |
| t3_mcts_128: 128K games, T3-512x15, nodes=128 |
| bt4_policy: 128K games, BT4-1024x15, nodes=1 (policy-only) |
| bt4_mcts_128: 128K games, BT4-1024x15, nodes=128 |
| |
| Usage: |
| python scripts/generate_lc0_data.py --output data/lc0/ |
| python scripts/generate_lc0_data.py --output data/lc0/ --tier bt4_policy |
| python scripts/generate_lc0_data.py --output data/lc0/ --games 1000 --backend cuda-auto |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import os |
| import subprocess |
| import sys |
| import time |
| from pathlib import Path |
|
|
# Directory used for network weight files when --net-dir is not given.
DEFAULT_NET_DIR = "/opt/lc0_nets"

# Short network key (the "net" field of each tier) -> weights filename,
# looked up inside the network directory.
NETS = dict(
    t1="t1-256x10.pb.gz",
    t3="t3-512x15.pb.gz",
    bt4="bt4-1024x15.pb.gz",
)
|
|
| |
| |
| |
# One entry per output file. Fields:
#   name  - tier identifier; also the stem of the output file (<name>.txt)
#   net   - key into NETS selecting the weights file
#   nodes - node budget passed to "go nodes" (1 means policy-only)
#   games - number of games to generate (overridable with --games)
#   seed  - NOTE(review): not read by any code visible in this file
TIERS = [
    dict(name=name, net=net, nodes=nodes, games=128_000, seed=seed)
    for name, net, nodes, seed in (
        ("t1_policy", "t1", 1, 100_000),
        ("t1_mcts_128", "t1", 128, 110_000),
        ("t3_policy", "t3", 1, 200_000),
        ("t3_mcts_128", "t3", 128, 210_000),
        ("bt4_policy", "bt4", 1, 300_000),
        ("bt4_mcts_128", "bt4", 128, 310_000),
    )
]
|
|
| |
# Value sent to Lc0 as the "Temperature" UCI option when an engine starts.
OPENING_TEMP: float = 1.0
# Value sent to Lc0 as the "TempDecayMoves" UCI option when an engine starts.
TEMP_DECAY_MOVES: int = 15
|
|
|
|
class Lc0Engine:
    """Synchronous wrapper around one Lc0 child process driven over UCI.

    Commands are written to the child's stdin and replies are read line by
    line from its stdout. The instance can be used as a context manager so
    the child process is always shut down.
    """

    def __init__(self, path: str, weights: str, backend: str = "cuda-auto"):
        """Start Lc0 and perform the UCI handshake.

        Args:
            path: Path to the lc0 executable.
            weights: Path to the network weights file.
            backend: Value for Lc0's ``Backend`` UCI option.

        Raises:
            RuntimeError: If Lc0 closes its output before the handshake ends.
        """
        self.proc = subprocess.Popen(
            [path, f"--weights={weights}"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            bufsize=1,
        )
        try:
            self._send("uci")
            self._wait_for("uciok")
            self._send(f"setoption name Backend value {backend}")
            self._send(f"setoption name Temperature value {OPENING_TEMP}")
            self._send(f"setoption name TempDecayMoves value {TEMP_DECAY_MOVES}")
            self._send("isready")
            self._wait_for("readyok")
        except BaseException:
            # Do not leave a half-initialised child process behind.
            self.close()
            raise

    def __enter__(self) -> "Lc0Engine":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _send(self, cmd: str):
        """Write one command line to the engine and flush it immediately."""
        self.proc.stdin.write(cmd + "\n")
        self.proc.stdin.flush()

    def _wait_for(self, token: str) -> list[str]:
        """Read engine output until a line starts with ``token``.

        Returns every (stripped) line read, the matching line last.

        Raises:
            RuntimeError: If the engine's output reaches end-of-file first.
        """
        lines = []
        while True:
            raw = self.proc.stdout.readline()
            if not raw:
                # readline() returns "" only at EOF; without this check a dead
                # engine would make the loop spin forever.
                raise RuntimeError(
                    f"Lc0 closed its output while waiting for '{token}'"
                )
            line = raw.strip()
            lines.append(line)
            if line.startswith(token):
                return lines

    @staticmethod
    def _position_command(moves: list[str]) -> str:
        """Build the UCI ``position`` command for the start position plus moves."""
        if moves:
            return "position startpos moves " + " ".join(moves)
        return "position startpos"

    @staticmethod
    def _parse_bestmove(line: str) -> str | None:
        """Extract the move from a ``bestmove`` line; None if absent or ``(none)``."""
        parts = line.split()
        move = parts[1] if len(parts) > 1 else None
        return None if move == "(none)" else move

    @staticmethod
    def _reports_mate(line: str) -> bool:
        """Return True for an ``info`` line whose score token is followed by ``mate``."""
        if not line.startswith("info"):
            return False
        parts = line.split()
        try:
            return parts[parts.index("score") + 1] == "mate"
        except (ValueError, IndexError):
            return False

    def _search(self, moves: list[str], nodes: int) -> list[str]:
        """Set the position, run ``go nodes`` and return all output up to ``bestmove``."""
        self._send(self._position_command(moves))
        self._send(f"go nodes {nodes}")
        return self._wait_for("bestmove")

    def best_move(self, moves: list[str], nodes: int) -> str | None:
        """Return the engine's move after ``moves``, or None if it reports none."""
        # _wait_for stops at the first "bestmove" line, so it is always last.
        return self._parse_bestmove(self._search(moves, nodes)[-1])

    def check_terminal(self, moves: list[str]) -> str | None:
        """Check if position after moves is terminal.

        Returns 'checkmate' or 'stalemate' or None.

        The position counts as terminal when a one-node search yields no
        move; it is 'checkmate' when any info line of that search carries a
        mate score, otherwise 'stalemate'.
        NOTE(review): this relies on how Lc0 reports move-less positions;
        confirm against the engine version in use.
        """
        lines = self._search(moves, 1)
        if self._parse_bestmove(lines[-1]) is not None:
            return None
        if any(self._reports_mate(line) for line in lines):
            return "checkmate"
        return "stalemate"

    def new_game(self):
        """Tell the engine a new game starts and wait until it is ready."""
        self._send("ucinewgame")
        self._send("isready")
        self._wait_for("readyok")

    def close(self):
        """Shut the engine down; safe to call more than once.

        Asks Lc0 to quit and waits up to 5 seconds; if that fails the process
        is killed and reaped. The pipes are closed in every case.
        """
        proc = self.proc
        if proc.poll() is None:
            try:
                self._send("quit")
                proc.wait(timeout=5)
            except (OSError, ValueError, subprocess.TimeoutExpired):
                proc.kill()
                proc.wait()  # reap the child so it does not linger as a zombie
        for pipe in (proc.stdin, proc.stdout):
            try:
                pipe.close()
            except OSError:
                # Best-effort cleanup: flushing into a dead pipe may fail.
                pass
|
|
|
|
def play_game(
    engine: Lc0Engine, nodes: int, max_ply: int = 500
) -> tuple[list[str], str]:
    """Play one self-play game. Returns (moves_uci, result).

    Args:
        engine: Engine used for every move of both sides.
        nodes: Node budget passed to the engine for each move.
        max_ply: Maximum number of half-moves to play.

    The result is:
        "1-0" / "0-1"  the final position is checkmate (the side that made
                       the last move wins),
        "1/2-1/2"      stalemate, or the ply cap was reached without mate,
        "*"            no move was played, or the engine stopped producing
                       moves in a position not recognised as terminal.
    """
    engine.new_game()
    moves: list[str] = []

    while len(moves) < max_ply:
        move = engine.best_move(moves, nodes)
        if move is None:
            break
        moves.append(move)

    n = len(moves)
    if n == 0:
        return moves, "*"

    # Always classify the final position, including when the ply cap was hit:
    # the last permitted move may itself have delivered checkmate.
    terminal = engine.check_terminal(moves)
    if terminal == "checkmate":
        # An even ply count means White is to move and has been mated.
        result = "0-1" if n % 2 == 0 else "1-0"
    elif terminal == "stalemate" or n >= max_ply:
        result = "1/2-1/2"
    else:
        result = "*"

    return moves, result
|
|
|
|
def generate_tier(
    lc0_path: str,
    net_dir: Path,
    output_dir: Path,
    tier: dict,
    backend: str,
):
    """Generate all games of one tier and write them to ``<output_dir>/<name>.txt``.

    Args:
        lc0_path: Path to the lc0 executable.
        net_dir: Directory containing the network weights files.
        output_dir: Existing directory that receives the output file.
        tier: Tier description with the keys "name", "net", "nodes", "games".
        backend: Lc0 backend name passed to the engine.

    Returns:
        Path of the written file (one game per line: moves, then result).

    Exits the process with status 1 if the tier's network file is missing.
    The output file is opened in write mode, so an existing file is replaced.
    """
    nodes = tier["nodes"]
    total_games = tier["games"]
    name = tier["name"]
    net_file = net_dir / NETS[tier["net"]]
    out_path = output_dir / f"{name}.txt"

    banner = "=" * 60
    mode = "policy-only" if nodes == 1 else "MCTS"
    print(f"\n{banner}")
    print(f"Generating {total_games:,} games: {name}")
    print(f"Network: {net_file.name}")
    print(f"Nodes: {nodes} ({mode})")
    print(f"Backend: {backend}")
    # Name the actual engine option instead of claiming a fixed ply window.
    print(
        f"Temperature: {OPENING_TEMP}, decaying over "
        f"{TEMP_DECAY_MOVES} moves (Lc0 TempDecayMoves)"
    )
    print(f"Output: {out_path}")
    print(banner)

    if not net_file.exists():
        print(f"ERROR: Network not found: {net_file}")
        sys.exit(1)

    engine = Lc0Engine(lc0_path, str(net_file), backend=backend)
    t0 = time.perf_counter()
    try:
        with open(out_path, "w", encoding="utf-8") as f:
            for i in range(1, total_games + 1):
                moves, result = play_game(engine, nodes)
                # Joining moves and result together avoids a leading space
                # on the line when a game has no moves.
                f.write(" ".join([*moves, result]) + "\n")

                if i % 100 == 0 or i == total_games:
                    elapsed = time.perf_counter() - t0
                    rate = i / elapsed if elapsed > 0 else 0.0
                    eta = (total_games - i) / rate if rate > 0 else 0
                    print(
                        f" {i:>7,}/{total_games:,} ({i/total_games:.1%}) "
                        f"{rate:.1f} games/s ETA {eta/60:.0f}m "
                        f"last: {len(moves)} ply, {result}",
                        end="\r",
                        # "\r" does not trigger a line-buffered flush.
                        flush=True,
                    )
        elapsed = time.perf_counter() - t0
    finally:
        # Shut the engine down even on Ctrl-C or an engine failure, so a long
        # run never leaves an Lc0 process holding the GPU.
        engine.close()

    final_rate = total_games / elapsed if elapsed > 0 else 0.0
    print(f"\n Done: {total_games:,} games in {elapsed / 60:.1f}m")
    print(f" Rate: {final_rate:.1f} games/s")
    print(f" File: {out_path} ({out_path.stat().st_size / 1e6:.1f} MB)")

    return out_path
|
|
|
|
def main():
    """Parse command-line options, validate them, and generate the requested tiers.

    Exits with status 1 (after printing an ``ERROR:`` line) when the lc0
    binary, the network directory, the requested tier, or a required network
    file cannot be found. A non-positive ``--games`` value is rejected as an
    argparse usage error, because it would only truncate the output files.
    """
    parser = argparse.ArgumentParser(
        description="Generate Lc0 self-play UCI data (GPU)"
    )
    parser.add_argument(
        "--lc0",
        type=str,
        default="lc0",
        help="Path to lc0 binary (default: lc0 on PATH)",
    )
    parser.add_argument(
        "--net-dir",
        type=str,
        default=DEFAULT_NET_DIR,
        help="Directory containing .pb.gz network files",
    )
    parser.add_argument(
        "--output", type=str, default="data/lc0", help="Output directory"
    )
    parser.add_argument(
        "--tier",
        type=str,
        default=None,
        help="Only generate this tier (e.g. bt4_policy)",
    )
    parser.add_argument(
        "--games", type=int, default=None, help="Override number of games per tier"
    )
    parser.add_argument(
        "--backend",
        type=str,
        default="cuda-auto",
        help="Lc0 backend (cuda-auto, cudnn-fp16, opencl, eigen)",
    )
    args = parser.parse_args()

    if args.games is not None and args.games < 1:
        parser.error("--games must be a positive integer")

    # Accept either an explicit file path or a command name found on PATH.
    lc0_path = args.lc0
    if not os.path.isfile(lc0_path):
        import shutil

        found = shutil.which(lc0_path)
        if not found:
            print(f"ERROR: lc0 not found at '{lc0_path}' or on PATH")
            sys.exit(1)
        lc0_path = found

    net_dir = Path(args.net_dir)
    if not net_dir.is_dir():
        print(f"ERROR: Network directory not found: {net_dir}")
        sys.exit(1)

    tiers = TIERS
    if args.tier is not None:
        matched = [t for t in TIERS if t["name"] == args.tier]
        if not matched:
            valid = ", ".join(t["name"] for t in TIERS)
            print(f"ERROR: unknown tier '{args.tier}'. Valid: {valid}")
            sys.exit(1)
        tiers = matched

    # Copy each tier when overriding so the module-level TIERS stays untouched.
    if args.games is not None:
        tiers = [{**t, "games": args.games} for t in tiers]

    # Fail fast: check every required network before any generation starts,
    # rather than discovering a missing file after earlier tiers have run.
    missing = []
    for t in tiers:
        net_file = net_dir / NETS[t["net"]]
        if not net_file.exists() and net_file not in missing:
            missing.append(net_file)
    if missing:
        for net_file in missing:
            print(f"ERROR: Network not found: {net_file}")
        sys.exit(1)

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    for tier in tiers:
        generate_tier(lc0_path, net_dir, output_dir, tier, args.backend)

    print(f"\nAll done. Files in {output_dir}/")
|
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|
|