#!/usr/bin/env python3
"""Generate Lc0 self-play data as UCI move sequences.
Drives a single Lc0 process via UCI (GPU is the bottleneck, not CPU).
Output format: one game per line, space-separated UCI moves followed by the result.
e2e4 e7e5 g1f3 b8c6 ... 1-0
Two modes per network:
- Policy-only (nodes=1, Temperature > 0): raw NN policy sampling, no search.
- MCTS (nodes=N): full search with temperature for the opening.
Tiers:
t1_policy: 128K games, T1-256x10, nodes=1 (policy-only)
t1_mcts_128: 128K games, T1-256x10, nodes=128
t3_policy: 128K games, T3-512x15, nodes=1 (policy-only)
t3_mcts_128: 128K games, T3-512x15, nodes=128
bt4_policy: 128K games, BT4-1024x15, nodes=1 (policy-only)
bt4_mcts_128: 128K games, BT4-1024x15, nodes=128
Usage:
python scripts/generate_lc0_data.py --output data/lc0/
python scripts/generate_lc0_data.py --output data/lc0/ --tier bt4_policy
python scripts/generate_lc0_data.py --output data/lc0/ --games 1000 --backend cuda-auto
"""
from __future__ import annotations
import argparse
import os
import subprocess
import sys
import time
from pathlib import Path
# Directory searched for network weight files when --net-dir is not given.
DEFAULT_NET_DIR = "/opt/lc0_nets"

# Short network key -> weight file name inside the network directory.
NETS = {
    "t1": "t1-256x10.pb.gz",
    "t3": "t3-512x15.pb.gz",
    "bt4": "bt4-1024x15.pb.gz",
}

# One row per tier: (name, network key, nodes per move, seed).
# The seeds are hardcoded and distinct per tier. Lc0 has no per-game seed;
# game diversity comes from Temperature sampling, so the seeds are only meant
# for supplementary Python-side randomness.
# NOTE(review): nothing in the visible code reads "seed" — confirm it is used.
_TIER_ROWS = (
    ("t1_policy", "t1", 1, 100_000),
    ("t1_mcts_128", "t1", 128, 110_000),
    ("t3_policy", "t3", 1, 200_000),
    ("t3_mcts_128", "t3", 128, 210_000),
    ("bt4_policy", "bt4", 1, 300_000),
    ("bt4_mcts_128", "bt4", 128, 310_000),
)

# Every tier generates the same number of games by default.
_GAMES_PER_TIER = 128_000

TIERS = [
    {
        "name": tier_name,
        "net": net_key,
        "nodes": node_count,
        "games": _GAMES_PER_TIER,
        "seed": tier_seed,
    }
    for tier_name, net_key, node_count, tier_seed in _TIER_ROWS
]

# Temperature schedule: sample from the policy in the opening, greedy after.
OPENING_TEMP = 1.0  # temperature for the first TEMP_DECAY_MOVES plies
TEMP_DECAY_MOVES = 15  # after this many plies, temperature drops to 0 (greedy)
class Lc0Engine:
def __init__(self, path: str, weights: str, backend: str = "cuda-auto"):
self.proc = subprocess.Popen(
[path, f"--weights={weights}"],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
text=True,
bufsize=1,
)
self._send("uci")
self._wait_for("uciok")
self._send(f"setoption name Backend value {backend}")
self._send(f"setoption name Temperature value {OPENING_TEMP}")
self._send(f"setoption name TempDecayMoves value {TEMP_DECAY_MOVES}")
self._send("isready")
self._wait_for("readyok")
def _send(self, cmd: str):
self.proc.stdin.write(cmd + "\n")
self.proc.stdin.flush()
def _wait_for(self, token: str) -> list[str]:
lines = []
while True:
line = self.proc.stdout.readline().strip()
lines.append(line)
if line.startswith(token):
return lines
def best_move(self, moves: list[str], nodes: int) -> str | None:
pos = "position startpos"
if moves:
pos += " moves " + " ".join(moves)
self._send(pos)
self._send(f"go nodes {nodes}")
lines = self._wait_for("bestmove")
for line in lines:
if line.startswith("bestmove"):
parts = line.split()
move = parts[1] if len(parts) > 1 else None
if move == "(none)":
return None
return move
return None
def check_terminal(self, moves: list[str]) -> str | None:
"""Check if position after moves is terminal.
Returns 'checkmate' or 'stalemate' or None.
"""
pos = "position startpos"
if moves:
pos += " moves " + " ".join(moves)
self._send(pos)
self._send("go nodes 1")
lines = self._wait_for("bestmove")
is_terminal = False
for line in lines:
if line.startswith("bestmove"):
parts = line.split()
move = parts[1] if len(parts) > 1 else None
if move == "(none)" or move is None:
is_terminal = True
break
if not is_terminal:
return None
# Distinguish mate vs stalemate from info score
for line in lines:
if line.startswith("info") and "score" in line:
parts = line.split()
try:
si = parts.index("score")
if parts[si + 1] == "mate":
return "checkmate"
except (ValueError, IndexError):
pass
return "stalemate"
def new_game(self):
self._send("ucinewgame")
self._send("isready")
self._wait_for("readyok")
def close(self):
try:
self._send("quit")
self.proc.wait(timeout=5)
except Exception:
self.proc.kill()
def play_game(
    engine: Lc0Engine, nodes: int, max_ply: int = 500
) -> tuple[list[str], str]:
    """Play one self-play game and return ``(moves_uci, result)``.

    The engine is asked for a move until it reports none or ``max_ply``
    plies have been played. The result string is:

    - ``"*"`` if no move was played or the ending could not be classified,
    - ``"1/2-1/2"`` if the ply limit was reached or the game is a stalemate,
    - ``"1-0"`` / ``"0-1"`` for checkmate, decided by who moved last.
    """
    engine.new_game()
    history: list[str] = []

    while len(history) < max_ply:
        reply = engine.best_move(history, nodes)
        if reply is None:
            break
        history.append(reply)

    ply_count = len(history)
    if ply_count == 0:
        return history, "*"
    if ply_count >= max_ply:
        return history, "1/2-1/2"

    # The engine stopped before the ply limit; ask it why.
    verdict = engine.check_terminal(history)
    if verdict == "checkmate":
        # An odd ply count means White made the final (mating) move.
        return history, ("1-0" if ply_count % 2 else "0-1")
    if verdict == "stalemate":
        return history, "1/2-1/2"
    return history, "*"
def generate_tier(
    lc0_path: str,
    net_dir: Path,
    output_dir: Path,
    tier: dict,
    backend: str,
):
    """Generate all games of one tier and write them to ``<name>.txt``.

    Args:
        lc0_path: Lc0 executable to launch.
        net_dir: Directory holding the network files listed in ``NETS``.
        output_dir: Directory the tier's output file is written to.
        tier: Tier description; reads the "nodes", "games", "name" and "net"
            keys.
        backend: Lc0 backend name handed to the engine.

    Returns:
        Path of the written file, one game per line (moves, then result).

    Exits the process with status 1 if the network file does not exist.
    """
    nodes = tier["nodes"]
    total_games = tier["games"]
    name = tier["name"]
    net_file = net_dir / NETS[tier["net"]]
    out_path = output_dir / f"{name}.txt"

    print(f"\n{'=' * 60}")
    print(f"Generating {total_games:,} games: {name}")
    print(f"Network: {net_file.name}")
    print(f"Nodes: {nodes} ({'policy-only' if nodes == 1 else 'MCTS'})")
    print(f"Backend: {backend}")
    print(f"Temperature: {OPENING_TEMP} for {TEMP_DECAY_MOVES} plies, then greedy")
    print(f"Output: {out_path}")
    print(f"{'=' * 60}")

    if not net_file.exists():
        print(f"ERROR: Network not found: {net_file}")
        sys.exit(1)

    engine = Lc0Engine(lc0_path, str(net_file), backend=backend)
    t0 = time.perf_counter()
    try:
        with open(out_path, "w") as f:
            for i in range(1, total_games + 1):
                moves, result = play_game(engine, nodes)
                line = " ".join(moves) + " " + result
                f.write(line + "\n")
                if i % 100 == 0 or i == total_games:
                    # Clamp so a zero reading of the clock cannot divide by 0.
                    elapsed = max(time.perf_counter() - t0, 1e-9)
                    rate = i / elapsed
                    eta = (total_games - i) / rate if rate > 0 else 0
                    # "\r" keeps the progress report on a single console line.
                    print(
                        f" {i:>7,}/{total_games:,} ({i/total_games:.1%}) "
                        f"{rate:.1f} games/s ETA {eta/60:.0f}m "
                        f"last: {len(moves)} ply, {result}",
                        end="\r",
                    )
        elapsed = max(time.perf_counter() - t0, 1e-9)
    finally:
        # Always shut the engine down, even on an error or Ctrl-C, so the
        # subprocess is not left running.
        engine.close()

    print(f"\n Done: {total_games:,} games in {elapsed / 60:.1f}m")
    print(f" Rate: {total_games / elapsed:.1f} games/s")
    print(f" File: {out_path} ({out_path.stat().st_size / 1e6:.1f} MB)")
    return out_path
def _build_arg_parser():
    """Create the command-line parser; options are added in table order."""
    parser = argparse.ArgumentParser(
        description="Generate Lc0 self-play UCI data (GPU)"
    )
    option_table = (
        (
            "--lc0",
            {
                "type": str,
                "default": "lc0",
                "help": "Path to lc0 binary (default: lc0 on PATH)",
            },
        ),
        (
            "--net-dir",
            {
                "type": str,
                "default": DEFAULT_NET_DIR,
                "help": "Directory containing .pb.gz network files",
            },
        ),
        (
            "--output",
            {"type": str, "default": "data/lc0", "help": "Output directory"},
        ),
        (
            "--tier",
            {
                "type": str,
                "default": None,
                "help": "Only generate this tier (e.g. bt4_policy)",
            },
        ),
        (
            "--games",
            {
                "type": int,
                "default": None,
                "help": "Override number of games per tier",
            },
        ),
        (
            "--backend",
            {
                "type": str,
                "default": "cuda-auto",
                "help": "Lc0 backend (cuda-auto, cudnn-fp16, opencl, eigen)",
            },
        ),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser


def _abort(message):
    """Print ``message`` and exit the process with status 1."""
    print(message)
    sys.exit(1)


def main():
    """Parse the command line and generate every selected tier.

    Checks, in order: the lc0 binary (as a file, then on PATH), the network
    directory, and the requested tier name. Any failure prints an error and
    exits with status 1. The output directory is created if missing.
    """
    args = _build_arg_parser().parse_args()

    # Accept either a path to the binary or a command name found on PATH.
    lc0_path = args.lc0
    if not os.path.isfile(lc0_path):
        import shutil

        located = shutil.which(lc0_path)
        if not located:
            _abort(f"ERROR: lc0 not found at '{lc0_path}' or on PATH")
        lc0_path = located

    net_dir = Path(args.net_dir)
    if not net_dir.is_dir():
        _abort(f"ERROR: Network directory not found: {net_dir}")

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    selected = TIERS
    if args.tier is not None:
        selected = [entry for entry in TIERS if entry["name"] == args.tier]
        if not selected:
            valid = ", ".join(entry["name"] for entry in TIERS)
            _abort(f"ERROR: unknown tier '{args.tier}'. Valid: {valid}")

    for entry in selected:
        # --games overrides the count on a copy; the TIERS table is untouched.
        spec = entry if args.games is None else {**entry, "games": args.games}
        generate_tier(lc0_path, net_dir, output_dir, spec, args.backend)

    print(f"\nAll done. Files in {output_dir}/")
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|