ModuleMind

Running on Zero

App Files Files Community

ModuleMind / modular_mind.py

Quazim0t0

Add files using upload-large-folder tool

45e7dfb verified 10 days ago

raw

history blame contribute delete

7.95 kB

	"""
	modular_mind.py -- numpy inference for the trained Modular Mind boss brain.

	Loads the weights produced by train.py (mm_weights.npz) and runs the exact same
	forward pass as mm_torch.ModularMindPolicy, in pure numpy (no torch needed at game
	runtime -> tiny, fast, instant cold-start on a HuggingFace Space).

	decide(state) returns the chosen boss action plus rich telemetry (per-specialist
	drives, the shared latent, the coordinator's modulation) so the game can VISUALISE
	the Modular Mind making each decision -- including the two modulator specialists
	(Punisher, Enrage) whose only influence is the latent they write into the bridge.
	"""
	from __future__ import annotations

	import os

	import numpy as np

	from features import ACTIONS, NF, extract_features, legal_mask

	# Specialist roster: (name, owned action or None, colour string). Keep the order
	# and contents identical to mm_torch.SPEC_DEFS -- the weights are looked up by
	# position ("s0_...", "s1_...", ...). An owner of None marks a modulator that
	# has no action drive of its own.
	SPEC_DEFS = [
	    ("Aggressor", "CLEAVE", "#ff4d4d"),
	    ("Stalker", "APPROACH", "#4da6ff"),
	    ("Survivor", "RETREAT", "#9b59b6"),
	    ("Baiter", "IDLE", "#f1c40f"),
	    ("Defender", "BLOCK", "#48b0c4"),
	    ("Punisher", None, "#e67e22"),
	    ("Enrage", None, "#c0392b"),
	]

	# Layer sizes used when building the random fallback weights.
	H = 24          # hidden units per specialist
	D_LATENT = 12   # width of each specialist latent / the shared bridge

	# Default weights file, resolved next to this module.
	WEIGHTS_PATH = os.path.join(
	    os.path.dirname(os.path.abspath(__file__)),
	    "mm_weights.npz",
	)


	def _layernorm(x, w, b, eps=1e-5):
	    """Layer-normalise ``x`` over its last axis, then apply scale ``w`` and shift ``b``.

	    The mean and the biased (population) variance are taken over the last
	    axis only, so a 1-D vector is normalised as a whole and a batch of
	    vectors is normalised row by row (the convention torch.nn.LayerNorm
	    uses for a 1-D ``normalized_shape``).

	    Args:
	        x: numpy array whose last axis holds the features.
	        w: per-feature scale, broadcastable against ``x``.
	        b: per-feature shift, broadcastable against ``x``.
	        eps: small constant added to the variance before the square root.

	    Returns:
	        Array of the same shape as ``x``.
	    """
	    # keepdims keeps the reduced axis so the statistics broadcast back per row.
	    mu = x.mean(axis=-1, keepdims=True)
	    centered = x - mu
	    var = (centered ** 2).mean(axis=-1, keepdims=True)
	    return centered / np.sqrt(var + eps) * w + b


	def _relu(x):
	    """Return the element-wise maximum of ``x`` and zero (rectified linear unit)."""
	    floor = 0.0
	    return np.maximum(x, floor)


	class ModularMind:
	    """Numpy re-implementation of the Modular Mind policy with decision telemetry.

	    One small network per ``SPEC_DEFS`` entry reads the feature vector. Every
	    specialist writes a latent into a shared bridge; specialists that own an
	    action also add a scalar drive to that action's logit. A coordinator
	    read-out of the bridge is added on top to form the final logits.

	    Attributes:
	        W: dict of weight name -> numpy array (keys such as ``"s0_fc1_w"``,
	            ``"link_g"``, ``"coord_w"``).
	        trained: True when ``W`` came from a weights file, False when the
	            seeded random fallback is in use.
	    """

	    def __init__(self, weights_path=WEIGHTS_PATH):
	        """Load weights from ``weights_path`` or fall back to random weights.

	        Args:
	            weights_path: path to an ``.npz`` archive of weight arrays. If the
	                file does not exist, random weights are used so the game
	                still runs before training finishes.
	        """
	        if os.path.exists(weights_path):
	            # np.load keeps an .npz archive open and reads members lazily;
	            # copy every array out and close the file handle right away.
	            with np.load(weights_path) as archive:
	                self.W = {key: archive[key] for key in archive.files}
	            self.trained = True
	        else:  # fallback so the game still runs before training finishes
	            self.W = self._random_weights()
	            self.trained = False

	    def _random_weights(self):
	        """Build a deterministic (seed 0) random weight set.

	        Returns:
	            dict with the same key names ``decide`` reads: per-specialist
	            ``s{i}_fc1_*`` / ``s{i}_lat_*`` (plus ``s{i}_drv_*`` for
	            specialists that own an action), the ``link_*`` bridge weights
	            and the ``coord_*`` read-out.
	        """
	        rng = np.random.default_rng(0)
	        W = {}
	        for i, (_, owns, _) in enumerate(SPEC_DEFS):
	            W[f"s{i}_fc1_w"] = rng.normal(0, .3, (H, NF))
	            W[f"s{i}_fc1_b"] = np.zeros(H)
	            W[f"s{i}_lat_w"] = rng.normal(0, .3, (D_LATENT, H))
	            W[f"s{i}_lat_b"] = np.zeros(D_LATENT)
	            if owns is not None:
	                # only action-owning specialists get a scalar drive head
	                W[f"s{i}_drv_w"] = rng.normal(0, .3, (1, H))
	                W[f"s{i}_drv_b"] = np.zeros(1)
	        W["link_ni_w"] = np.ones(D_LATENT)
	        W["link_ni_b"] = np.zeros(D_LATENT)
	        W["link_v"] = rng.normal(0, .3, (2 * D_LATENT, D_LATENT))
	        W["link_g"] = rng.normal(0, .3, (2 * D_LATENT, D_LATENT))
	        W["link_d"] = rng.normal(0, .3, (D_LATENT, 2 * D_LATENT))
	        W["link_no_w"] = np.ones(D_LATENT)
	        W["link_no_b"] = np.zeros(D_LATENT)
	        W["coord_w"] = rng.normal(0, .3, (len(ACTIONS), D_LATENT))
	        W["coord_b"] = np.zeros(len(ACTIONS))
	        return W

	    def decide(self, state: dict, explore: float = 0.06):
	        """Pick a boss action for ``state`` and report how it was reached.

	        Args:
	            state: game-state dict handed to ``extract_features`` and
	                ``legal_mask``; its ``"bossHP"`` entry (default 1.0) sets the
	                reported phase.
	            explore: mistake rate -- the probability of taking a uniformly
	                random legal action instead of the best-scoring one.

	        Returns:
	            dict with keys ``action``, ``phase``, ``trained``,
	            ``specialists``, ``base_drive``, ``modulation``, ``final_drive``,
	            ``probs``, ``legal`` and ``shared_latent``. Numeric telemetry is
	            rounded to 3 decimals.
	        """
	        W = self.W
	        f = extract_features(state).astype(np.float64)

	        drives = np.zeros(len(ACTIONS))
	        latents = []
	        per_spec = []
	        for i, (name, owns, color) in enumerate(SPEC_DEFS):
	            h = np.tanh(W[f"s{i}_fc1_w"] @ f + W[f"s{i}_fc1_b"])
	            lat = W[f"s{i}_lat_w"] @ h + W[f"s{i}_lat_b"]
	            latents.append(lat)
	            drv = None
	            if owns is not None:
	                drv = float((W[f"s{i}_drv_w"] @ h + W[f"s{i}_drv_b"]).item())
	                drives[ACTIONS.index(owns)] += drv
	            per_spec.append({
	                "name": name, "owns": owns, "color": color,
	                "drive": round(drv, 3) if drv is not None else None,
	                "latent_norm": round(float(np.linalg.norm(lat)), 3),
	            })

	        # RecursiveLink: shared latent bridge (gated MLP with a residual
	        # connection, layer-normalised on the way in and out)
	        z = np.sum(latents, axis=0)
	        zn = _layernorm(z, W["link_ni_w"], W["link_ni_b"])
	        reglu = _relu(W["link_g"] @ zn) * (W["link_v"] @ zn)
	        out = W["link_d"] @ reglu
	        shared = _layernorm(out + z, W["link_no_w"], W["link_no_b"])

	        # Coordinator read-out (modulator influence flows in here)
	        modulation = W["coord_w"] @ shared + W["coord_b"]
	        logits = drives + modulation

	        mask = legal_mask(state)
	        masked = np.where(mask > 0.5, logits, -1e9)

	        # softmax over the masked logits (reported as telemetry only)
	        probs = np.exp(masked - masked.max())
	        probs = probs / probs.sum()
	        # `explore` is a true mistake-rate: with prob `explore` take a uniformly
	        # random LEGAL action (the difficulty dial), else the best action. (Sampling
	        # from `probs` barely degrades play -- the policy is too confident -- so we
	        # use uniform mistakes to make easier tiers actually easier.)
	        legal_idx = np.where(mask > 0.5)[0]
	        # With no legal action there is nothing to sample from
	        # (np.random.choice raises on an empty array), so skip the mistake
	        # branch and fall through to the greedy pick.
	        if explore > 0 and legal_idx.size > 0 and np.random.random() < explore:
	            choice = int(np.random.choice(legal_idx))
	        else:
	            choice = int(np.argmax(masked))

	        phase = 2 if state.get("bossHP", 1.0) < 0.5 else 1
	        return {
	            "action": ACTIONS[choice],
	            "phase": phase,
	            "trained": self.trained,
	            "specialists": per_spec,
	            "base_drive": {a: round(float(d), 3) for a, d in zip(ACTIONS, drives)},
	            "modulation": {a: round(float(m), 3) for a, m in zip(ACTIONS, modulation)},
	            "final_drive": {a: round(float(d), 3) for a, d in zip(ACTIONS, logits)},
	            "probs": {a: round(float(p), 3) for a, p in zip(ACTIONS, probs)},
	            "legal": {a: bool(m > 0.5) for a, m in zip(ACTIONS, mask)},
	            "shared_latent": [round(float(x), 3) for x in shared],
	        }


	# ---- difficulty tiers --------------------------------------------------------
	# Each tier pairs a weights checkpoint ("file") with a decision-noise level
	# ("explore"). The original design gave Easy a partially-trained brain that also
	# "thinks loosely" (more exploration -> more mistakes, more openings for the
	# player) and Hard the fully-trained brain playing sharply; all tiers currently
	# name the same weights file, so only the noise level differs. A tier whose
	# weight file is missing falls back to the hard tier's weights.
	# Directory containing this module; tier weight files are resolved against it.
	_DIR = os.path.dirname(os.path.abspath(__file__))

	# Cache of one ModularMind per difficulty tier, filled lazily by get_mind().
	_MINDS: dict[str, "ModularMind"] = {}

	# Difficulty is decision-noise applied to the trained brain: `explore` is the
	# mistake-rate (probability of a random legal action), so a higher value means
	# more exploitable mistakes. (Once the boss learned to BLOCK it dominates the sim
	# even early in training, so a "weaker checkpoint" no longer yields an easy boss --
	# noise is the controllable dial: vs a near-optimal dodger these give boss
	# win-rates ~0.35 / ~0.65 / ~0.95.)
	DIFFICULTY = {
	    "easy": {
	        "file": "mm_weights.npz",
	        "explore": 0.50,
	    },
	    "normal": {
	        "file": "mm_weights.npz",
	        "explore": 0.22,
	    },
	    "hard": {
	        "file": "mm_weights.npz",
	        "explore": 0.04,
	    },
	}


	def get_mind(difficulty: str = "hard") -> ModularMind:
	    """Return the cached ModularMind for a difficulty tier, creating it on first use.

	    Args:
	        difficulty: a key of ``DIFFICULTY``; any other value is treated as
	            ``"hard"``.

	    Returns:
	        The ModularMind instance stored in ``_MINDS`` for the resolved tier.
	    """
	    tier = difficulty if difficulty in DIFFICULTY else "hard"
	    if tier in _MINDS:
	        return _MINDS[tier]

	    weights_file = os.path.join(_DIR, DIFFICULTY[tier]["file"])
	    if not os.path.exists(weights_file):
	        # this tier has no weights of its own yet -> use the hard tier's file
	        weights_file = os.path.join(_DIR, DIFFICULTY["hard"]["file"])
	    brain = ModularMind(weights_file)

	    if tier == "hard":
	        # The hard tier shares the online learner's live (player-adapted)
	        # weights, so finetuning from real fights takes effect immediately.
	        # The import is done lazily to avoid an import cycle
	        # (online -> mm_grad -> modular_mind); any failure is reported and the
	        # weights loaded from disk stay in place.
	        try:
	            import online
	            if online.ENABLED:
	                brain.W = online.live_weights()
	                brain.trained = True
	        except Exception as err:
	            print(f"[modular_mind] online weights not shared ({err})")

	    _MINDS[tier] = brain
	    return _MINDS[tier]


	def decide(state: dict) -> dict:
	    """Dispatch a decision to the brain of the difficulty tier named in ``state``.

	    The tier is read from ``state["difficulty"]`` (default ``"hard"``; unknown
	    values also map to ``"hard"``). The tier's ``explore`` rate from
	    ``DIFFICULTY`` is passed to the brain, and the resolved tier name is added
	    to the returned telemetry under ``"difficulty"``.
	    """
	    requested = str(state.get("difficulty", "hard"))
	    tier = requested if requested in DIFFICULTY else "hard"
	    brain = get_mind(tier)
	    result = brain.decide(state, explore=DIFFICULTY[tier]["explore"])
	    result["difficulty"] = tier
	    return result