"""
app.py -- Modular Mind: Boss Fight (HuggingFace Space entry point).
A 2D Dark-Souls-style duel. The boss (Demon Slime) is driven by a tiny Modular
Mind: six specialist networks emit latents that a RecursiveLink merges into one
shared latent, and a coordinator reads it to pick the boss's next move. The brain
was trained by self-play reinforcement learning (see train.py / duel_sim.py).
The browser renders the fight at 60fps; at each decision point it calls the Python
brain through this app's /decide endpoint and shows the Modular Mind deciding live.
"""
import json
import os
import sys
from urllib.parse import quote
import gradio as gr
import modular_mind
import online
# Directory that contains this file; bundled resources are resolved against it.
HERE = os.path.dirname(os.path.abspath(__file__))

# ./agents holds the MoE-experts experiment that is appended to the bottom of
# the page. It is optional: when the import fails the game still runs and the
# panel is simply left out.
sys.path.insert(0, os.path.join(HERE, "agents"))
try:
    from panel import build_moe_panel
except Exception as panel_error:
    build_moe_panel = None
    print(f"[app] MoE experiment panel unavailable ({panel_error})")

# ./piano holds the self-playing piano (a Modular Mind trained on a song).
sys.path.insert(0, os.path.join(HERE, "piano"))

# Process-wide slot for the piano player: "tried" records that construction was
# attempted, "player" holds the instance (or None when it is unavailable).
_PIANO = dict(player=None, tried=False)
def _get_piano():
    """Return the shared piano player, building it on the first call.

    Construction is attempted at most once per process: the "tried" flag is
    set before the import, so a failure is printed once and every later call
    returns ``None`` without retrying.
    """
    if _PIANO["tried"]:
        return _PIANO["player"]

    _PIANO["tried"] = True
    try:
        from poly_mind import PolyPlayer
        player = PolyPlayer()
    except Exception as err:
        print(f"[app] piano Modular Mind unavailable ({err})")
    else:
        _PIANO["player"] = player
    return _PIANO["player"]
# Keyboard range and frame rate come from the piano model's metadata file. This
# is deliberately best-effort: a missing file, invalid JSON or a missing key all
# fall back to the built-in defaults so the page still renders.
try:
    # Read through a context manager with an explicit encoding so the handle is
    # closed immediately instead of being left for the garbage collector.
    with open(os.path.join(HERE, "piano", "poly_notes.json"), encoding="utf-8") as _pmeta_file:
        _pmeta = json.load(_pmeta_file)
    PIANO_LO, PIANO_HI, PIANO_FPS = _pmeta["midi_lo"], _pmeta["midi_hi"], _pmeta.get("fps", 8)
except Exception:
    PIANO_LO, PIANO_HI, PIANO_FPS = 56, 86, 8
# the performance is restyled live into A minor with the bass lifted away (see
# piano/poly_mind.py stylize_midi), so the on-screen keyboard starts at middle C
PIANO_LO = max(PIANO_LO, 60)
_get_piano()  # warm the piano Modular Mind at app startup (so the first play is instant)
def _read(path):
    """Return the entire contents of the UTF-8 text file at ``path``."""
    with open(path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
# Front-end sources are read once at import time.
_WEB_DIR = os.path.join(HERE, "web")
CSS = _read(os.path.join(_WEB_DIR, "game.css"))
GAME_JS = _read(os.path.join(_WEB_DIR, "game.js"))
ASSETS_JS = _read(os.path.join(HERE, "assets_data.js"))
INDEX_HTML = _read(os.path.join(_WEB_DIR, "index.html"))

# Music and sound effects are served by Gradio as static files (see
# allowed_paths at launch); the game builds its audio URLs from this base.
AUDIO_DIR = os.path.join(HERE, "audio")
# The absolute path may contain spaces, so it is URL-encoded; "/" and the
# drive-letter ":" are kept literal.
_audio_dir_url_path = AUDIO_DIR.replace(os.sep, "/")
AUDIO_BASE_URL = f"/gradio_api/file={quote(_audio_dir_url_path, safe='/:')}/"
def _sample_midis(directory):
    """List the MIDI note numbers that have a ``<midi>.mp3`` sample in ``directory``.

    Only files named exactly ``<int>.mp3`` are kept, because the browser
    rebuilds each sample URL as ``<midi>.mp3``. A stray or oddly named file is
    skipped on its own instead of discarding every sample. A missing or
    unreadable directory yields an empty list.
    """
    try:
        names = os.listdir(directory)
    except OSError:
        return []
    midis = []
    for name in names:
        if not name.endswith(".mp3"):
            continue
        try:
            midi = int(name[:-4])
        except ValueError:
            continue  # e.g. "README.mp3": not a note sample
        if f"{midi}.mp3" == name:  # reject "060.mp3" etc. that would 404 in the browser
            midis.append(midi)
    return sorted(midis)


# real acoustic-grand-piano note samples (served static; the piano plays the nearest
# sample pitch-shifted to each note, for a real piano sound instead of an oscillator).
PIANO_SAMPLES_DIR = os.path.join(HERE, "piano", "samples")
PIANO_SAMPLE_MIDIS = _sample_midis(PIANO_SAMPLES_DIR)
PIANO_SAMPLE_BASE = "/gradio_api/file=" + quote(PIANO_SAMPLES_DIR.replace(os.sep, "/"), safe="/:") + "/"
# warm the default brain
modular_mind.get_mind("hard")
def decide(state_json: str) -> str:
    """Pick the boss's next move for one decision point.

    The browser calls this with the game state serialized as JSON (the
    payload includes a "difficulty" tier). Input that cannot be parsed is
    treated as an empty state. Returns the result of ``modular_mind.decide``
    (chosen action + telemetry) serialized as JSON.
    """
    try:
        parsed_state = json.loads(state_json)
    except Exception:
        parsed_state = {}
    decision = modular_mind.decide(parsed_state)
    return json.dumps(decision)
def learn(traj_json: str) -> str:
    """Hand a finished fight to the online learner.

    The browser calls this at the end of a fight with the full decision
    trajectory + outcome as JSON. The fight is buffered and the HARD brain is
    periodically finetuned (REINFORCE). Returns the learner's reply as JSON,
    or ``{"error": "bad json"}`` when the payload cannot be parsed.
    """
    try:
        trajectory = json.loads(traj_json)
    except Exception:
        failure = {"error": "bad json"}
        return json.dumps(failure)
    outcome = online.record_fight(trajectory)
    return json.dumps(outcome)
def piano(payload_json: str) -> str:
    """Generate the next frames for the browser's self-playing piano.

    In:  JSON ``{"history": [...], "n": int}``. ``history`` is the recent frame
         history, which the client keeps between calls. ``n`` is the number of
         frames wanted (default 32, clamped to 1..64).
    Out: JSON ``{"frames": [[midi, ...], ...], "telem": [...], "history": [...]}``
         with one entry per generated frame in ``frames`` and ``telem``, and
         ``history`` trimmed to the last ``player.K`` frames.

    When the piano player is unavailable the reply has empty ``frames`` and
    ``telem``, the history as received, an ``"error"`` message, and the
    legacy empty ``"notes"`` list.

    Malformed input never raises. An unparsable or non-object payload is
    treated as ``{}``, a non-list ``history`` as empty, and an ``n`` that is
    not an integer as the default.
    """
    try:
        req = json.loads(payload_json)
    except Exception:
        req = {}
    if not isinstance(req, dict):
        # Valid JSON that is not an object (e.g. "[]" or "3") has no .get().
        req = {}

    raw_history = req.get("history")
    hist = list(raw_history) if isinstance(raw_history, list) else []

    try:
        n = int(req.get("n", 32))
    except (TypeError, ValueError, OverflowError):
        n = 32  # null, non-numeric or infinite "n": use the default
    n = max(1, min(64, n))

    player = _get_piano()
    if player is None:
        # Same shape as a normal reply so the client's frames/telem reads work;
        # "notes" is kept for backward compatibility.
        return json.dumps({"notes": [], "frames": [], "telem": [],
                           "history": hist, "error": "piano unavailable"})

    if not hist:
        # Nothing played yet: start from the player's seed frames.
        hist = [list(frame) for frame in player.seed]

    frames, telem = [], []
    for _ in range(n):
        toks, midis, tl = player.next_frame(hist)
        hist.append(toks)
        frames.append([int(x) for x in midis])
        telem.append(tl)

    # Only the last K frames go back to the client as the next call's history.
    trimmed = [list(map(int, frame)) for frame in hist[-player.K:]]
    return json.dumps({"frames": frames, "telem": telem, "history": trimmed})
# Bootstrap (runs in the browser): wire window.MM_DECIDE to this app's /decide
# endpoint via Gradio's REST API (no external CDN), then boot the game once the
# gr.HTML canvas is in the DOM.
BOOTSTRAP_JS = """
(function () {
// route each boss decision to the Python Modular Mind through /gradio_api/call
window.MM_DECIDE = async (state) => {
const post = await fetch('/gradio_api/call/decide', {
method: 'POST', headers: {'Content-Type': 'application/json'},
body: JSON.stringify({data: [JSON.stringify(state)]}),
});
const j = await post.json();
const res = await fetch('/gradio_api/call/decide/' + j.event_id);
const text = await res.text();
const line = text.split('\\n').filter(l => l.startsWith('data:')).pop();
const arr = JSON.parse(line.slice(5).trim());
return JSON.parse(arr[0]);
};
// send a finished fight's trajectory to the online learner (fire-and-forget)
window.MM_LEARN = async (traj) => {
try {
const post = await fetch('/gradio_api/call/learn', {
method: 'POST', headers: {'Content-Type': 'application/json'},
body: JSON.stringify({data: [JSON.stringify(traj)]}),
});
const j = await post.json();
await fetch('/gradio_api/call/learn/' + j.event_id);
} catch (e) { /* learning is best-effort */ }
};
const tryBoot = () => {
if (document.getElementById('mm-canvas') && window.__mmBoot) window.__mmBoot();
else setTimeout(tryBoot, 80);
};
if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', tryBoot);
else tryBoot();
})();
"""
# Force Gradio dark mode (matches the dark game) regardless of the visitor's browser
# setting, by ensuring the ?__theme=dark URL param is present before the app renders.
FORCE_DARK_JS = """
(function () {
try {
var p = new URLSearchParams(window.location.search);
if (p.get('__theme') !== 'dark') {
p.set('__theme', 'dark');
window.location.replace(window.location.pathname + '?' + p.toString() + window.location.hash);
}
} catch (e) {}
})();
"""
# ---- self-playing piano (a Modular Mind trained on the song) -----------------
PIANO_CSS = """
#mm-piano-wrap{max-width:920px;margin:6px auto 2px;font-family:system-ui,sans-serif}
#mm-piano-stage{position:relative;background:radial-gradient(ellipse at 50% 110%,#1b1430 0%,#0c0c14 62%,#08080e 100%);
border:1px solid #2a2a35;border-radius:10px;padding:0 8px 12px;overflow-x:auto;overflow-y:hidden}
#mm-piano-roll{display:block;width:100%;height:150px}
#mm-piano{display:flex;align-items:flex-end;justify-content:center;gap:2px;height:112px}
.pk{box-sizing:border-box;border:1px solid #05050a;border-radius:0 0 4px 4px;flex:0 0 auto;
transition:background .07s ease,box-shadow .07s ease}
.pk.white{width:20px;height:100px;background:linear-gradient(180deg,#f4f4f8 0%,#d6d6e0 88%,#b9b9c6 100%)}
.pk.black{width:14px;height:62px;background:linear-gradient(180deg,#3a3a46 0%,#1b1b23 100%)}
.pk.on{transform:translateY(1px)}
#mm-piano-ctrl{display:flex;gap:14px;align-items:center;justify-content:center;margin:8px auto 4px}
#mm-piano-btn{cursor:pointer;background:#2a9d6a;color:#fff;border:none;border-radius:6px;
padding:9px 18px;font-weight:700;font-size:14px}
#mm-piano-btn:hover{background:#33b87c}
#mm-piano-note{color:#9bd;font-size:13px;min-width:96px;text-align:left}
#mm-piano-specs{display:flex;gap:8px;justify-content:center;flex-wrap:wrap;margin:8px auto 2px;max-width:800px}
.psp{width:110px;background:#16161e;border:1px solid #2a2a35;border-radius:6px;padding:6px 9px}
.psp .nm{font-weight:700;font-size:12px}
.psp .ow{opacity:.55;font-size:10px;color:#aaa;margin-top:1px}
.psp .bar{height:7px;background:#2a2a33;border-radius:4px;margin-top:6px;overflow:hidden}
.psp .fill{height:100%;width:4%;border-radius:4px;transition:width .12s ease}
#mm-piano-lbl{text-align:center;color:#888;font-size:11px;margin-top:8px}
#mm-piano-latent{display:flex;gap:3px;justify-content:center;align-items:flex-end;height:22px;margin:5px auto}
#mm-piano-latent .lc{width:8px;height:3px;border-radius:2px;background:#3a3a48;transition:height .12s ease}
"""
# JavaScript assignments that publish the piano settings on ``window``; the
# string values are JSON-encoded so they are valid JS literals.
PIANO_GLOBALS = "".join(
    f"window.{js_name}={js_literal};"
    for js_name, js_literal in (
        ("MM_PIANO_LO", f"{PIANO_LO}"),
        ("MM_PIANO_HI", f"{PIANO_HI}"),
        ("MM_PIANO_FPS", f"{PIANO_FPS}"),
        ("MM_PIANO_SAMPLE_BASE", json.dumps(PIANO_SAMPLE_BASE)),
        ("MM_PIANO_SAMPLE_MIDIS", json.dumps(PIANO_SAMPLE_MIDIS)),
    )
)
PIANO_JS = r"""
(function(){
var SPC={Bass:'#4da6ff',Tenor:'#2ecc71',Soprano:'#ff6b9d',Sustain:'#1abc9c',Rest:'#95a5a6',Onset:'#e67e22',Phrase:'#9b59b6'};
window.__pianoBoot = function(){
var wrap=document.getElementById('mm-piano');
if(!wrap || wrap.dataset.built) return; wrap.dataset.built='1';
var LO=window.MM_PIANO_LO||56, HI=window.MM_PIANO_HI||86, BLACK={1:1,3:1,6:1,8:1,10:1};
for(var m=LO;m<=HI;m++){var k=document.createElement('div');
k.className='pk '+(BLACK[m%12]?'black':'white'); k.id='pk-'+m; wrap.appendChild(k);}
var audio=null, playing=false, queue=[], history=[], fetching=false, timer=null, voices={};
var specFills={}, built=false, buffers={}, loaded=false;
var PLAY_MS=Math.round(1000/(window.MM_PIANO_FPS||8))+58; // a touch slower = calmer feel
var noteEl=document.getElementById('mm-piano-note'), btn=document.getElementById('mm-piano-btn');
var specBox=document.getElementById('mm-piano-specs'), latBox=document.getElementById('mm-piano-latent');
var NN=['C','C#','D','D#','E','F','F#','G','G#','A','A#','B'];
function name(m){return NN[m%12]+(Math.floor(m/12)-1);}
// ---- light show: glowing note trails rise off the keys while they sound ----
var roll=document.getElementById('mm-piano-roll');
var rctx=roll?roll.getContext('2d'):null, trails=[], sparks=[], keyTrail={}, rafOn=false;
function hue(m){return ((m%12)*30+200)%360;} // pitch class -> color wheel
function ensureRaf(){ if(rctx && !rafOn){ rafOn=true; requestAnimationFrame(draw); } }
function draw(){
var w=roll.clientWidth, hgt=roll.clientHeight, now=performance.now(), v=0.05;
if(roll.width!==w) roll.width=w; if(roll.height!==hgt) roll.height=hgt;
rctx.clearRect(0,0,w,hgt);
for(var i=trails.length-1;i>=0;i--){ var tr=trails[i];
var top=hgt-(now-tr.t0)*v, bot=hgt-(tr.t1?(now-tr.t1)*v:0);
if(bot<-30){ trails.splice(i,1); continue; }
top=Math.max(top,-30);
var g=rctx.createLinearGradient(0,top,0,bot);
g.addColorStop(0,'hsla('+tr.h+',85%,62%,0)');
g.addColorStop(1,'hsla('+tr.h+',85%,62%,0.9)');
rctx.shadowColor='hsl('+tr.h+',85%,60%)'; rctx.shadowBlur=10;
rctx.fillStyle=g; rctx.fillRect(tr.x,top,tr.w,Math.max(2,bot-top));
}
rctx.shadowBlur=0;
for(var j=sparks.length-1;j>=0;j--){ var s=sparks[j], a=1-(now-s.t0)/650;
if(a<=0){ sparks.splice(j,1); continue; }
s.x+=s.vx; s.y+=s.vy;
rctx.fillStyle='hsla('+s.h+',95%,72%,'+a.toFixed(2)+')';
rctx.fillRect(s.x,s.y,2.2,2.2);
}
if(!playing && !trails.length && !sparks.length){ rafOn=false; rctx.clearRect(0,0,w,hgt); return; }
requestAnimationFrame(draw);
}
function strikeFx(m,el){
if(!rctx||!el) return;
var x=el.offsetLeft-roll.offsetLeft, wd=el.offsetWidth, hh=hue(m), now=performance.now();
var tr={x:x,w:wd,h:hh,t0:now,t1:null};
trails.push(tr); keyTrail[m]=tr;
for(var i=0;i<6;i++) sparks.push({x:x+wd/2,y:roll.clientHeight-3,
vx:(Math.random()-0.5)*1.6, vy:-(0.6+Math.random()*1.4), h:hh, t0:now});
ensureRaf();
}
function endTrail(m){ if(keyTrail[m]){ keyTrail[m].t1=performance.now(); delete keyTrail[m]; } }
function lightKey(m,on){
var el=document.getElementById('pk-'+m); if(!el) return el;
if(on){ el.classList.add('on'); var hh=hue(m);
el.style.background='hsl('+hh+',82%,'+(BLACK[m%12]?'46%':'68%')+')';
el.style.boxShadow='0 0 18px hsl('+hh+',85%,60%)';
} else { el.classList.remove('on'); el.style.background=''; el.style.boxShadow=''; }
return el;
}
function buildSpecs(telem){
if(built || !specBox || !telem) return; built=true;
telem.spec.forEach(function(s){
var c=SPC[s.name]||'#888', card=document.createElement('div'); card.className='psp';
card.innerHTML='<div class="nm" style="color:'+c+'">'+s.name+'</div>'+
'<div class="ow">'+(s.owns?('owns '+s.owns):'modulator')+'</div>'+
'<div class="bar"><div class="fill" style="background:'+c+'"></div></div>';
specBox.appendChild(card); specFills[s.name]=card.querySelector('.fill');
});
if(latBox){ for(var i=0;i<8;i++){var lc=document.createElement('div'); lc.className='lc'; latBox.appendChild(lc);} }
}
function updateSpecs(telem){
if(!telem) return; buildSpecs(telem);
telem.spec.forEach(function(s){
var h;
if(s.owns!=null && s.drive!=null){ h=Math.abs(s.drive)/4.0*100; } // owners: by drive
else { h=(s.act-16.0)/10.0*100; } // modulators: by latent pulse
h=Math.max(4,Math.min(100,h));
if(specFills[s.name]) specFills[s.name].style.width=h+'%';
});
if(latBox && telem.shared){ var lc=latBox.children;
for(var i=0;i<lc.length && i<telem.shared.length;i++){
lc[i].style.height=Math.max(2,Math.min(20,Math.abs(telem.shared[i])*9))+'px';
lc[i].style.background=telem.shared[i]>=0?'#5bbcdf':'#df7a5b';
} }
}
async function fetchPhrase(){
if(fetching) return; fetching=true;
try{
var post=await fetch('/gradio_api/call/piano',{method:'POST',
headers:{'Content-Type':'application/json'},
body:JSON.stringify({data:[JSON.stringify({history:history,n:32})]})});
var j=await post.json();
var res=await fetch('/gradio_api/call/piano/'+j.event_id);
var text=await res.text();
var line=text.split('\n').filter(function(l){return l.indexOf('data:')===0;}).pop();
var out=JSON.parse(JSON.parse(line.slice(5).trim())[0]);
history=out.history||history;
var fr=out.frames||[]; for(var i=0;i<fr.length;i++) queue.push({f:fr[i], t:(out.telem&&out.telem[i])||null});
}catch(e){}
fetching=false;
}
function loadSamples(){
if(loaded || !audio) return; loaded=true; // background load; play() upgrades to samples as they arrive
var ms=window.MM_PIANO_SAMPLE_MIDIS||[], base=window.MM_PIANO_SAMPLE_BASE||'';
ms.forEach(function(sm){
var ctl=('AbortController' in window)?new AbortController():null;
var to=ctl?setTimeout(function(){ctl.abort();},8000):0;
fetch(base+sm+'.mp3', ctl?{signal:ctl.signal}:{}).then(function(r){return r.arrayBuffer();})
.then(function(ab){audio.decodeAudioData(ab,function(buf){buffers[sm]=buf;},function(){});})
.catch(function(){}).finally(function(){if(to)clearTimeout(to);});
});
}
function nearest(m){ var ks=Object.keys(buffers); if(!ks.length) return null;
return ks.map(Number).reduce(function(a,b){return Math.abs(b-m)<Math.abs(a-m)?b:a;}); }
function voice(m, vol){ // real sample if it's loaded, else an oscillator -> ALWAYS audible
if(!audio) return null;
var sm=nearest(m), t=audio.currentTime;
if(sm!=null && buffers[sm]){
var src=audio.createBufferSource(); src.buffer=buffers[sm];
src.playbackRate.value=Math.pow(2,(m-sm)/12);
var g=audio.createGain(); g.gain.value=vol;
src.connect(g); g.connect(audio.destination); src.start(t);
return {src:src, gain:g};
}
var f=440*Math.pow(2,(m-69)/12);
var o1=audio.createOscillator(); o1.type='triangle'; o1.frequency.value=f;
var o2=audio.createOscillator(); o2.type='sine'; o2.frequency.value=f*2;
var g2=audio.createGain(); g2.gain.value=0.18;
var lp=audio.createBiquadFilter(); lp.type='lowpass'; lp.frequency.value=2600;
var g=audio.createGain();
g.gain.setValueAtTime(0.0001,t); g.gain.exponentialRampToValueAtTime(vol,t+0.014);
g.gain.exponentialRampToValueAtTime(Math.max(0.0001,vol*0.3),t+1.6);
o1.connect(lp); o2.connect(g2); g2.connect(lp); lp.connect(g); g.connect(audio.destination);
o1.start(t); o2.start(t);
return {oscs:[o1,o2], gain:g};
}
function releaseNode(nd){
if(!nd || !audio) return; var t=audio.currentTime;
try{ nd.gain.gain.cancelScheduledValues(t);
nd.gain.gain.setValueAtTime(Math.max(nd.gain.gain.value,0.0001),t);
nd.gain.gain.linearRampToValueAtTime(0.0001,t+0.10);
if(nd.src) nd.src.stop(t+0.13); if(nd.oscs) nd.oscs.forEach(function(o){o.stop(t+0.13);});
}catch(e){}
}
function releaseAll(){
for(var mk in voices){ releaseNode(voices[mk]); lightKey(+mk,false); endTrail(+mk); }
voices={};
}
function playFrame(midis){ // polyphony: strike new notes, hold sustained ones, release dropped ones
var nw={}; (midis||[]).forEach(function(m){ if(m>0) nw[m]=1; });
for(var mk in voices){ if(!nw[mk]){ releaseNode(voices[mk]);
lightKey(+mk,false); endTrail(+mk); delete voices[mk]; } }
var on=Object.keys(nw), vol=on.length>2?0.5:0.65;
on.forEach(function(ms){ var m=+ms; if(!voices[m]){ var v=voice(m,vol); if(v) voices[m]=v;
strikeFx(m, lightKey(m,true)); } });
if(noteEl){ noteEl.textContent= on.length ? ('βͺ '+on.map(function(ms){return name(+ms);}).join(' ')) : 'βͺ (rest)'; }
}
function tick(){ if(!playing) return; if(queue.length<10 && !fetching) fetchPhrase();
if(queue.length>0){ var it=queue.shift(); playFrame(it.f); updateSpecs(it.t); } }
function start(){
if(!audio) audio=new (window.AudioContext||window.webkitAudioContext)();
if(audio.state==='suspended'){ try{audio.resume();}catch(e){} }
loadSamples(); // real piano loads in background; oscillator plays until then
playing=true; btn.textContent='βΈ Pause'; ensureRaf();
if(queue.length===0) fetchPhrase();
if(!timer) timer=setInterval(tick, PLAY_MS);
}
function stop(){ playing=false; btn.textContent='βΆ Let the Modular Mind play'; releaseAll(); }
btn.onclick=function(){ playing?stop():start(); };
};
var t=function(){ if(document.getElementById('mm-piano')) window.__pianoBoot(); else setTimeout(t,120); };
if(document.readyState==='loading') document.addEventListener('DOMContentLoaded',t); else t();
})();
"""
PIANO_HTML = """
<div id="mm-piano-wrap">
<div id="mm-piano-stage">
<canvas id="mm-piano-roll"></canvas>
<div id="mm-piano"></div>
</div>
<div id="mm-piano-ctrl">
<button id="mm-piano-btn">βΆ Let the Modular Mind play</button>
<span id="mm-piano-note">βͺ</span>
</div>
<div id="mm-piano-lbl">restyled live into <b>A minor</b> β every note is lifted out of the bass and snapped to the minor scale Β· Bass / Tenor / Soprano own a register; Sustain / Onset / Phrase are modulators that only write to the shared latent</div>
<div id="mm-piano-specs"></div>
<div id="mm-piano-latent" title="RecursiveLink shared latent"></div>
</div>
"""
# Markup injected verbatim into the page <head>, in this order: dark-mode forcer,
# stylesheets, embedded sprite atlases, audio base URL, piano globals, the game
# engine, the piano engine, and the bootstrap. (Inline <script> in <head> runs
# reliably; gr.HTML's innerHTML scripts do not.)
_HEAD_PARTS = (
    ("script", FORCE_DARK_JS),
    ("style", CSS),
    ("style", PIANO_CSS),
    ("script", ASSETS_JS),
    ("script", f"window.MM_AUDIO_BASE = {json.dumps(AUDIO_BASE_URL)};"),
    ("script", PIANO_GLOBALS),
    ("script", GAME_JS),
    ("script", PIANO_JS),
    ("script", BOOTSTRAP_JS),
)
HEAD = "".join(f"<{tag}>{body}</{tag}>\n" for tag, body in _HEAD_PARTS)
INTRO = """
# π Modular Mind
A mini **Dark-Souls-style** duel where the boss is controlled by a **Modular Mind** β six tiny
specialist networks that communicate through a **shared latent** (RecursiveLink) and a coordinator
that picks each move. The brain was **trained by self-play reinforcement learning**, not scripted.
Watch the right-hand panel: every boss decision shows which specialists fired and how the modulators
steer the fight through the latent. **Click *Enter the Fog* and click the game once to focus, then play.**
"""
# ---- placeholder repo link (replace REPO_URL with your real GitHub URL) ------
REPO_URL = "#" # TODO: replace with the real repo
REPO_MD = f"""
"""
TECH_MD = r"""
## π§ How the model works (technical breakdown)
The boss brain is a faithful, **specialist-scale** implementation of the Modular Mind
architecture β small enough (~**4,500 parameters**, pure-NumPy inference) to decide in
well under a millisecond on a free CPU, yet structurally identical to the big idea:
**many small domain specialists that communicate through one shared latent.**
### The pieces
- **7 specialists** (tiny 2-layer MLPs). Five *own an action* and two *modulators* own none:
| Specialist | Owns | Role |
|---|---|---|
| **Aggressor** | `CLEAVE` | attack when in range |
| **Stalker** | `APPROACH` | close the distance |
| **Survivor** | `RETREAT` | reset spacing when it can't swing |
| **Baiter** | `IDLE` | wait / bait a whiff |
| **Defender** | `BLOCK` | guard the player's melee when it can't punish |
| **Punisher** | β *(modulator)* | detects "the player is open / recovering" |
| **Enrage** | β *(modulator)* | detects "we're low on HP β go berserk" |
- **`RecursiveLink`** — a ReGLU + residual block that merges the seven specialist latents into **one shared latent** (the "bridge").
- **Coordinator** β a linear read-out of the shared latent that nudges every action's score.
### What every specialist is doing *at one moment* (a single decision tick)
A souls boss commits to one move at a time, so the brain only fires when the boss is free
(~2–4 times/second). In that one forward pass, **all seven specialists run in parallel**:
1. **Perceive** β the live game state is compressed to a **10-D feature vector** (distance, in-range?, boss HP, player HP, cooldown ready?, is the player attacking / recovering / blocking?).
2. **Specialise** — each specialist computes `h = tanh(W₁·features)` and emits a **latent vector** `zᵢ` (its "opinion"); the five action-owners also emit a scalar **drive** for their move.
3. **Communicate** — the seven latents are summed and pushed through the **`RecursiveLink`** to form the **shared latent** `s`. This is the only channel the **modulators** have: *Punisher* writes "player is open" and *Enrage* writes "HP is low" into `s` — they cast no direct vote.
4. **Coordinate** β the **coordinator** reads `s` and produces a **modulation** added to each action's score. So `score(action) = (owner's drive) + (coordinator modulation)`. This is where "the player is open" turns *Aggressor's* CLEAVE up, or "we're low HP" makes the boss commit harder.
5. **Act** β the boss takes the top-scoring legal action (CLEAVE is masked while on cooldown). A small per-difficulty *mistake rate* adds the easy/normal/hard feel.
That whole loop is the **4-bar specialist panel + shared-latent strip** you see updating in the game β a live X-ray of the model thinking.
### How it learned
Trained by **self-play REINFORCE** (policy gradient + value baseline) in a headless duel
simulator: reward = *damage dealt β damage taken*, plus shaping that rewards pressuring in
range and punishes stalling. Over ~700 batches the win-rate climbed against a near-optimal
dodging opponent and the tactics β spacing, punishing recovery frames, blocking your punish,
enraging at low HP β **emerged**; none of it is hand-scripted. The **difficulty tiers are the
same trained brain at different decision-noise levels** (Easy makes more exploitable mistakes,
Hard plays sharp β0.95 win vs the dodger).
### Why the structure matters
- **Modular** β you can retrain or swap one specialist without touching the others (e.g. the **Defender/BLOCK** specialist was added later and the rest were untouched).
- **Explainable** β at any instant you can read *which* specialist drove the decision and how the modulators bent it.
- **Cheap** β specialists are small and run in parallel; the latent bridge is one tiny matmul.
### It finetunes from *your* fights (online learning)
Because the model is tiny, a gradient step is microseconds β so the boss can learn
from real play **on this CPU**. Every HARD-tier fight is logged (state, action, HP per
decision) and sent to a `/learn` endpoint; we rebuild the per-decision rewards (damage
dealt β taken, + kill / β death), compute REINFORCE returns, and take **one Adam step**
that nudges the HARD brain toward what worked against real humans β the backprop is
hand-written in numpy and verified against PyTorch to ~1e-8. A frozen copy of the
sim-trained weights is an **anchor** (gentle pull-back) so it can't drift into nonsense,
and with a `HF_TOKEN` + `MM_DATASET_REPO` secret the adapted weights persist to a
HuggingFace Dataset across Space restarts. (Only HARD fights train, so the adaptation data
stays on-policy.)
"""
USES_MD = r"""
## π Three real-world applications of this architecture
The reusable idea isn't "a boss" β it's **small, independently-trainable specialists that
coordinate through a shared latent instead of through brittle hand-written rules or one giant
monolithic model.** That pattern transfers well beyond games:
**1. On-device / edge robotics & IoT control.**
A drone, robot arm, or wearable can't run a huge policy. Give it a handful of tiny specialists
β *balance*, *obstacle-avoidance*, *navigation*, *battery/thermal management* β each cheap
enough for a microcontroller, coordinating through one shared latent. You can **add or replace
a specialist** (e.g., a new sensor) without retraining the whole stack, and the latent bridge
fuses their context in a single cheap step β exactly what this boss does at 2β4 Hz on a CPU.
**2. Explainable, designer-tunable AI for games & simulations.**
Studios want NPC/boss/crowd AI that's *steerable and inspectable*, not a black box. With this
pattern a designer can tune or hot-swap one behavior specialist (more aggressive, more cautious)
and **see exactly which specialist fired** for any decision β the same live panel shown here.
That makes balancing, debugging, and difficulty tuning tractable in ways a single end-to-end
policy isn't.
**3. Modular AI agents / mixture-of-specialists that talk in latent space.**
The original Modular Mind motivation: instead of an "agent chain" that re-serializes everything
to **text** at every hop (lossy, slow), let domain specialists β *math*, *code*, *retrieval*,
*safety/policy* β communicate through a **latent bridge** (`RecursiveLink` + a residual highway
for deep chains). A small language model can consult a math or tool specialist **without
flattening to tokens**, each specialist is trained/upgraded independently, and the system stays
auditable. Useful for cost-sensitive assistants, industrial decisioning (risk + liquidity +
fraud specialists), or clinical triage (modular diagnostic experts) where you must know *why*.
"""
PIANO_INTRO = """
### πΉ This may be bad: a self-playing piano β same Modular Mind method, trained on a song
Under the boss fight, the *same architecture* (tiny specialists β `RecursiveLink` β a coordinator)
applied to **playing piano in chords**. It was trained by **multi-note next-frame prediction** on a
*polyphonic* transcription of a song: six specialists (Bass / Tenor / Soprano registers + Sustain /
Onset / Phrase modulators) emit latents, the bridge merges them, and the coordinator picks the **set
of notes** to play next. It plays itself with **real recorded acoustic-piano samples**, and the
performance is **restyled live into A minor** β every note is lifted out of the bass register and
snapped to the minor scale before it reaches the keys. Press **play** and watch each note send a
glowing trail of light off the keyboard.
<sub>Rough by design β one song, a tiny model, crude polyphonic transcription β the *method carrying over* is the point.</sub>
"""
def _hidden_endpoint(fn, api_name):
    """Expose ``fn`` under ``api_name`` through three invisible components.

    Must be called inside the ``gr.Blocks`` context. Creates a hidden input
    textbox, a hidden output textbox and a hidden button whose click event
    runs ``fn``, and returns them as ``(input, output, button)``.
    """
    source = gr.Textbox(visible=False)
    sink = gr.Textbox(visible=False)
    button = gr.Button(visible=False)
    button.click(fn, source, sink, api_name=api_name)
    return source, sink, button


with gr.Blocks(title="Quazim0t0's π Thousand Token Wood Entry") as demo:
    # Visible page, top to bottom: intro, game canvas, piano, repo link, docs.
    gr.Markdown(INTRO)
    gr.HTML(INDEX_HTML)
    gr.Markdown(PIANO_INTRO)
    gr.HTML(PIANO_HTML)
    gr.Markdown(REPO_MD)
    with gr.Accordion("π§ How the Modular Mind works (technical breakdown)", open=False):
        gr.Markdown(TECH_MD)
    with gr.Accordion("π Three real-world applications", open=False):
        gr.Markdown(USES_MD)

    # the third application, made real: a live mixture-of-experts at the bottom
    if build_moe_panel is not None:
        build_moe_panel()

    # Hidden API plumbing: the browser reaches /decide, /learn and /piano
    # through the Gradio REST API.
    inp, out, trigger = _hidden_endpoint(decide, "decide")
    linp, lout, ltrigger = _hidden_endpoint(learn, "learn")
    pinp, pout, ptrigger = _hidden_endpoint(piano, "piano")
if __name__ == "__main__":
    # Enable the request queue, then serve on all interfaces. The port comes
    # from $PORT when set and is 7860 otherwise.
    queued_demo = demo.queue(default_concurrency_limit=8)
    listen_port = int(os.environ.get("PORT", "7860"))
    queued_demo.launch(
        server_name="0.0.0.0",
        server_port=listen_port,
        # directories Gradio may serve as static files (music/sfx, piano samples)
        allowed_paths=[AUDIO_DIR, PIANO_SAMPLES_DIR],
        # Gradio 6 moved these from the Blocks constructor to launch()
        theme=gr.themes.Base(),
        head=HEAD,
    )
|