Spaces:
Running on Zero
Running on Zero
| """ | |
| app.py -- Modular Mind: Boss Fight (HuggingFace Space entry point). | |
| A 2D Dark-Souls-style duel. The boss (Demon Slime) is driven by a tiny Modular | |
| Mind: six specialist networks emit latents that a RecursiveLink merges into one | |
| shared latent, and a coordinator reads it to pick the boss's next move. The brain | |
| was trained by self-play reinforcement learning (see train.py / duel_sim.py). | |
| The browser renders the fight at 60fps; at each decision point it calls the Python | |
| brain through this app's /decide endpoint and shows the Modular Mind deciding live. | |
| """ | |
| import json | |
| import os | |
| import sys | |
| from urllib.parse import quote | |
| import gradio as gr | |
| import modular_mind | |
| import online | |
# Directory that holds this file; bundled assets and sub-packages are resolved
# relative to it.
HERE = os.path.dirname(os.path.abspath(__file__))

# The MoE-experts experiment lives in ./agents (it is added to the bottom of the
# page). It is optional: when the import fails the game still runs without it.
sys.path.insert(0, os.path.join(HERE, "agents"))
try:
    from panel import build_moe_panel
except Exception as _e:
    build_moe_panel = None
    print(f"[app] MoE experiment panel unavailable ({_e})")

# The self-playing piano (a Modular Mind trained on a song) lives in ./piano.
sys.path.insert(0, os.path.join(HERE, "piano"))

# Lazily-built piano player plus a flag recording that construction was attempted.
_PIANO = dict(player=None, tried=False)
def _get_piano():
    """Return the shared piano player, building it on the first call.

    Exactly one construction attempt is made per process. If importing
    ``poly_mind`` or constructing ``PolyPlayer`` fails, the failure is printed
    and this and every later call return ``None``.
    """
    if _PIANO["tried"]:
        return _PIANO["player"]

    # Mark the attempt before trying so a failure is never retried.
    _PIANO["tried"] = True
    try:
        from poly_mind import PolyPlayer
        _PIANO["player"] = PolyPlayer()
    except Exception as e:
        print(f"[app] piano Modular Mind unavailable ({e})")
    return _PIANO["player"]
# Keyboard range and frame rate come from the transcription metadata shipped with
# the piano model; built-in defaults are used when the file is missing or malformed.
try:
    # Context manager so the metadata file handle is closed deterministically.
    with open(os.path.join(HERE, "piano", "poly_notes.json"), encoding="utf-8") as _pmeta_file:
        _pmeta = json.load(_pmeta_file)
    # Coerce the range to ints inside the try: the values are compared with max()
    # below and interpolated into page JavaScript, so a non-numeric value must
    # fall back to the defaults instead of crashing at import time.
    PIANO_LO = int(_pmeta["midi_lo"])
    PIANO_HI = int(_pmeta["midi_hi"])
    PIANO_FPS = _pmeta.get("fps", 8)
except Exception:
    # Deliberate best-effort: any problem with the metadata means "use defaults".
    PIANO_LO, PIANO_HI, PIANO_FPS = 56, 86, 8
# the performance is restyled live into A minor with the bass lifted away (see
# piano/poly_mind.py stylize_midi), so the on-screen keyboard starts at middle C
PIANO_LO = max(PIANO_LO, 60)
_get_piano()  # warm the piano Modular Mind at app startup (so the first play is instant)
def _read(path):
    """Return the entire contents of the UTF-8 text file at ``path``."""
    with open(path, encoding="utf-8") as source:
        text = source.read()
    return text
# Front-end sources, loaded once at import time and inlined into the page <head>
# (CSS, GAME_JS, ASSETS_JS) or the page body (INDEX_HTML).
_WEB_DIR = os.path.join(HERE, "web")
CSS = _read(os.path.join(_WEB_DIR, "game.css"))
GAME_JS = _read(os.path.join(_WEB_DIR, "game.js"))
ASSETS_JS = _read(os.path.join(HERE, "assets_data.js"))
INDEX_HTML = _read(os.path.join(_WEB_DIR, "index.html"))
def _static_file_base(directory):
    """Return the Gradio static-file URL prefix (with trailing "/") for ``directory``.

    The absolute path is URL-encoded because it may contain spaces; "/" and the
    drive ":" are kept literal.
    """
    return "/gradio_api/file=" + quote(directory.replace(os.sep, "/"), safe="/:") + "/"


def _sample_midis(directory):
    """Return the sorted integer stems of the ``<number>.mp3`` files in ``directory``.

    An unreadable or missing directory yields ``[]``. Files whose stem is not an
    integer are skipped individually, so one stray ``.mp3`` no longer discards
    every valid sample.
    """
    try:
        names = os.listdir(directory)
    except OSError:
        return []
    midis = []
    for fname in names:
        if not fname.endswith(".mp3"):
            continue
        try:
            midis.append(int(fname[:-4]))
        except ValueError:
            continue  # not a "<midi>.mp3" sample; ignore it
    return sorted(midis)


# music/sfx are served as static files by Gradio (allowed_paths at launch); the
# game builds audio URLs from this base.
AUDIO_DIR = os.path.join(HERE, "audio")
AUDIO_BASE_URL = _static_file_base(AUDIO_DIR)
# real acoustic-grand-piano note samples (served static; the piano plays the nearest
# sample pitch-shifted to each note, for a real piano sound instead of an oscillator).
PIANO_SAMPLES_DIR = os.path.join(HERE, "piano", "samples")
PIANO_SAMPLE_MIDIS = _sample_midis(PIANO_SAMPLES_DIR)
PIANO_SAMPLE_BASE = _static_file_base(PIANO_SAMPLES_DIR)
# warm the default brain
modular_mind.get_mind("hard")
def decide(state_json: str) -> str:
    """Pick the boss's next move; called by the browser at each decision point.

    In:  game-state JSON (includes a "difficulty" tier).
    Out: JSON-encoded result of ``modular_mind.decide`` (chosen action + telemetry).

    Input that cannot be parsed, or that parses to something other than a JSON
    object (for example ``[]`` or ``null``), is replaced by an empty state so
    the brain always receives a dict.
    """
    try:
        state = json.loads(state_json)
    except Exception:
        # Request boundary: any parse failure means "no usable state".
        state = {}
    if not isinstance(state, dict):
        state = {}
    return json.dumps(modular_mind.decide(state))
def learn(traj_json: str) -> str:
    """Hand a finished fight to the online learner; called by the browser.

    In:  JSON with the fight's full decision trajectory + outcome.
    Out: JSON-encoded return value of ``online.record_fight``, or
         ``{"error": "bad json"}`` when the payload cannot be parsed.
    """
    try:
        fight = json.loads(traj_json)
    except Exception:
        reply = {"error": "bad json"}
    else:
        reply = online.record_fight(fight)
    return json.dumps(reply)
def piano(payload_json: str) -> str:
    """Generate the next frames for the browser's self-playing piano.

    In:  JSON ``{"history": [...], "n": int}`` -- the client-held list of recent
         token frames and the number of new frames wanted (clamped to 1..64,
         default 32).
    Out: JSON ``{"frames": [[midi, ...], ...], "telem": [...], "history": [...]}``.
         ``frames`` holds the MIDI notes of each generated frame, ``telem`` the
         per-frame telemetry returned by the player, and ``history`` the last
         ``player.K`` token frames for the client to send back next time.
         When the piano model is unavailable the reply has empty ``frames`` /
         ``telem`` / ``notes`` lists, the history as received, and an ``error``.

    Malformed input never raises: an unparseable or non-object payload, a
    non-list ``history``, or a non-numeric ``n`` falls back to the defaults.
    An empty history is seeded from ``player.seed``.
    """
    try:
        req = json.loads(payload_json)
    except Exception:
        req = {}
    if not isinstance(req, dict):
        req = {}

    player = _get_piano()

    raw_history = req.get("history")
    hist = list(raw_history) if isinstance(raw_history, (list, tuple)) else []

    try:
        n = int(req.get("n", 32))
    except (TypeError, ValueError, OverflowError):
        n = 32
    n = max(1, min(64, n))

    if player is None:
        # "notes" is kept for older clients; "frames"/"telem" mirror the success shape.
        return json.dumps({"notes": [], "frames": [], "telem": [],
                           "history": hist, "error": "piano unavailable"})

    if not hist:
        hist = [list(f) for f in player.seed]

    frames, telem = [], []
    for _ in range(n):
        # Autoregressive step: each new token frame is appended to the history
        # that conditions the next one.
        toks, midis, tl = player.next_frame(hist)
        hist.append(toks)
        frames.append([int(x) for x in midis])
        telem.append(tl)

    return json.dumps({"frames": frames, "telem": telem,
                       "history": [list(map(int, f)) for f in hist[-player.K:]]})
| # Bootstrap (runs in the browser): wire window.MM_DECIDE to this app's /decide | |
| # endpoint via Gradio's REST API (no external CDN), then boot the game once the | |
| # gr.HTML canvas is in the DOM. | |
| BOOTSTRAP_JS = """ | |
| (function () { | |
| // route each boss decision to the Python Modular Mind through /gradio_api/call | |
| window.MM_DECIDE = async (state) => { | |
| const post = await fetch('/gradio_api/call/decide', { | |
| method: 'POST', headers: {'Content-Type': 'application/json'}, | |
| body: JSON.stringify({data: [JSON.stringify(state)]}), | |
| }); | |
| const j = await post.json(); | |
| const res = await fetch('/gradio_api/call/decide/' + j.event_id); | |
| const text = await res.text(); | |
| const line = text.split('\\n').filter(l => l.startsWith('data:')).pop(); | |
| const arr = JSON.parse(line.slice(5).trim()); | |
| return JSON.parse(arr[0]); | |
| }; | |
| // send a finished fight's trajectory to the online learner (fire-and-forget) | |
| window.MM_LEARN = async (traj) => { | |
| try { | |
| const post = await fetch('/gradio_api/call/learn', { | |
| method: 'POST', headers: {'Content-Type': 'application/json'}, | |
| body: JSON.stringify({data: [JSON.stringify(traj)]}), | |
| }); | |
| const j = await post.json(); | |
| await fetch('/gradio_api/call/learn/' + j.event_id); | |
| } catch (e) { /* learning is best-effort */ } | |
| }; | |
| const tryBoot = () => { | |
| if (document.getElementById('mm-canvas') && window.__mmBoot) window.__mmBoot(); | |
| else setTimeout(tryBoot, 80); | |
| }; | |
| if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', tryBoot); | |
| else tryBoot(); | |
| })(); | |
| """ | |
| # Force Gradio dark mode (matches the dark game) regardless of the visitor's browser | |
| # setting, by ensuring the ?__theme=dark URL param is present before the app renders. | |
| FORCE_DARK_JS = """ | |
| (function () { | |
| try { | |
| var p = new URLSearchParams(window.location.search); | |
| if (p.get('__theme') !== 'dark') { | |
| p.set('__theme', 'dark'); | |
| window.location.replace(window.location.pathname + '?' + p.toString() + window.location.hash); | |
| } | |
| } catch (e) {} | |
| })(); | |
| """ | |
| # ---- self-playing piano (a Modular Mind trained on the song) ----------------- | |
| PIANO_CSS = """ | |
| #mm-piano-wrap{max-width:920px;margin:6px auto 2px;font-family:system-ui,sans-serif} | |
| #mm-piano-stage{position:relative;background:radial-gradient(ellipse at 50% 110%,#1b1430 0%,#0c0c14 62%,#08080e 100%); | |
| border:1px solid #2a2a35;border-radius:10px;padding:0 8px 12px;overflow-x:auto;overflow-y:hidden} | |
| #mm-piano-roll{display:block;width:100%;height:150px} | |
| #mm-piano{display:flex;align-items:flex-end;justify-content:center;gap:2px;height:112px} | |
| .pk{box-sizing:border-box;border:1px solid #05050a;border-radius:0 0 4px 4px;flex:0 0 auto; | |
| transition:background .07s ease,box-shadow .07s ease} | |
| .pk.white{width:20px;height:100px;background:linear-gradient(180deg,#f4f4f8 0%,#d6d6e0 88%,#b9b9c6 100%)} | |
| .pk.black{width:14px;height:62px;background:linear-gradient(180deg,#3a3a46 0%,#1b1b23 100%)} | |
| .pk.on{transform:translateY(1px)} | |
| #mm-piano-ctrl{display:flex;gap:14px;align-items:center;justify-content:center;margin:8px auto 4px} | |
| #mm-piano-btn{cursor:pointer;background:#2a9d6a;color:#fff;border:none;border-radius:6px; | |
| padding:9px 18px;font-weight:700;font-size:14px} | |
| #mm-piano-btn:hover{background:#33b87c} | |
| #mm-piano-note{color:#9bd;font-size:13px;min-width:96px;text-align:left} | |
| #mm-piano-specs{display:flex;gap:8px;justify-content:center;flex-wrap:wrap;margin:8px auto 2px;max-width:800px} | |
| .psp{width:110px;background:#16161e;border:1px solid #2a2a35;border-radius:6px;padding:6px 9px} | |
| .psp .nm{font-weight:700;font-size:12px} | |
| .psp .ow{opacity:.55;font-size:10px;color:#aaa;margin-top:1px} | |
| .psp .bar{height:7px;background:#2a2a33;border-radius:4px;margin-top:6px;overflow:hidden} | |
| .psp .fill{height:100%;width:4%;border-radius:4px;transition:width .12s ease} | |
| #mm-piano-lbl{text-align:center;color:#888;font-size:11px;margin-top:8px} | |
| #mm-piano-latent{display:flex;gap:3px;justify-content:center;align-items:flex-end;height:22px;margin:5px auto} | |
| #mm-piano-latent .lc{width:8px;height:3px;border-radius:2px;background:#3a3a48;transition:height .12s ease} | |
| """ | |
# JavaScript assignments that publish the piano configuration on `window` before
# the piano engine script runs.
PIANO_GLOBALS = "".join(
    f"window.{js_name}={js_value};"
    for js_name, js_value in (
        ("MM_PIANO_LO", PIANO_LO),
        ("MM_PIANO_HI", PIANO_HI),
        ("MM_PIANO_FPS", PIANO_FPS),
        ("MM_PIANO_SAMPLE_BASE", json.dumps(PIANO_SAMPLE_BASE)),
        ("MM_PIANO_SAMPLE_MIDIS", json.dumps(PIANO_SAMPLE_MIDIS)),
    )
)
| PIANO_JS = r""" | |
| (function(){ | |
| var SPC={Bass:'#4da6ff',Tenor:'#2ecc71',Soprano:'#ff6b9d',Sustain:'#1abc9c',Rest:'#95a5a6',Onset:'#e67e22',Phrase:'#9b59b6'}; | |
| window.__pianoBoot = function(){ | |
| var wrap=document.getElementById('mm-piano'); | |
| if(!wrap || wrap.dataset.built) return; wrap.dataset.built='1'; | |
| var LO=window.MM_PIANO_LO||56, HI=window.MM_PIANO_HI||86, BLACK={1:1,3:1,6:1,8:1,10:1}; | |
| for(var m=LO;m<=HI;m++){var k=document.createElement('div'); | |
| k.className='pk '+(BLACK[m%12]?'black':'white'); k.id='pk-'+m; wrap.appendChild(k);} | |
| var audio=null, playing=false, queue=[], history=[], fetching=false, timer=null, voices={}; | |
| var specFills={}, built=false, buffers={}, loaded=false; | |
| var PLAY_MS=Math.round(1000/(window.MM_PIANO_FPS||8))+58; // a touch slower = calmer feel | |
| var noteEl=document.getElementById('mm-piano-note'), btn=document.getElementById('mm-piano-btn'); | |
| var specBox=document.getElementById('mm-piano-specs'), latBox=document.getElementById('mm-piano-latent'); | |
| var NN=['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']; | |
| function name(m){return NN[m%12]+(Math.floor(m/12)-1);} | |
| // ---- light show: glowing note trails rise off the keys while they sound ---- | |
| var roll=document.getElementById('mm-piano-roll'); | |
| var rctx=roll?roll.getContext('2d'):null, trails=[], sparks=[], keyTrail={}, rafOn=false; | |
| function hue(m){return ((m%12)*30+200)%360;} // pitch class -> color wheel | |
| function ensureRaf(){ if(rctx && !rafOn){ rafOn=true; requestAnimationFrame(draw); } } | |
| function draw(){ | |
| var w=roll.clientWidth, hgt=roll.clientHeight, now=performance.now(), v=0.05; | |
| if(roll.width!==w) roll.width=w; if(roll.height!==hgt) roll.height=hgt; | |
| rctx.clearRect(0,0,w,hgt); | |
| for(var i=trails.length-1;i>=0;i--){ var tr=trails[i]; | |
| var top=hgt-(now-tr.t0)*v, bot=hgt-(tr.t1?(now-tr.t1)*v:0); | |
| if(bot<-30){ trails.splice(i,1); continue; } | |
| top=Math.max(top,-30); | |
| var g=rctx.createLinearGradient(0,top,0,bot); | |
| g.addColorStop(0,'hsla('+tr.h+',85%,62%,0)'); | |
| g.addColorStop(1,'hsla('+tr.h+',85%,62%,0.9)'); | |
| rctx.shadowColor='hsl('+tr.h+',85%,60%)'; rctx.shadowBlur=10; | |
| rctx.fillStyle=g; rctx.fillRect(tr.x,top,tr.w,Math.max(2,bot-top)); | |
| } | |
| rctx.shadowBlur=0; | |
| for(var j=sparks.length-1;j>=0;j--){ var s=sparks[j], a=1-(now-s.t0)/650; | |
| if(a<=0){ sparks.splice(j,1); continue; } | |
| s.x+=s.vx; s.y+=s.vy; | |
| rctx.fillStyle='hsla('+s.h+',95%,72%,'+a.toFixed(2)+')'; | |
| rctx.fillRect(s.x,s.y,2.2,2.2); | |
| } | |
| if(!playing && !trails.length && !sparks.length){ rafOn=false; rctx.clearRect(0,0,w,hgt); return; } | |
| requestAnimationFrame(draw); | |
| } | |
| function strikeFx(m,el){ | |
| if(!rctx||!el) return; | |
| var x=el.offsetLeft-roll.offsetLeft, wd=el.offsetWidth, hh=hue(m), now=performance.now(); | |
| var tr={x:x,w:wd,h:hh,t0:now,t1:null}; | |
| trails.push(tr); keyTrail[m]=tr; | |
| for(var i=0;i<6;i++) sparks.push({x:x+wd/2,y:roll.clientHeight-3, | |
| vx:(Math.random()-0.5)*1.6, vy:-(0.6+Math.random()*1.4), h:hh, t0:now}); | |
| ensureRaf(); | |
| } | |
| function endTrail(m){ if(keyTrail[m]){ keyTrail[m].t1=performance.now(); delete keyTrail[m]; } } | |
| function lightKey(m,on){ | |
| var el=document.getElementById('pk-'+m); if(!el) return el; | |
| if(on){ el.classList.add('on'); var hh=hue(m); | |
| el.style.background='hsl('+hh+',82%,'+(BLACK[m%12]?'46%':'68%')+')'; | |
| el.style.boxShadow='0 0 18px hsl('+hh+',85%,60%)'; | |
| } else { el.classList.remove('on'); el.style.background=''; el.style.boxShadow=''; } | |
| return el; | |
| } | |
| function buildSpecs(telem){ | |
| if(built || !specBox || !telem) return; built=true; | |
| telem.spec.forEach(function(s){ | |
| var c=SPC[s.name]||'#888', card=document.createElement('div'); card.className='psp'; | |
| card.innerHTML='<div class="nm" style="color:'+c+'">'+s.name+'</div>'+ | |
| '<div class="ow">'+(s.owns?('owns '+s.owns):'modulator')+'</div>'+ | |
| '<div class="bar"><div class="fill" style="background:'+c+'"></div></div>'; | |
| specBox.appendChild(card); specFills[s.name]=card.querySelector('.fill'); | |
| }); | |
| if(latBox){ for(var i=0;i<8;i++){var lc=document.createElement('div'); lc.className='lc'; latBox.appendChild(lc);} } | |
| } | |
| function updateSpecs(telem){ | |
| if(!telem) return; buildSpecs(telem); | |
| telem.spec.forEach(function(s){ | |
| var h; | |
| if(s.owns!=null && s.drive!=null){ h=Math.abs(s.drive)/4.0*100; } // owners: by drive | |
| else { h=(s.act-16.0)/10.0*100; } // modulators: by latent pulse | |
| h=Math.max(4,Math.min(100,h)); | |
| if(specFills[s.name]) specFills[s.name].style.width=h+'%'; | |
| }); | |
| if(latBox && telem.shared){ var lc=latBox.children; | |
| for(var i=0;i<lc.length && i<telem.shared.length;i++){ | |
| lc[i].style.height=Math.max(2,Math.min(20,Math.abs(telem.shared[i])*9))+'px'; | |
| lc[i].style.background=telem.shared[i]>=0?'#5bbcdf':'#df7a5b'; | |
| } } | |
| } | |
| async function fetchPhrase(){ | |
| if(fetching) return; fetching=true; | |
| try{ | |
| var post=await fetch('/gradio_api/call/piano',{method:'POST', | |
| headers:{'Content-Type':'application/json'}, | |
| body:JSON.stringify({data:[JSON.stringify({history:history,n:32})]})}); | |
| var j=await post.json(); | |
| var res=await fetch('/gradio_api/call/piano/'+j.event_id); | |
| var text=await res.text(); | |
| var line=text.split('\n').filter(function(l){return l.indexOf('data:')===0;}).pop(); | |
| var out=JSON.parse(JSON.parse(line.slice(5).trim())[0]); | |
| history=out.history||history; | |
| var fr=out.frames||[]; for(var i=0;i<fr.length;i++) queue.push({f:fr[i], t:(out.telem&&out.telem[i])||null}); | |
| }catch(e){} | |
| fetching=false; | |
| } | |
| function loadSamples(){ | |
| if(loaded || !audio) return; loaded=true; // background load; play() upgrades to samples as they arrive | |
| var ms=window.MM_PIANO_SAMPLE_MIDIS||[], base=window.MM_PIANO_SAMPLE_BASE||''; | |
| ms.forEach(function(sm){ | |
| var ctl=('AbortController' in window)?new AbortController():null; | |
| var to=ctl?setTimeout(function(){ctl.abort();},8000):0; | |
| fetch(base+sm+'.mp3', ctl?{signal:ctl.signal}:{}).then(function(r){return r.arrayBuffer();}) | |
| .then(function(ab){audio.decodeAudioData(ab,function(buf){buffers[sm]=buf;},function(){});}) | |
| .catch(function(){}).finally(function(){if(to)clearTimeout(to);}); | |
| }); | |
| } | |
| function nearest(m){ var ks=Object.keys(buffers); if(!ks.length) return null; | |
| return ks.map(Number).reduce(function(a,b){return Math.abs(b-m)<Math.abs(a-m)?b:a;}); } | |
| function voice(m, vol){ // real sample if it's loaded, else an oscillator -> ALWAYS audible | |
| if(!audio) return null; | |
| var sm=nearest(m), t=audio.currentTime; | |
| if(sm!=null && buffers[sm]){ | |
| var src=audio.createBufferSource(); src.buffer=buffers[sm]; | |
| src.playbackRate.value=Math.pow(2,(m-sm)/12); | |
| var g=audio.createGain(); g.gain.value=vol; | |
| src.connect(g); g.connect(audio.destination); src.start(t); | |
| return {src:src, gain:g}; | |
| } | |
| var f=440*Math.pow(2,(m-69)/12); | |
| var o1=audio.createOscillator(); o1.type='triangle'; o1.frequency.value=f; | |
| var o2=audio.createOscillator(); o2.type='sine'; o2.frequency.value=f*2; | |
| var g2=audio.createGain(); g2.gain.value=0.18; | |
| var lp=audio.createBiquadFilter(); lp.type='lowpass'; lp.frequency.value=2600; | |
| var g=audio.createGain(); | |
| g.gain.setValueAtTime(0.0001,t); g.gain.exponentialRampToValueAtTime(vol,t+0.014); | |
| g.gain.exponentialRampToValueAtTime(Math.max(0.0001,vol*0.3),t+1.6); | |
| o1.connect(lp); o2.connect(g2); g2.connect(lp); lp.connect(g); g.connect(audio.destination); | |
| o1.start(t); o2.start(t); | |
| return {oscs:[o1,o2], gain:g}; | |
| } | |
| function releaseNode(nd){ | |
| if(!nd || !audio) return; var t=audio.currentTime; | |
| try{ nd.gain.gain.cancelScheduledValues(t); | |
| nd.gain.gain.setValueAtTime(Math.max(nd.gain.gain.value,0.0001),t); | |
| nd.gain.gain.linearRampToValueAtTime(0.0001,t+0.10); | |
| if(nd.src) nd.src.stop(t+0.13); if(nd.oscs) nd.oscs.forEach(function(o){o.stop(t+0.13);}); | |
| }catch(e){} | |
| } | |
| function releaseAll(){ | |
| for(var mk in voices){ releaseNode(voices[mk]); lightKey(+mk,false); endTrail(+mk); } | |
| voices={}; | |
| } | |
| function playFrame(midis){ // polyphony: strike new notes, hold sustained ones, release dropped ones | |
| var nw={}; (midis||[]).forEach(function(m){ if(m>0) nw[m]=1; }); | |
| for(var mk in voices){ if(!nw[mk]){ releaseNode(voices[mk]); | |
| lightKey(+mk,false); endTrail(+mk); delete voices[mk]; } } | |
| var on=Object.keys(nw), vol=on.length>2?0.5:0.65; | |
| on.forEach(function(ms){ var m=+ms; if(!voices[m]){ var v=voice(m,vol); if(v) voices[m]=v; | |
| strikeFx(m, lightKey(m,true)); } }); | |
| if(noteEl){ noteEl.textContent= on.length ? ('βͺ '+on.map(function(ms){return name(+ms);}).join(' ')) : 'βͺ (rest)'; } | |
| } | |
| function tick(){ if(!playing) return; if(queue.length<10 && !fetching) fetchPhrase(); | |
| if(queue.length>0){ var it=queue.shift(); playFrame(it.f); updateSpecs(it.t); } } | |
| function start(){ | |
| if(!audio) audio=new (window.AudioContext||window.webkitAudioContext)(); | |
| if(audio.state==='suspended'){ try{audio.resume();}catch(e){} } | |
| loadSamples(); // real piano loads in background; oscillator plays until then | |
| playing=true; btn.textContent='βΈ Pause'; ensureRaf(); | |
| if(queue.length===0) fetchPhrase(); | |
| if(!timer) timer=setInterval(tick, PLAY_MS); | |
| } | |
| function stop(){ playing=false; btn.textContent='βΆ Let the Modular Mind play'; releaseAll(); } | |
| btn.onclick=function(){ playing?stop():start(); }; | |
| }; | |
| var t=function(){ if(document.getElementById('mm-piano')) window.__pianoBoot(); else setTimeout(t,120); }; | |
| if(document.readyState==='loading') document.addEventListener('DOMContentLoaded',t); else t(); | |
| })(); | |
| """ | |
| PIANO_HTML = """ | |
| <div id="mm-piano-wrap"> | |
| <div id="mm-piano-stage"> | |
| <canvas id="mm-piano-roll"></canvas> | |
| <div id="mm-piano"></div> | |
| </div> | |
| <div id="mm-piano-ctrl"> | |
| <button id="mm-piano-btn">▶ Let the Modular Mind play</button> | |
| <span id="mm-piano-note">♪</span> | |
| </div> | |
| <div id="mm-piano-lbl">restyled live into <b>A minor</b> — every note is lifted out of the bass and snapped to the minor scale · Bass / Tenor / Soprano own a register; Sustain / Onset / Phrase are modulators that only write to the shared latent</div> | |
| <div id="mm-piano-specs"></div> | |
| <div id="mm-piano-latent" title="RecursiveLink shared latent"></div> | |
| </div> | |
| """ | |
# Markup injected verbatim into the page <head>, in load order: dark-mode forcer,
# the two stylesheets, embedded sprite atlases, audio/piano globals, the game
# engine, the piano engine, and finally the bootstrap. (Inline <script> in <head>
# runs reliably; gr.HTML's innerHTML scripts do not.)
HEAD = "\n".join((
    "<script>" + FORCE_DARK_JS + "</script>",
    "<style>" + CSS + "</style>",
    "<style>" + PIANO_CSS + "</style>",
    "<script>" + ASSETS_JS + "</script>",
    "<script>window.MM_AUDIO_BASE = " + json.dumps(AUDIO_BASE_URL) + ";</script>",
    "<script>" + PIANO_GLOBALS + "</script>",
    "<script>" + GAME_JS + "</script>",
    "<script>" + PIANO_JS + "</script>",
    "<script>" + BOOTSTRAP_JS + "</script>",
)) + "\n"
| INTRO = """ | |
| # π Modular Mind | |
| A mini **Dark-Souls-style** duel where the boss is controlled by a **Modular Mind** — six tiny | |
| specialist networks that communicate through a **shared latent** (RecursiveLink) and a coordinator | |
| that picks each move. The brain was **trained by self-play reinforcement learning**, not scripted. | |
| Watch the right-hand panel: every boss decision shows which specialists fired and how the modulators | |
| steer the fight through the latent. **Click *Enter the Fog* and click the game once to focus, then play.** | |
| """ | |
| # ---- placeholder repo link (replace REPO_URL with your real GitHub URL) ------ | |
| REPO_URL = "#" # TODO: replace with the real repo | |
| REPO_MD = f""" | |
| """ | |
| TECH_MD = r""" | |
| ## 🧠 How the model works (technical breakdown) | |
| The boss brain is a faithful, **specialist-scale** implementation of the Modular Mind | |
| architecture — small enough (~**4,500 parameters**, pure-NumPy inference) to decide in | |
| well under a millisecond on a free CPU, yet structurally identical to the big idea: | |
| **many small domain specialists that communicate through one shared latent.** | |
| ### The pieces | |
| - **7 specialists** (tiny 2-layer MLPs). Five *own an action* and two *modulators* own none: | |
| | Specialist | Owns | Role | | |
| |---|---|---| | |
| | **Aggressor** | `CLEAVE` | attack when in range | | |
| | **Stalker** | `APPROACH` | close the distance | | |
| | **Survivor** | `RETREAT` | reset spacing when it can't swing | | |
| | **Baiter** | `IDLE` | wait / bait a whiff | | |
| | **Defender** | `BLOCK` | guard the player's melee when it can't punish | | |
| | **Punisher** | — *(modulator)* | detects "the player is open / recovering" | | |
| | **Enrage** | — *(modulator)* | detects "we're low on HP — go berserk" | | |
| - **`RecursiveLink`** — a ReGLU + residual block that merges the six latents into **one shared latent** (the "bridge"). | |
| - **Coordinator** — a linear read-out of the shared latent that nudges every action's score. | |
| ### What every specialist is doing *at one moment* (a single decision tick) | |
| A souls boss commits to one move at a time, so the brain only fires when the boss is free | |
| (~2–4 times/second). In that one forward pass, **all six specialists run in parallel**: | |
| 1. **Perceive** — the live game state is compressed to a **10-D feature vector** (distance, in-range?, boss HP, player HP, cooldown ready?, is the player attacking / recovering / blocking?). | |
| 2. **Specialise** — each specialist computes `h = tanh(W₁·features)` and emits a **latent vector** `zᵢ` (its "opinion"); the four action-owners also emit a scalar **drive** for their move. | |
| 3. **Communicate** — the six latents are summed and pushed through the **`RecursiveLink`** to form the **shared latent** `s`. This is the only channel the **modulators** have: *Punisher* writes "player is open" and *Enrage* writes "HP is low" into `s` — they cast no direct vote. | |
| 4. **Coordinate** — the **coordinator** reads `s` and produces a **modulation** added to each action's score. So `score(action) = (owner's drive) + (coordinator modulation)`. This is where "the player is open" turns *Aggressor's* CLEAVE up, or "we're low HP" makes the boss commit harder. | |
| 5. **Act** — the boss takes the top-scoring legal action (CLEAVE is masked while on cooldown). A small per-difficulty *mistake rate* adds the easy/normal/hard feel. | |
| That whole loop is the **4-bar specialist panel + shared-latent strip** you see updating in the game — a live X-ray of the model thinking. | |
| ### How it learned | |
| Trained by **self-play REINFORCE** (policy gradient + value baseline) in a headless duel | |
| simulator: reward = *damage dealt − damage taken*, plus shaping that rewards pressuring in | |
| range and punishes stalling. Over ~700 batches the win-rate climbed against a near-optimal | |
| dodging opponent and the tactics — spacing, punishing recovery frames, blocking your punish, | |
| enraging at low HP — **emerged**; none of it is hand-scripted. The **difficulty tiers are the | |
| same trained brain at different decision-noise levels** (Easy makes more exploitable mistakes, | |
| Hard plays sharp ≈0.95 win vs the dodger). | |
| ### Why the structure matters | |
| - **Modular** — you can retrain or swap one specialist without touching the others (e.g. the **Defender/BLOCK** specialist was added later and the rest were untouched). | |
| - **Explainable** — at any instant you can read *which* specialist drove the decision and how the modulators bent it. | |
| - **Cheap** — specialists are small and run in parallel; the latent bridge is one tiny matmul. | |
| ### It finetunes from *your* fights (online learning) | |
| Because the model is tiny, a gradient step is microseconds — so the boss can learn | |
| from real play **on this CPU**. Every HARD-tier fight is logged (state, action, HP per | |
| decision) and sent to a `/learn` endpoint; we rebuild the per-decision rewards (damage | |
| dealt − taken, + kill / − death), compute REINFORCE returns, and take **one Adam step** | |
| that nudges the HARD brain toward what worked against real humans — the backprop is | |
| hand-written in numpy and verified against PyTorch to ~1e-8. A frozen copy of the | |
| sim-trained weights is an **anchor** (gentle pull-back) so it can't drift into nonsense, | |
| and with a `HF_TOKEN` + `MM_DATASET_REPO` secret the adapted weights persist to a | |
| HuggingFace Dataset across Space restarts. (Only HARD fights train, so the adaptation data | |
| stays on-policy.) | |
| """ | |
| USES_MD = r""" | |
| ## π Three real-world applications of this architecture | |
| The reusable idea isn't "a boss" — it's **small, independently-trainable specialists that | |
| coordinate through a shared latent instead of through brittle hand-written rules or one giant | |
| monolithic model.** That pattern transfers well beyond games: | |
| **1. On-device / edge robotics & IoT control.** | |
| A drone, robot arm, or wearable can't run a huge policy. Give it a handful of tiny specialists | |
| — *balance*, *obstacle-avoidance*, *navigation*, *battery/thermal management* — each cheap | |
| enough for a microcontroller, coordinating through one shared latent. You can **add or replace | |
| a specialist** (e.g., a new sensor) without retraining the whole stack, and the latent bridge | |
| fuses their context in a single cheap step — exactly what this boss does at 2–4 Hz on a CPU. | |
| **2. Explainable, designer-tunable AI for games & simulations.** | |
| Studios want NPC/boss/crowd AI that's *steerable and inspectable*, not a black box. With this | |
| pattern a designer can tune or hot-swap one behavior specialist (more aggressive, more cautious) | |
| and **see exactly which specialist fired** for any decision — the same live panel shown here. | |
| That makes balancing, debugging, and difficulty tuning tractable in ways a single end-to-end | |
| policy isn't. | |
| **3. Modular AI agents / mixture-of-specialists that talk in latent space.** | |
| The original Modular Mind motivation: instead of an "agent chain" that re-serializes everything | |
| to **text** at every hop (lossy, slow), let domain specialists — *math*, *code*, *retrieval*, | |
| *safety/policy* — communicate through a **latent bridge** (`RecursiveLink` + a residual highway | |
| for deep chains). A small language model can consult a math or tool specialist **without | |
| flattening to tokens**, each specialist is trained/upgraded independently, and the system stays | |
| auditable. Useful for cost-sensitive assistants, industrial decisioning (risk + liquidity + | |
| fraud specialists), or clinical triage (modular diagnostic experts) where you must know *why*. | |
| """ | |
| PIANO_INTRO = """ | |
| ### 🎹 This may be bad: a self-playing piano — same Modular Mind method, trained on a song | |
| Under the boss fight, the *same architecture* (tiny specialists → `RecursiveLink` → a coordinator) | |
| applied to **playing piano in chords**. It was trained by **multi-note next-frame prediction** on a | |
| *polyphonic* transcription of a song: six specialists (Bass / Tenor / Soprano registers + Sustain / | |
| Onset / Phrase modulators) emit latents, the bridge merges them, and the coordinator picks the **set | |
| of notes** to play next. It plays itself with **real recorded acoustic-piano samples**, and the | |
| performance is **restyled live into A minor** — every note is lifted out of the bass register and | |
| snapped to the minor scale before it reaches the keys. Press **play** and watch each note send a | |
| glowing trail of light off the keyboard. | |
| <sub>Rough by design — one song, a tiny model, crude polyphonic transcription — the *method carrying over* is the point.</sub> | |
| """ | |
def _hidden_endpoint(handler, api_name):
    """Create an invisible textbox -> ``handler`` -> textbox route named ``api_name``.

    Must be called inside an active ``gr.Blocks`` context. Returns the
    ``(input_box, output_box, button)`` components that were created.
    """
    request_box = gr.Textbox(visible=False)
    response_box = gr.Textbox(visible=False)
    fire = gr.Button(visible=False)
    fire.click(handler, request_box, response_box, api_name=api_name)
    return request_box, response_box, fire


with gr.Blocks(title="Quazim0t0's π Thousand Token Wood Entry") as demo:
    # Page body, top to bottom: the game, the piano, the repo link, then the
    # collapsible write-ups.
    gr.Markdown(INTRO)
    gr.HTML(INDEX_HTML)
    gr.Markdown(PIANO_INTRO)
    gr.HTML(PIANO_HTML)
    gr.Markdown(REPO_MD)

    with gr.Accordion("π§ How the Modular Mind works (technical breakdown)", open=False):
        gr.Markdown(TECH_MD)
    with gr.Accordion("π Three real-world applications", open=False):
        gr.Markdown(USES_MD)

    # the third application, made real: a live mixture-of-experts at the bottom
    if build_moe_panel is not None:
        build_moe_panel()

    # hidden API plumbing: the browser calls these through the Gradio REST API
    inp, out, trigger = _hidden_endpoint(decide, "decide")
    linp, lout, ltrigger = _hidden_endpoint(learn, "learn")
    pinp, pout, ptrigger = _hidden_endpoint(piano, "piano")
if __name__ == "__main__":
    # Enable the request queue first, then start the server on all interfaces.
    queued_app = demo.queue(default_concurrency_limit=8)
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
        # directories Gradio may serve as static files (music/sfx, piano samples)
        allowed_paths=[AUDIO_DIR, PIANO_SAMPLES_DIR],
        # Gradio 6 moved these from the Blocks constructor to launch()
        theme=gr.themes.Base(),
        head=HEAD,
    )
    queued_app.launch(**launch_options)