# ModuleMind
#
# Running on Zero
#
# File size: 30,813 Bytes

"""
app.py -- Modular Mind: Boss Fight (HuggingFace Space entry point).

A 2D Dark-Souls-style duel. The boss (Demon Slime) is driven by a tiny Modular
Mind: seven specialist networks emit latents that a RecursiveLink merges into one
shared latent, and a coordinator reads it to pick the boss's next move. The brain
was trained by self-play reinforcement learning (see train.py / duel_sim.py).

The browser renders the fight at 60fps; at each decision point it calls the Python
brain through this app's /decide endpoint and shows the Modular Mind deciding live.
"""
import json
import os
import sys
from urllib.parse import quote

import gradio as gr

import modular_mind
import online

# Directory containing this file; bundled resources are resolved against it.
HERE = os.path.dirname(os.path.abspath(__file__))

# ./agents holds the MoE-experts experiment (added to the bottom of the page).
# It is optional: if the import fails for any reason the game still runs without it.
sys.path.insert(0, os.path.join(HERE, "agents"))
try:
    from panel import build_moe_panel
except Exception as exc:
    build_moe_panel = None
    print(f"[app] MoE experiment panel unavailable ({exc})")

# ./piano holds the self-playing piano (a Modular Mind trained on a song).
sys.path.insert(0, os.path.join(HERE, "piano"))
# Lazy-load cache for the piano player; "tried" marks that a load was attempted.
_PIANO = dict(player=None, tried=False)


def _get_piano():
    """Return the shared piano player, attempting to build it on the first call only.

    The first call imports ``poly_mind.PolyPlayer`` and instantiates it. If either
    step raises, the failure is printed and the cached player stays ``None``;
    later calls return the cached value without retrying.
    """
    if _PIANO["tried"]:
        return _PIANO["player"]
    # Mark the attempt before loading so a failed load is never repeated.
    _PIANO["tried"] = True
    try:
        from poly_mind import PolyPlayer
        loaded = PolyPlayer()
    except Exception as err:
        print(f"[app] piano Modular Mind unavailable ({err})")
    else:
        _PIANO["player"] = loaded
    return _PIANO["player"]


# Keyboard range (MIDI note numbers) and frame rate for the on-screen piano, read from
# piano/poly_notes.json. Any failure (missing file, bad JSON, missing keys) falls back
# to built-in defaults so the page still renders.
try:
    # Context manager closes the file promptly; JSON is read explicitly as UTF-8.
    with open(os.path.join(HERE, "piano", "poly_notes.json"), encoding="utf-8") as _fh:
        _pmeta = json.load(_fh)
    PIANO_LO, PIANO_HI, PIANO_FPS = _pmeta["midi_lo"], _pmeta["midi_hi"], _pmeta.get("fps", 8)
except Exception:
    PIANO_LO, PIANO_HI, PIANO_FPS = 56, 86, 8

# the performance is restyled live into A minor with the bass lifted away (see
# piano/poly_mind.py stylize_midi), so the on-screen keyboard starts at middle C
PIANO_LO = max(PIANO_LO, 60)

# Warm the piano Modular Mind at app startup so the first play is instant.
# Safe to call unconditionally: _get_piano() catches load failures and returns None.
_get_piano()


def _read(path):
    """Return the entire contents of the UTF-8 text file at `path` as one string."""
    with open(path, encoding="utf-8", mode="r") as source:
        text = source.read()
    return text


# Front-end assets, read once at import time and later inlined into the page <head>
# (INDEX_HTML is rendered through gr.HTML). Paths are relative to this file's directory.
CSS, GAME_JS, ASSETS_JS, INDEX_HTML = (
    _read(os.path.join(HERE, *relative_parts))
    for relative_parts in (
        ("web", "game.css"),
        ("web", "game.js"),
        ("assets_data.js",),
        ("web", "index.html"),
    )
)

# Music and sound effects are served by Gradio as static files (the directory is
# listed in allowed_paths at launch); the game builds its audio URLs from this base.
AUDIO_DIR = os.path.join(HERE, "audio")
# The absolute path may contain spaces, so percent-encode it -- but leave "/" and the
# drive-letter ":" untouched so the URL still maps back to the path.
AUDIO_BASE_URL = f"/gradio_api/file={quote(AUDIO_DIR.replace(os.sep, '/'), safe='/:')}/"

# real acoustic-grand-piano note samples (served static; the piano plays the nearest
# sample pitch-shifted to each note, for a real piano sound instead of an oscillator).
PIANO_SAMPLES_DIR = os.path.join(HERE, "piano", "samples")
try:
    _sample_files = os.listdir(PIANO_SAMPLES_DIR)
except OSError:
    # Missing or unreadable directory: advertise no samples to the browser.
    _sample_files = []
# Sample files are named "<midi>.mp3". Skip any .mp3 whose stem is not a plain
# decimal number, so one stray file cannot wipe out the whole sample list.
PIANO_SAMPLE_MIDIS = sorted(
    int(fname[:-4])
    for fname in _sample_files
    if fname.endswith(".mp3") and fname[:-4].isdecimal()
)
PIANO_SAMPLE_BASE = "/gradio_api/file=" + quote(PIANO_SAMPLES_DIR.replace(os.sep, "/"), safe="/:") + "/"

# Warm the default brain: request the "hard" mind once at import time.
# NOTE(review): presumably get_mind() caches what it loads so later /decide calls
# reuse it -- confirm in modular_mind.
modular_mind.get_mind("hard")


def decide(state_json: str) -> str:
    """Pick the boss's next move; called by the browser at each boss decision point.

    Args:
        state_json: Game state serialized as JSON (includes a "difficulty" tier).

    Returns:
        The result of ``modular_mind.decide`` (chosen action + telemetry) as JSON.

    A payload that is not valid JSON, or that parses to anything other than a JSON
    object, is replaced by an empty state so the brain is always handed a dict.
    """
    try:
        state = json.loads(state_json)
    except (TypeError, ValueError, RecursionError):
        # TypeError: non-string payload; ValueError: malformed JSON;
        # RecursionError: pathologically nested input.
        state = {}
    if not isinstance(state, dict):
        # Valid JSON that is not an object ("[]", "null", "3") gets the same
        # fallback as unparseable input.
        state = {}
    return json.dumps(modular_mind.decide(state))


def learn(traj_json: str) -> str:
    """Receive one finished fight from the browser and pass it to the online learner.

    The browser calls this at the end of a fight with the full decision trajectory
    plus the outcome. The parsed payload goes to ``online.record_fight``, which is
    expected to buffer it and periodically finetune the HARD brain (REINFORCE).

    Returns:
        ``record_fight``'s result as JSON, or ``{"error": "bad json"}`` when the
        payload cannot be parsed.
    """
    try:
        trajectory = json.loads(traj_json)
    except Exception:
        reply = {"error": "bad json"}
    else:
        reply = online.record_fight(trajectory)
    return json.dumps(reply)


def piano(payload_json: str) -> str:
    """Generate the next frames for the browser's self-playing piano.

    The Modular Mind generates autoregressively from the recent history. Generation
    runs server-side; the history itself is kept by the client and sent each call.

    Args:
        payload_json: JSON object ``{"history": [...], "n": int}``. ``history`` is the
            list of token frames produced so far (empty/missing -> the player's seed
            is used). ``n`` is how many frames to generate, clamped to 1..64
            (default 32).

    Returns:
        JSON ``{"frames": [[midi, ...], ...], "telem": [...], "history": [...]}``:
        one list of MIDI note numbers and one telemetry entry per generated frame,
        plus the last ``player.K`` history frames for the client to send back.
        If the piano model is unavailable, ``frames``/``telem``/``notes`` are empty,
        ``history`` echoes the request and an ``"error"`` field is set.

    A payload that is not a JSON object, a non-list ``history`` or a non-numeric
    ``n`` falls back to the defaults instead of raising.
    """
    try:
        req = json.loads(payload_json)
    except (TypeError, ValueError, RecursionError):
        req = {}
    if not isinstance(req, dict):
        req = {}

    raw_hist = req.get("history")
    hist = list(raw_hist) if isinstance(raw_hist, list) else []

    try:
        n = int(req.get("n", 32))
    except (TypeError, ValueError, OverflowError):
        # null, non-numeric strings, NaN and Infinity all land here.
        n = 32
    n = max(1, min(64, n))

    player = _get_piano()
    if player is None:
        # Same shape as a successful reply (empty frames/telem); "notes" is kept
        # for backward compatibility with the previous error payload.
        return json.dumps({"frames": [], "telem": [], "notes": [],
                           "history": hist, "error": "piano unavailable"})
    if not hist:
        hist = [list(frame) for frame in player.seed]

    frames, telem = [], []
    for _ in range(n):
        toks, midis, tl = player.next_frame(hist)
        hist.append(toks)
        frames.append([int(x) for x in midis])
        telem.append(tl)
    return json.dumps({
        "frames": frames,
        "telem": telem,
        # Only the last K frames go back to the client, bounding the payload size.
        "history": [list(map(int, f)) for f in hist[-player.K:]],
    })


# Bootstrap (runs in the browser): wire window.MM_DECIDE and window.MM_LEARN to this
# app's /decide and /learn endpoints via Gradio's REST API (no external CDN), then
# boot the game once the gr.HTML canvas is in the DOM.
# Each call is a POST to /gradio_api/call/<name> followed by a GET on the returned
# event_id; MM_DECIDE parses the last "data:" line of that response, MM_LEARN
# ignores the result and swallows errors (best-effort).
# NOTE: this is a non-raw Python string, so '\\n' below reaches the browser as '\n'.
BOOTSTRAP_JS = """
(function () {
  // route each boss decision to the Python Modular Mind through /gradio_api/call
  window.MM_DECIDE = async (state) => {
    const post = await fetch('/gradio_api/call/decide', {
      method: 'POST', headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({data: [JSON.stringify(state)]}),
    });
    const j = await post.json();
    const res = await fetch('/gradio_api/call/decide/' + j.event_id);
    const text = await res.text();
    const line = text.split('\\n').filter(l => l.startsWith('data:')).pop();
    const arr = JSON.parse(line.slice(5).trim());
    return JSON.parse(arr[0]);
  };
  // send a finished fight's trajectory to the online learner (fire-and-forget)
  window.MM_LEARN = async (traj) => {
    try {
      const post = await fetch('/gradio_api/call/learn', {
        method: 'POST', headers: {'Content-Type': 'application/json'},
        body: JSON.stringify({data: [JSON.stringify(traj)]}),
      });
      const j = await post.json();
      await fetch('/gradio_api/call/learn/' + j.event_id);
    } catch (e) { /* learning is best-effort */ }
  };
  const tryBoot = () => {
    if (document.getElementById('mm-canvas') && window.__mmBoot) window.__mmBoot();
    else setTimeout(tryBoot, 80);
  };
  if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', tryBoot);
  else tryBoot();
})();
"""

# Force Gradio dark mode (matches the dark game) regardless of the visitor's browser
# setting, by ensuring the ?__theme=dark URL param is present before the app renders.
# If the param is missing or different, the script redirects to the same path with
# __theme=dark set, keeping the other query params and the hash; any error is ignored.
FORCE_DARK_JS = """
(function () {
  try {
    var p = new URLSearchParams(window.location.search);
    if (p.get('__theme') !== 'dark') {
      p.set('__theme', 'dark');
      window.location.replace(window.location.pathname + '?' + p.toString() + window.location.hash);
    }
  } catch (e) {}
})();
"""

# ---- self-playing piano (a Modular Mind trained on the song) -----------------
# Stylesheet for the piano widget: the stage and note-trail canvas (#mm-piano-stage,
# #mm-piano-roll), the keys (.pk.white / .pk.black / .pk.on), the play button and
# note readout, the per-specialist cards (.psp) and the shared-latent strip
# (#mm-piano-latent .lc). Injected into the page <head> via HEAD.
PIANO_CSS = """
#mm-piano-wrap{max-width:920px;margin:6px auto 2px;font-family:system-ui,sans-serif}
#mm-piano-stage{position:relative;background:radial-gradient(ellipse at 50% 110%,#1b1430 0%,#0c0c14 62%,#08080e 100%);
  border:1px solid #2a2a35;border-radius:10px;padding:0 8px 12px;overflow-x:auto;overflow-y:hidden}
#mm-piano-roll{display:block;width:100%;height:150px}
#mm-piano{display:flex;align-items:flex-end;justify-content:center;gap:2px;height:112px}
.pk{box-sizing:border-box;border:1px solid #05050a;border-radius:0 0 4px 4px;flex:0 0 auto;
  transition:background .07s ease,box-shadow .07s ease}
.pk.white{width:20px;height:100px;background:linear-gradient(180deg,#f4f4f8 0%,#d6d6e0 88%,#b9b9c6 100%)}
.pk.black{width:14px;height:62px;background:linear-gradient(180deg,#3a3a46 0%,#1b1b23 100%)}
.pk.on{transform:translateY(1px)}
#mm-piano-ctrl{display:flex;gap:14px;align-items:center;justify-content:center;margin:8px auto 4px}
#mm-piano-btn{cursor:pointer;background:#2a9d6a;color:#fff;border:none;border-radius:6px;
  padding:9px 18px;font-weight:700;font-size:14px}
#mm-piano-btn:hover{background:#33b87c}
#mm-piano-note{color:#9bd;font-size:13px;min-width:96px;text-align:left}
#mm-piano-specs{display:flex;gap:8px;justify-content:center;flex-wrap:wrap;margin:8px auto 2px;max-width:800px}
.psp{width:110px;background:#16161e;border:1px solid #2a2a35;border-radius:6px;padding:6px 9px}
.psp .nm{font-weight:700;font-size:12px}
.psp .ow{opacity:.55;font-size:10px;color:#aaa;margin-top:1px}
.psp .bar{height:7px;background:#2a2a33;border-radius:4px;margin-top:6px;overflow:hidden}
.psp .fill{height:100%;width:4%;border-radius:4px;transition:width .12s ease}
#mm-piano-lbl{text-align:center;color:#888;font-size:11px;margin-top:8px}
#mm-piano-latent{display:flex;gap:3px;justify-content:center;align-items:flex-end;height:22px;margin:5px auto}
#mm-piano-latent .lc{width:8px;height:3px;border-radius:2px;background:#3a3a48;transition:height .12s ease}
"""
# JavaScript assignments that hand the server-side piano settings (key range, frame
# rate, sample URL base and the available sample pitches) to the browser as
# window.MM_PIANO_* globals. The last two are JSON-encoded to form valid JS literals.
PIANO_GLOBALS = "".join((
    f"window.MM_PIANO_LO={PIANO_LO};",
    f"window.MM_PIANO_HI={PIANO_HI};",
    f"window.MM_PIANO_FPS={PIANO_FPS};",
    f"window.MM_PIANO_SAMPLE_BASE={json.dumps(PIANO_SAMPLE_BASE)};",
    f"window.MM_PIANO_SAMPLE_MIDIS={json.dumps(PIANO_SAMPLE_MIDIS)};",
))
# Browser-side piano engine (raw string, injected into the page <head> via HEAD).
# window.__pianoBoot runs once #mm-piano exists and:
#   * builds one key element per MIDI note from MM_PIANO_LO to MM_PIANO_HI;
#   * on play, fetches 32-frame phrases from /gradio_api/call/piano whenever fewer
#     than 10 frames are queued, sending the client-held history with each request;
#   * plays one queued frame per tick (every PLAY_MS): strikes new notes, holds
#     sustained ones and releases dropped ones;
#   * voices each note with the nearest loaded sample (pitch-shifted), falling back
#     to a two-oscillator tone until samples have loaded;
#   * draws rising note trails and sparks on the #mm-piano-roll canvas and updates
#     the specialist bars and shared-latent strip from each frame's telemetry.
PIANO_JS = r"""
(function(){
  var SPC={Bass:'#4da6ff',Tenor:'#2ecc71',Soprano:'#ff6b9d',Sustain:'#1abc9c',Rest:'#95a5a6',Onset:'#e67e22',Phrase:'#9b59b6'};
  window.__pianoBoot = function(){
    var wrap=document.getElementById('mm-piano');
    if(!wrap || wrap.dataset.built) return; wrap.dataset.built='1';
    var LO=window.MM_PIANO_LO||56, HI=window.MM_PIANO_HI||86, BLACK={1:1,3:1,6:1,8:1,10:1};
    for(var m=LO;m<=HI;m++){var k=document.createElement('div');
      k.className='pk '+(BLACK[m%12]?'black':'white'); k.id='pk-'+m; wrap.appendChild(k);}
    var audio=null, playing=false, queue=[], history=[], fetching=false, timer=null, voices={};
    var specFills={}, built=false, buffers={}, loaded=false;
    var PLAY_MS=Math.round(1000/(window.MM_PIANO_FPS||8))+58;     // a touch slower = calmer feel
    var noteEl=document.getElementById('mm-piano-note'), btn=document.getElementById('mm-piano-btn');
    var specBox=document.getElementById('mm-piano-specs'), latBox=document.getElementById('mm-piano-latent');
    var NN=['C','C#','D','D#','E','F','F#','G','G#','A','A#','B'];
    function name(m){return NN[m%12]+(Math.floor(m/12)-1);}
    // ---- light show: glowing note trails rise off the keys while they sound ----
    var roll=document.getElementById('mm-piano-roll');
    var rctx=roll?roll.getContext('2d'):null, trails=[], sparks=[], keyTrail={}, rafOn=false;
    function hue(m){return ((m%12)*30+200)%360;}      // pitch class -> color wheel
    function ensureRaf(){ if(rctx && !rafOn){ rafOn=true; requestAnimationFrame(draw); } }
    function draw(){
      var w=roll.clientWidth, hgt=roll.clientHeight, now=performance.now(), v=0.05;
      if(roll.width!==w) roll.width=w; if(roll.height!==hgt) roll.height=hgt;
      rctx.clearRect(0,0,w,hgt);
      for(var i=trails.length-1;i>=0;i--){ var tr=trails[i];
        var top=hgt-(now-tr.t0)*v, bot=hgt-(tr.t1?(now-tr.t1)*v:0);
        if(bot<-30){ trails.splice(i,1); continue; }
        top=Math.max(top,-30);
        var g=rctx.createLinearGradient(0,top,0,bot);
        g.addColorStop(0,'hsla('+tr.h+',85%,62%,0)');
        g.addColorStop(1,'hsla('+tr.h+',85%,62%,0.9)');
        rctx.shadowColor='hsl('+tr.h+',85%,60%)'; rctx.shadowBlur=10;
        rctx.fillStyle=g; rctx.fillRect(tr.x,top,tr.w,Math.max(2,bot-top));
      }
      rctx.shadowBlur=0;
      for(var j=sparks.length-1;j>=0;j--){ var s=sparks[j], a=1-(now-s.t0)/650;
        if(a<=0){ sparks.splice(j,1); continue; }
        s.x+=s.vx; s.y+=s.vy;
        rctx.fillStyle='hsla('+s.h+',95%,72%,'+a.toFixed(2)+')';
        rctx.fillRect(s.x,s.y,2.2,2.2);
      }
      if(!playing && !trails.length && !sparks.length){ rafOn=false; rctx.clearRect(0,0,w,hgt); return; }
      requestAnimationFrame(draw);
    }
    function strikeFx(m,el){
      if(!rctx||!el) return;
      var x=el.offsetLeft-roll.offsetLeft, wd=el.offsetWidth, hh=hue(m), now=performance.now();
      var tr={x:x,w:wd,h:hh,t0:now,t1:null};
      trails.push(tr); keyTrail[m]=tr;
      for(var i=0;i<6;i++) sparks.push({x:x+wd/2,y:roll.clientHeight-3,
        vx:(Math.random()-0.5)*1.6, vy:-(0.6+Math.random()*1.4), h:hh, t0:now});
      ensureRaf();
    }
    function endTrail(m){ if(keyTrail[m]){ keyTrail[m].t1=performance.now(); delete keyTrail[m]; } }
    function lightKey(m,on){
      var el=document.getElementById('pk-'+m); if(!el) return el;
      if(on){ el.classList.add('on'); var hh=hue(m);
        el.style.background='hsl('+hh+',82%,'+(BLACK[m%12]?'46%':'68%')+')';
        el.style.boxShadow='0 0 18px hsl('+hh+',85%,60%)';
      } else { el.classList.remove('on'); el.style.background=''; el.style.boxShadow=''; }
      return el;
    }
    function buildSpecs(telem){
      if(built || !specBox || !telem) return; built=true;
      telem.spec.forEach(function(s){
        var c=SPC[s.name]||'#888', card=document.createElement('div'); card.className='psp';
        card.innerHTML='<div class="nm" style="color:'+c+'">'+s.name+'</div>'+
          '<div class="ow">'+(s.owns?('owns '+s.owns):'modulator')+'</div>'+
          '<div class="bar"><div class="fill" style="background:'+c+'"></div></div>';
        specBox.appendChild(card); specFills[s.name]=card.querySelector('.fill');
      });
      if(latBox){ for(var i=0;i<8;i++){var lc=document.createElement('div'); lc.className='lc'; latBox.appendChild(lc);} }
    }
    function updateSpecs(telem){
      if(!telem) return; buildSpecs(telem);
      telem.spec.forEach(function(s){
        var h;
        if(s.owns!=null && s.drive!=null){ h=Math.abs(s.drive)/4.0*100; }   // owners: by drive
        else { h=(s.act-16.0)/10.0*100; }                                   // modulators: by latent pulse
        h=Math.max(4,Math.min(100,h));
        if(specFills[s.name]) specFills[s.name].style.width=h+'%';
      });
      if(latBox && telem.shared){ var lc=latBox.children;
        for(var i=0;i<lc.length && i<telem.shared.length;i++){
          lc[i].style.height=Math.max(2,Math.min(20,Math.abs(telem.shared[i])*9))+'px';
          lc[i].style.background=telem.shared[i]>=0?'#5bbcdf':'#df7a5b';
        } }
    }
    async function fetchPhrase(){
      if(fetching) return; fetching=true;
      try{
        var post=await fetch('/gradio_api/call/piano',{method:'POST',
          headers:{'Content-Type':'application/json'},
          body:JSON.stringify({data:[JSON.stringify({history:history,n:32})]})});
        var j=await post.json();
        var res=await fetch('/gradio_api/call/piano/'+j.event_id);
        var text=await res.text();
        var line=text.split('\n').filter(function(l){return l.indexOf('data:')===0;}).pop();
        var out=JSON.parse(JSON.parse(line.slice(5).trim())[0]);
        history=out.history||history;
        var fr=out.frames||[]; for(var i=0;i<fr.length;i++) queue.push({f:fr[i], t:(out.telem&&out.telem[i])||null});
      }catch(e){}
      fetching=false;
    }
    function loadSamples(){
      if(loaded || !audio) return; loaded=true;          // background load; play() upgrades to samples as they arrive
      var ms=window.MM_PIANO_SAMPLE_MIDIS||[], base=window.MM_PIANO_SAMPLE_BASE||'';
      ms.forEach(function(sm){
        var ctl=('AbortController' in window)?new AbortController():null;
        var to=ctl?setTimeout(function(){ctl.abort();},8000):0;
        fetch(base+sm+'.mp3', ctl?{signal:ctl.signal}:{}).then(function(r){return r.arrayBuffer();})
          .then(function(ab){audio.decodeAudioData(ab,function(buf){buffers[sm]=buf;},function(){});})
          .catch(function(){}).finally(function(){if(to)clearTimeout(to);});
      });
    }
    function nearest(m){ var ks=Object.keys(buffers); if(!ks.length) return null;
      return ks.map(Number).reduce(function(a,b){return Math.abs(b-m)<Math.abs(a-m)?b:a;}); }
    function voice(m, vol){          // real sample if it's loaded, else an oscillator -> ALWAYS audible
      if(!audio) return null;
      var sm=nearest(m), t=audio.currentTime;
      if(sm!=null && buffers[sm]){
        var src=audio.createBufferSource(); src.buffer=buffers[sm];
        src.playbackRate.value=Math.pow(2,(m-sm)/12);
        var g=audio.createGain(); g.gain.value=vol;
        src.connect(g); g.connect(audio.destination); src.start(t);
        return {src:src, gain:g};
      }
      var f=440*Math.pow(2,(m-69)/12);
      var o1=audio.createOscillator(); o1.type='triangle'; o1.frequency.value=f;
      var o2=audio.createOscillator(); o2.type='sine'; o2.frequency.value=f*2;
      var g2=audio.createGain(); g2.gain.value=0.18;
      var lp=audio.createBiquadFilter(); lp.type='lowpass'; lp.frequency.value=2600;
      var g=audio.createGain();
      g.gain.setValueAtTime(0.0001,t); g.gain.exponentialRampToValueAtTime(vol,t+0.014);
      g.gain.exponentialRampToValueAtTime(Math.max(0.0001,vol*0.3),t+1.6);
      o1.connect(lp); o2.connect(g2); g2.connect(lp); lp.connect(g); g.connect(audio.destination);
      o1.start(t); o2.start(t);
      return {oscs:[o1,o2], gain:g};
    }
    function releaseNode(nd){
      if(!nd || !audio) return; var t=audio.currentTime;
      try{ nd.gain.gain.cancelScheduledValues(t);
           nd.gain.gain.setValueAtTime(Math.max(nd.gain.gain.value,0.0001),t);
           nd.gain.gain.linearRampToValueAtTime(0.0001,t+0.10);
           if(nd.src) nd.src.stop(t+0.13); if(nd.oscs) nd.oscs.forEach(function(o){o.stop(t+0.13);});
      }catch(e){}
    }
    function releaseAll(){
      for(var mk in voices){ releaseNode(voices[mk]); lightKey(+mk,false); endTrail(+mk); }
      voices={};
    }
    function playFrame(midis){     // polyphony: strike new notes, hold sustained ones, release dropped ones
      var nw={}; (midis||[]).forEach(function(m){ if(m>0) nw[m]=1; });
      for(var mk in voices){ if(!nw[mk]){ releaseNode(voices[mk]);
        lightKey(+mk,false); endTrail(+mk); delete voices[mk]; } }
      var on=Object.keys(nw), vol=on.length>2?0.5:0.65;
      on.forEach(function(ms){ var m=+ms; if(!voices[m]){ var v=voice(m,vol); if(v) voices[m]=v;
        strikeFx(m, lightKey(m,true)); } });
      if(noteEl){ noteEl.textContent= on.length ? ('♪ '+on.map(function(ms){return name(+ms);}).join(' ')) : '♪ (rest)'; }
    }
    function tick(){ if(!playing) return; if(queue.length<10 && !fetching) fetchPhrase();
      if(queue.length>0){ var it=queue.shift(); playFrame(it.f); updateSpecs(it.t); } }
    function start(){
      if(!audio) audio=new (window.AudioContext||window.webkitAudioContext)();
      if(audio.state==='suspended'){ try{audio.resume();}catch(e){} }
      loadSamples();                          // real piano loads in background; oscillator plays until then
      playing=true; btn.textContent='⏸ Pause'; ensureRaf();
      if(queue.length===0) fetchPhrase();
      if(!timer) timer=setInterval(tick, PLAY_MS);
    }
    function stop(){ playing=false; btn.textContent='▶ Let the Modular Mind play'; releaseAll(); }
    btn.onclick=function(){ playing?stop():start(); };
  };
  var t=function(){ if(document.getElementById('mm-piano')) window.__pianoBoot(); else setTimeout(t,120); };
  if(document.readyState==='loading') document.addEventListener('DOMContentLoaded',t); else t();
})();
"""
# Static markup for the piano widget, rendered through gr.HTML. The element ids
# (mm-piano, mm-piano-roll, mm-piano-btn, mm-piano-note, mm-piano-specs,
# mm-piano-latent) are the hooks PIANO_JS and PIANO_CSS look up, so keep them in sync.
PIANO_HTML = """
<div id="mm-piano-wrap">
  <div id="mm-piano-stage">
    <canvas id="mm-piano-roll"></canvas>
    <div id="mm-piano"></div>
  </div>
  <div id="mm-piano-ctrl">
    <button id="mm-piano-btn">▶ Let the Modular Mind play</button>
    <span id="mm-piano-note">♪</span>
  </div>
  <div id="mm-piano-lbl">restyled live into <b>A minor</b> — every note is lifted out of the bass and snapped to the minor scale · Bass / Tenor / Soprano own a register; Sustain / Onset / Phrase are modulators that only write to the shared latent</div>
  <div id="mm-piano-specs"></div>
  <div id="mm-piano-latent" title="RecursiveLink shared latent"></div>
</div>
"""

# Everything injected verbatim into the page <head>, one element per line, in load
# order: dark-mode forcer, the two stylesheets, embedded sprite atlases, the audio
# and piano globals, the game engine, the piano engine, and finally the bootstrap.
# (Inline <script> in <head> runs reliably; gr.HTML's innerHTML scripts do not.)
HEAD = "".join(
    f"{element}\n"
    for element in (
        f"<script>{FORCE_DARK_JS}</script>",
        f"<style>{CSS}</style>",
        f"<style>{PIANO_CSS}</style>",
        f"<script>{ASSETS_JS}</script>",
        f"<script>window.MM_AUDIO_BASE = {json.dumps(AUDIO_BASE_URL)};</script>",
        f"<script>{PIANO_GLOBALS}</script>",
        f"<script>{GAME_JS}</script>",
        f"<script>{PIANO_JS}</script>",
        f"<script>{BOOTSTRAP_JS}</script>",
    )
)

# Page header (Markdown). The specialist count matches the roster documented in
# TECH_MD: five action-owning specialists plus two modulators = seven.
INTRO = """
# 🍄 Modular Mind
A mini **Dark-Souls-style** duel where the boss is controlled by a **Modular Mind** — seven tiny
specialist networks that communicate through a **shared latent** (RecursiveLink) and a coordinator
that picks each move. The brain was **trained by self-play reinforcement learning**, not scripted.
Watch the right-hand panel: every boss decision shows which specialists fired and how the modulators
steer the fight through the latent. **Click *Enter the Fog* and click the game once to focus, then play.**
"""

# ---- placeholder repo link (replace REPO_URL with your real GitHub URL) ------
REPO_URL = "#"  # TODO: replace with the real repo
# Currently renders nothing: the Markdown is a single newline and does not use REPO_URL.
REPO_MD = "\n"

# Technical breakdown shown in the first accordion (Markdown, raw string).
# The specialist counts in the prose agree with the roster table below:
# five action-owners + two modulators = seven specialists.
TECH_MD = r"""
## 🧠 How the model works (technical breakdown)

The boss brain is a faithful, **specialist-scale** implementation of the Modular Mind
architecture — small enough (~**4,500 parameters**, pure-NumPy inference) to decide in
well under a millisecond on a free CPU, yet structurally identical to the big idea:
**many small domain specialists that communicate through one shared latent.**

### The pieces
- **7 specialists** (tiny 2-layer MLPs). Five *own an action* and two *modulators* own none:
  | Specialist | Owns | Role |
  |---|---|---|
  | **Aggressor** | `CLEAVE` | attack when in range |
  | **Stalker** | `APPROACH` | close the distance |
  | **Survivor** | `RETREAT` | reset spacing when it can't swing |
  | **Baiter** | `IDLE` | wait / bait a whiff |
  | **Defender** | `BLOCK` | guard the player's melee when it can't punish |
  | **Punisher** | — *(modulator)* | detects "the player is open / recovering" |
  | **Enrage** | — *(modulator)* | detects "we're low on HP → go berserk" |
- **`RecursiveLink`** — a ReGLU + residual block that merges the seven latents into **one shared latent** (the "bridge").
- **Coordinator** — a linear read-out of the shared latent that nudges every action's score.

### What every specialist is doing *at one moment* (a single decision tick)
A souls boss commits to one move at a time, so the brain only fires when the boss is free
(~2–4 times/second). In that one forward pass, **all seven specialists run in parallel**:

1. **Perceive** — the live game state is compressed to a **10-D feature vector** (distance, in-range?, boss HP, player HP, cooldown ready?, is the player attacking / recovering / blocking?).
2. **Specialise** — each specialist computes `h = tanh(W₁·features)` and emits a **latent vector** `zᵢ` (its "opinion"); the five action-owners also emit a scalar **drive** for their move.
3. **Communicate** — the seven latents are summed and pushed through the **`RecursiveLink`** to form the **shared latent** `s`. This is the only channel the **modulators** have: *Punisher* writes "player is open" and *Enrage* writes "HP is low" into `s` — they cast no direct vote.
4. **Coordinate** — the **coordinator** reads `s` and produces a **modulation** added to each action's score. So `score(action) = (owner's drive) + (coordinator modulation)`. This is where "the player is open" turns *Aggressor's* CLEAVE up, or "we're low HP" makes the boss commit harder.
5. **Act** — the boss takes the top-scoring legal action (CLEAVE is masked while on cooldown). A small per-difficulty *mistake rate* adds the easy/normal/hard feel.

That whole loop is the **4-bar specialist panel + shared-latent strip** you see updating in the game — a live X-ray of the model thinking.

### How it learned
Trained by **self-play REINFORCE** (policy gradient + value baseline) in a headless duel
simulator: reward = *damage dealt − damage taken*, plus shaping that rewards pressuring in
range and punishes stalling. Over ~700 batches the win-rate climbed against a near-optimal
dodging opponent and the tactics — spacing, punishing recovery frames, blocking your punish,
enraging at low HP — **emerged**; none of it is hand-scripted. The **difficulty tiers are the
same trained brain at different decision-noise levels** (Easy makes more exploitable mistakes,
Hard plays sharp ≈0.95 win vs the dodger).

### Why the structure matters
- **Modular** — you can retrain or swap one specialist without touching the others (e.g. the **Defender/BLOCK** specialist was added later and the rest were untouched).
- **Explainable** — at any instant you can read *which* specialist drove the decision and how the modulators bent it.
- **Cheap** — specialists are small and run in parallel; the latent bridge is one tiny matmul.

### It finetunes from *your* fights (online learning)
Because the model is tiny, a gradient step is microseconds — so the boss can learn
from real play **on this CPU**. Every HARD-tier fight is logged (state, action, HP per
decision) and sent to a `/learn` endpoint; we rebuild the per-decision rewards (damage
dealt − taken, + kill / − death), compute REINFORCE returns, and take **one Adam step**
that nudges the HARD brain toward what worked against real humans — the backprop is
hand-written in numpy and verified against PyTorch to ~1e-8. A frozen copy of the
sim-trained weights is an **anchor** (gentle pull-back) so it can't drift into nonsense,
and with a `HF_TOKEN` + `MM_DATASET_REPO` secret the adapted weights persist to a
HuggingFace Dataset across Space restarts. (Only HARD fights train, so the adaptation data
stays on-policy.)
"""

# Markdown for the "Three real-world applications" accordion (raw string): edge
# robotics/IoT control, explainable game AI, and latent-space modular agents.
USES_MD = r"""
## 🌍 Three real-world applications of this architecture

The reusable idea isn't "a boss" — it's **small, independently-trainable specialists that
coordinate through a shared latent instead of through brittle hand-written rules or one giant
monolithic model.** That pattern transfers well beyond games:

**1. On-device / edge robotics & IoT control.**
A drone, robot arm, or wearable can't run a huge policy. Give it a handful of tiny specialists
— *balance*, *obstacle-avoidance*, *navigation*, *battery/thermal management* — each cheap
enough for a microcontroller, coordinating through one shared latent. You can **add or replace
a specialist** (e.g., a new sensor) without retraining the whole stack, and the latent bridge
fuses their context in a single cheap step — exactly what this boss does at 2–4 Hz on a CPU.

**2. Explainable, designer-tunable AI for games & simulations.**
Studios want NPC/boss/crowd AI that's *steerable and inspectable*, not a black box. With this
pattern a designer can tune or hot-swap one behavior specialist (more aggressive, more cautious)
and **see exactly which specialist fired** for any decision — the same live panel shown here.
That makes balancing, debugging, and difficulty tuning tractable in ways a single end-to-end
policy isn't.

**3. Modular AI agents / mixture-of-specialists that talk in latent space.**
The original Modular Mind motivation: instead of an "agent chain" that re-serializes everything
to **text** at every hop (lossy, slow), let domain specialists — *math*, *code*, *retrieval*,
*safety/policy* — communicate through a **latent bridge** (`RecursiveLink` + a residual highway
for deep chains). A small language model can consult a math or tool specialist **without
flattening to tokens**, each specialist is trained/upgraded independently, and the system stays
auditable. Useful for cost-sensitive assistants, industrial decisioning (risk + liquidity +
fraud specialists), or clinical triage (modular diagnostic experts) where you must know *why*.
"""


# Markdown shown directly above the piano widget: explains that the same
# specialists -> RecursiveLink -> coordinator layout drives the self-playing piano.
PIANO_INTRO = """
### 🎹 This may be bad: a self-playing piano — same Modular Mind method, trained on a song
Under the boss fight, the *same architecture* (tiny specialists → `RecursiveLink` → a coordinator)
applied to **playing piano in chords**. It was trained by **multi-note next-frame prediction** on a
*polyphonic* transcription of a song: six specialists (Bass / Tenor / Soprano registers + Sustain /
Onset / Phrase modulators) emit latents, the bridge merges them, and the coordinator picks the **set
of notes** to play next. It plays itself with **real recorded acoustic-piano samples**, and the
performance is **restyled live into A minor** — every note is lifted out of the bass register and
snapped to the minor scale before it reaches the keys. Press **play** and watch each note send a
glowing trail of light off the keyboard.
<sub>Rough by design — one song, a tiny model, crude polyphonic transcription — the *method carrying over* is the point.</sub>
"""

def _hidden_endpoint(fn, api_name):
    """Create an invisible textbox/textbox/button trio and route `fn` through it.

    Nothing in the page clicks the button: registering the click handler under
    `api_name` is what lets the browser call `fn` via /gradio_api/call/<api_name>.
    Must be called inside an active gr.Blocks context. Returns the three
    components as (input box, output box, button).
    """
    box_in = gr.Textbox(visible=False)
    box_out = gr.Textbox(visible=False)
    button = gr.Button(visible=False)
    button.click(fn, box_in, box_out, api_name=api_name)
    return box_in, box_out, button


with gr.Blocks(title="Quazim0t0's 🍄 Thousand Token Wood Entry") as demo:
    # Top of the page: intro text, then the game itself.
    gr.Markdown(INTRO)
    gr.HTML(INDEX_HTML)

    # Self-playing piano section.
    gr.Markdown(PIANO_INTRO)
    gr.HTML(PIANO_HTML)

    # Repo link placeholder, then the two collapsed write-ups.
    gr.Markdown(REPO_MD)
    with gr.Accordion("🧠 How the Modular Mind works (technical breakdown)", open=False):
        gr.Markdown(TECH_MD)
    with gr.Accordion("🌍 Three real-world applications", open=False):
        gr.Markdown(USES_MD)

    # the third application, made real: a live mixture-of-experts at the bottom
    if build_moe_panel is not None:
        build_moe_panel()

    # Hidden API plumbing: the browser reaches /decide, /learn and /piano through
    # the Gradio REST API; these invisible components exist only to register them.
    inp, out, trigger = _hidden_endpoint(decide, "decide")
    linp, lout, ltrigger = _hidden_endpoint(learn, "learn")
    pinp, pout, ptrigger = _hidden_endpoint(piano, "piano")


if __name__ == "__main__":
    # Gradio 6 moved `theme` and `head` from the Blocks constructor to launch().
    launch_kwargs = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", "7860")),
        # Directories Gradio may serve as static files (game audio, piano samples).
        "allowed_paths": [AUDIO_DIR, PIANO_SAMPLES_DIR],
        "theme": gr.themes.Base(),
        "head": HEAD,
    }
    queued_app = demo.queue(default_concurrency_limit=8)
    queued_app.launch(**launch_kwargs)