File size: 13,151 Bytes
414dc55
 
 
 
 
80cd1f2
 
 
 
 
 
 
 
414dc55
 
 
 
80cd1f2
414dc55
 
 
80cd1f2
414dc55
 
 
 
 
80cd1f2
414dc55
 
 
 
 
 
 
 
80cd1f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80cd1f2
414dc55
 
 
 
 
80cd1f2
 
 
 
 
 
 
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80cd1f2
414dc55
 
 
80cd1f2
 
 
414dc55
 
 
 
80cd1f2
 
 
 
414dc55
80cd1f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
80cd1f2
 
 
414dc55
 
80cd1f2
 
414dc55
80cd1f2
414dc55
 
 
 
 
 
 
 
 
 
 
80cd1f2
 
 
 
 
414dc55
 
 
 
80cd1f2
 
 
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80cd1f2
 
 
 
414dc55
80cd1f2
414dc55
 
 
 
80cd1f2
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
"""Live game runtime: lazily builds the in-process llama.cpp backend, generates cases,
and holds live ``Session`` objects per run.

Single-flight is MANDATORY: ``llama_cpp.Llama`` is not thread-safe, so every model call
(generation + interrogation) runs under one lock - never concurrently, on any machine.

Background case generation runs on EVERY box, but it can never make a player wait:
each generation call holds the lock for just that one call (never the whole pipeline),
and on a small box it streams with an interrupt check between tokens - the moment a
player asks a question the in-flight generation aborts within a token, the lock frees,
and the turn runs. Generation resumes once the table has been idle for a while. Fresh
AI cases land in the buffer (served on the very next New Case) AND join the shuffled
rotation, so the pool of mysteries grows for as long as the Space stays up.
"""

from __future__ import annotations

import random
import threading
import time
import uuid
from collections.abc import Iterator
from dataclasses import dataclass

from ..config import effective_cpus, get_settings
from ..engine.session import Session
from ..generator.pipeline import generate_case
from ..llm.backend import GenParams, LLMBackend, LLMError, make_backend
from ..persistence.case_store import load_case, save_runtime_case
from ..persistence.paths import prebaked_cases_dir, runtime_cases_dir
from ..schemas.accusation import Accusation
from ..schemas.case import CaseFile
from ..schemas.enums import Relevance
from .case_adapter import casefile_to_public
from .public_view import PublicCase

# Idle threshold, in seconds: how long the table must be quiet (no interrogation turn)
# before a small box starts - or resumes - a background generation. It is compared
# against the time elapsed since ``GameRuntime._last_player_ts`` in ``_prebuild``.
_IDLE_SECS = 90.0


class _SharedLockBackend:
    """Backend wrapper that serialises each model call on the shared runtime lock.

    The lock is taken per call rather than per pipeline, so a waiting player is held up
    by at most the single call that is currently in flight. When an ``interrupt`` event
    is supplied, ``generate`` consumes the inner backend's stream instead and raises
    ``LLMError`` as soon as it sees the event set, which releases the lock right away.
    """

    def __init__(self, inner: LLMBackend, lock: threading.Lock,
                 interrupt: threading.Event | None = None) -> None:
        # inner: the real backend; lock: shared single-flight lock;
        # interrupt: optional "a player is waiting" signal (None = not interruptible).
        self._inner = inner
        self._lock = lock
        self._interrupt = interrupt

    def _abort_if_interrupted(self, interrupt: threading.Event) -> None:
        """Raise ``LLMError`` when the interrupt event is currently set."""
        if interrupt.is_set():
            raise LLMError("generation interrupted by player")

    def generate(self, prompt: str, params: GenParams) -> str:
        """Return the full completion for ``prompt`` while holding the lock.

        Without an interrupt event this is a plain pass-through to the inner backend.
        With one, the text is assembled from the inner stream and the event is checked
        once before streaming starts and again after every received chunk.

        Raises:
            LLMError: if the interrupt event is set before or during streaming.
        """
        interrupt = self._interrupt
        with self._lock:
            if interrupt is None:
                return self._inner.generate(prompt, params)
            self._abort_if_interrupted(interrupt)
            chunks: list[str] = []
            for chunk in self._inner.stream(prompt, params):
                chunks.append(chunk)
                self._abort_if_interrupted(interrupt)
            return "".join(chunks)

    def stream(self, prompt: str, params: GenParams) -> Iterator[str]:
        """Yield the inner backend's stream, holding the lock until it is exhausted or
        the generator is closed. The interrupt event is not consulted here."""
        with self._lock:
            yield from self._inner.stream(prompt, params)


@dataclass
class LiveRun:
    """Everything the runtime keeps for one registered play-through of a case."""

    # Registry key for this run (a uuid4 hex string when created by ``_register``).
    run_id: str
    # The full case file, including the culprit (read by ``accuse_live``).
    case: CaseFile
    # Live interrogation/accusation session built from the case.
    session: Session
    # Public projection of the case, as produced by ``casefile_to_public``.
    public: PublicCase
    # Suspect id -> baseline suspicion, captured from ``public.suspects`` at registration.
    baselines: dict[str, int]


class GameRuntime:
    def __init__(self) -> None:
        """Initialise locks, the run registry, the case pools and background-generation
        state. The backend and the shipped case pool are not touched here; they are
        created lazily by ``_get_backend`` and ``_load_prebaked``."""
        # --- model access -------------------------------------------------------------
        # One lock over ALL model calls: single-flight is MANDATORY.
        self._lock = threading.Lock()
        self._backend: LLMBackend | None = None
        self._backend_failed = False

        # --- live runs, keyed by run id -----------------------------------------------
        self._runs: dict[str, LiveRun] = {}

        # --- one-slot buffer holding the most recent background-generated case --------
        self._buffer: CaseFile | None = None
        self._buffer_lock = threading.Lock()

        # --- seeding: derived from the clock, so it differs per process start ---------
        initial_seed = int(time.time()) % 900_000 + 1000
        self._seed = initial_seed
        self._rng = random.Random(initial_seed)

        # --- pre-baked pool -----------------------------------------------------------
        # Complete, model-authored cases shipped with the Space. They are served
        # instantly on New Case so nobody waits ~2 min for live generation; the
        # interrogation itself is still live.
        self._prebaked: list[CaseFile] = []
        self._prebaked_idx = 0
        self._prebaked_loaded = False

        # --- background generation ----------------------------------------------------
        # A fast box generates straight away. A small box (the 2-vCPU Space) waits for
        # an idle table and aborts between tokens when a player shows up.
        cpu_count = effective_cpus()
        self._fast_box = cpu_count > 4
        self._gen_interrupt = threading.Event()
        self._gen_running = False
        self._gen_flag_lock = threading.Lock()
        self._last_player_ts = 0.0

    # ---- backend ----
    def _get_backend(self) -> LLMBackend | None:
        if self._backend is None and not self._backend_failed:
            try:
                self._backend = make_backend(get_settings())
            except LLMError:
                self._backend_failed = True
        return self._backend

    def available(self) -> bool:
        return self._get_backend() is not None

    def _next_seed(self) -> int:
        self._seed += 1
        return self._seed

    # ---- generation ----
    def _generate(self, seed: int, *, interruptible: bool = False) -> CaseFile:
        """Generate one case with the given seed, persist it, and return it.

        Model calls go through a ``_SharedLockBackend`` so each one takes the runtime
        lock individually. With ``interruptible=True`` that wrapper is also handed the
        generation-interrupt event.

        Raises:
            LLMError: when no backend is available. Errors from ``generate_case`` or
                ``save_runtime_case`` propagate unchanged.
        """
        backend = self._get_backend()
        if backend is None:
            raise LLMError("no backend")
        interrupt = self._gen_interrupt if interruptible else None
        locked_backend = _SharedLockBackend(backend, self._lock, interrupt)
        outcome = generate_case(locked_backend, seed=seed)
        save_runtime_case(outcome.case)
        return outcome.case

    def _prebuild(self) -> None:
        """Generate ONE fresh AI case in the background, yielding to players. On a small
        box: wait until the table is idle, abort between tokens if a player interrupts,
        then wait for idle again and retry. The finished case is served on the very next
        New Case and joins the rotation for good."""
        try:
            for _ in range(8):
                try:
                    if not self._fast_box:
                        while time.time() - self._last_player_ts < _IDLE_SECS:
                            time.sleep(5)
                    case = self._generate(self._next_seed(), interruptible=not self._fast_box)
                except LLMError:
                    continue  # interrupted by a player, or malformed output - try again
                except Exception:
                    break
                with self._buffer_lock:
                    self._buffer = case
                self._prebaked.append(case)
                break
        finally:
            with self._gen_flag_lock:
                self._gen_running = False

    def _spawn_gen(self) -> None:
        if not self.available():
            return
        with self._gen_flag_lock:
            if self._gen_running:
                return
            self._gen_running = True
        threading.Thread(target=self._prebuild, daemon=True).start()

    def _load_prebaked(self) -> None:
        if self._prebaked_loaded:
            return
        self._prebaked_loaded = True
        pool_dir = prebaked_cases_dir()
        if not pool_dir.is_dir():
            return
        for path in sorted(pool_dir.glob("*.json")):
            try:
                self._prebaked.append(load_case(path))
            except Exception:
                continue
        # Shuffle per process (the seed is time-based) so the FIRST case of every fresh
        # visit/restart is randomized - never the same opening mystery twice in a row.
        self._rng.shuffle(self._prebaked)

    def start_buffer(self) -> None:
        """Make the first New Case instant (shipped pool, shuffled) and start growing the
        pool with a fresh AI-generated case in the background."""
        self._load_prebaked()
        self._spawn_gen()

    def _take_buffered(self) -> CaseFile | None:
        with self._buffer_lock:
            case = self._buffer
            self._buffer = None
        return case

    def _take_prebaked(self) -> CaseFile | None:
        self._load_prebaked()
        if not self._prebaked:
            return None
        if self._prebaked_idx >= len(self._prebaked):
            # Bag exhausted: reshuffle for a fresh order on the next lap.
            self._rng.shuffle(self._prebaked)
            self._prebaked_idx = 0
        case = self._prebaked[self._prebaked_idx]
        self._prebaked_idx += 1
        return case

    def _maybe_refill(self) -> None:
        """Keep one fresh AI case cooking in the background whenever the buffer is empty."""
        if self._buffer is None:
            self._spawn_gen()

    def new_generated_run(self) -> tuple[PublicCase, str] | None:
        if not self.available():
            return None
        # Prefer a freshly generated case if one is ready; else serve the pre-baked pool
        # instantly; only with neither do we generate synchronously (first run, no pool).
        case = self._take_buffered() or self._take_prebaked()
        if case is None:
            try:
                case = self._generate(self._next_seed())
            except Exception:
                return None
        self._maybe_refill()
        return self._register(case)

    def load_generated_run(self, case_id: str) -> tuple[PublicCase, str] | None:
        if not self.available():
            return None
        self._load_prebaked()
        case = next((c for c in self._prebaked if c.case_id == case_id), None)
        if case is None:
            for directory in (prebaked_cases_dir(), runtime_cases_dir()):
                path = directory / f"{case_id}.json"
                if path.exists():
                    try:
                        case = load_case(path)
                    except Exception:
                        case = None
                    break
        if case is None:
            return None
        return self._register(case)

    def _register(self, case: CaseFile) -> tuple[PublicCase, str]:
        """Create a live run for ``case``, store it in the registry under a fresh run
        id, and return ``(public_case, run_id)``."""
        public = casefile_to_public(case)
        session = Session(case, self._get_backend())  # type: ignore[arg-type]
        run_id = uuid.uuid4().hex
        # Remember each suspect's starting suspicion; _suspicion builds on it later.
        baselines: dict[str, int] = {}
        for suspect in public.suspects:
            baselines[suspect.id] = suspect.baseline_suspicion
        self._runs[run_id] = LiveRun(
            run_id=run_id,
            case=case,
            session=session,
            public=public,
            baselines=baselines,
        )
        return public, run_id

    def get(self, run_id: str) -> LiveRun | None:
        return self._runs.get(run_id)

    # ---- live turn / verdict ----
    def _suspicion(self, run: LiveRun, sus_id: str) -> int:
        st = run.session.state.state_for(sus_id)
        base = run.baselines.get(sus_id, 25)
        val = base + round(st.stress * 55) + (20 if st.broken_lie_ids else 0)
        return max(0, min(100, val))

    def interrogate_live(
        self, run: LiveRun, sus_id: str, question: str, clue_id: str | None
    ) -> dict:
        prev = self._suspicion(run, sus_id)
        # Tell any in-flight background generation to yield the lock NOW (it aborts
        # between tokens), then take the table.
        self._gen_interrupt.set()
        self._last_player_ts = time.time()
        with self._lock:
            self._gen_interrupt.clear()
            final = None
            for ev in run.session.interrogate(sus_id, question, presented_clue_id=clue_id):
                if ev.final is not None:
                    final = ev.final
        self._last_player_ts = time.time()
        reply = final.turn.spoken if final else "…I have nothing to say to that."
        after = self._suspicion(run, sus_id)
        adj = final.adjudication if final else None
        rattled = bool(adj and adj.relevance in (Relevance.DIRECT, Relevance.BREAKING))
        cornered = bool(adj and adj.is_contradiction)
        return {
            "reply": reply,
            "suspicionDelta": after - prev,
            "suspicion": after,
            "flags": {"rattled": rattled, "contradictionExposed": cornered, "cornered": cornered},
        }

    def accuse_live(self, run: LiveRun, suspect_id: str, motive_id: str, evidence_ids: list[str]) -> dict:
        """Submit the player's accusation and build the end-of-case payload.

        Returns a dict with ``correct``, a ``verdict`` section (stamp, the real
        culprit's id and name, and a closing narrative), a ``score`` section and an
        empty ``stats`` list. Note that ``evidenceHits`` is simply the number of
        evidence ids the player cited.
        """
        accusation = Accusation(
            accused_sus_id=suspect_id,
            motive_id=motive_id,
            cited_clue_ids=tuple(evidence_ids),
        )
        verdict = run.session.accuse(accusation)

        case = run.case
        culprit_id = case.culprit.sus_id
        killer = case.suspect(culprit_id)
        solved = verdict.culprit_correct

        if solved:
            # Prefer the verdict's own rationale; fall back to the case's method text.
            truth = verdict.rationale or case.culprit.method_narrative
        else:
            # Only look the accused up by name if the id really is one of the suspects.
            is_known = any(s.sus_id == suspect_id for s in case.suspects)
            accused = case.suspect(suspect_id).name if is_known else "the accused"
            truth = (
                f"You charged {accused}. The case held for a night - but the evidence led past "
                f"them to {killer.name}, who walked out into the rain."
            )

        verdict_section = {
            "stamp": "CASE CLOSED" if solved else "MISTRIAL",
            "killerId": culprit_id,
            "killerName": killer.name,
            "truth": truth,
        }
        score_section = {
            "points": verdict.score,
            "max": 100,
            "killerCorrect": solved,
            "motiveCorrect": verdict.motive_correct,
            "evidenceHits": len(evidence_ids),
        }
        return {
            "correct": solved,
            "verdict": verdict_section,
            "score": score_section,
            "stats": [],
        }


# Module-level runtime instance, created when this module is imported. Construction
# only sets up state; the backend and the case pool are loaded lazily on first use.
RUNTIME = GameRuntime()