File size: 12,139 Bytes
2e03441
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
"""Phase runners: investigation, trial, verdict, and report generators.

Each function is a standalone generator that yields PipelineEvent objects,
keeping courtroom.py thin as an orchestrator.
"""

import logging
import time

import litellm
from crewai import Crew, Process, Task

from code_tribunal.agents import (
    architecture_investigator,
    defense_attorney,
    judge as judge_agent,
    prosecutor,
    verdict_report_agent,
)
from code_tribunal.config import TribunalConfig
from code_tribunal.pipeline import Phase, PipelineEvent, Pipeline
from code_tribunal.react import react_loop_stream

log = logging.getLogger("code_tribunal")

# Maximum crew.kickoff() attempts before the RateLimitError is re-raised.
_MAX_RETRIES = 5
# Initial backoff delay in seconds; doubled on each successive retry.
_BASE_DELAY = 4.0
# Minimum seconds between streamed non-tool events in run_investigation's
# throttle, to avoid flooding event consumers.
_MIN_YIELD_INTERVAL = 0.05


def _crew_kickoff_with_retry(crew: Crew) -> object:
    """Execute crew.kickoff(), retrying rate-limit errors with exponential backoff.

    Sleeps _BASE_DELAY * 2**attempt seconds between attempts and re-raises
    the RateLimitError once _MAX_RETRIES attempts have been exhausted.
    """
    failures = 0
    while True:
        try:
            return crew.kickoff()
        except litellm.RateLimitError:
            failures += 1
            if failures >= _MAX_RETRIES:
                # Out of attempts — propagate the final rate-limit error.
                raise
            wait = _BASE_DELAY * (2 ** (failures - 1))
            log.warning(
                "[RETRY] Rate limited (attempt %d/%d), waiting %.0fs...",
                failures, _MAX_RETRIES, wait,
            )
            time.sleep(wait)


def _domain_text(report, domain: str) -> str:
    """Extract findings for a specific domain from an EvidenceReport."""
    findings = report.findings_by_domain.get(domain, [])
    if not findings:
        return f"No {domain} findings detected."
    return "\n".join(str(f) for f in findings)


# (label, agent role, agent goal) triples for the three forensic investigators
# run by run_investigation(). `label` doubles as the domain key used to look up
# findings in the evidence report; role/goal are fed to the ReACT loop.
INVESTIGATION_AGENT_CONFIGS = [
    (
        "security",
        "Security Forensic Investigator",
        "You are a security forensic investigator. You find vulnerabilities, assess attack vectors, "
        "and determine severity. You cite specific file paths and line numbers.",
    ),
    (
        "quality",
        "Code Quality Forensic Investigator",
        "You are a code quality forensic investigator. You identify technical debt, dead code, "
        "missing error handling, and signs of rushed development.",
    ),
    (
        "architecture",
        "Architecture Forensic Investigator",
        "You are an architecture forensic investigator. You identify structural problems, "
        "tight coupling, missing abstractions, and scalability concerns.",
    ),
]


def run_investigation(config, pipeline: Pipeline, report, tools, target_dir):
    """Phase 4: Run 3 investigators using ReACT loop with real tool calling.

    Streams PipelineEvents while each investigator works: tool-call deltas
    are yielded immediately, plain-text deltas are throttled to at most one
    event per _MIN_YIELD_INTERVAL seconds. Returns early (without updating
    the pipeline) if the pipeline is cancelled mid-stream.

    Args:
        config: Tribunal configuration forwarded to the ReACT loop.
        pipeline: Pipeline whose state receives ``investigation_reports``.
        report: EvidenceReport providing the findings text.
        tools: Tools the investigators may call.
        target_dir: Unused here; kept for a uniform phase-runner signature.
    """
    evidence_text = report.to_text()
    domain_evidence = {
        "security": _domain_text(report, "security"),
        "quality": _domain_text(report, "quality"),
        "architecture": _domain_text(report, "architecture"),
    }

    investigation_reports = {}

    for label, role, goal in INVESTIGATION_AGENT_CONFIGS:
        log.debug("[INVESTIGATION] Starting %s investigator...", label)
        domain_ev = domain_evidence.get(label, "No specific findings.")
        task_desc = (
            f"Investigate this codebase for {label} issues.\n\n"
            f"{label.upper()} EVIDENCE:\n{domain_ev}\n\n"
            f"FULL EVIDENCE REPORT:\n{evidence_text}\n\n"
            "Use your tools to read files, search for patterns, and trace call chains. "
            "Produce a detailed investigation report with specific findings, severities, and remediation."
        )

        last_yield = 0.0
        full_output = ""

        for agent_role, delta, is_tool in react_loop_stream(
            config=config,
            task_description=task_desc,
            agent_role=role,
            agent_goal=goal,
            tools=tools,
            max_iterations=8,
        ):
            if pipeline.is_cancelled:
                return

            # BUG FIX: accumulate every delta unconditionally. Previously
            # accumulation only happened on yielded events, so plain-text
            # deltas arriving inside the throttle window were silently
            # dropped from the stored report.
            full_output += delta

            now = time.time()
            if is_tool:
                # Tool calls are always surfaced immediately.
                yield PipelineEvent(
                    Phase.INVESTIGATION,
                    f"{label} investigator: {delta.strip()}",
                    agent_role=agent_role,
                    delta=delta,
                )
                last_yield = now
            elif now - last_yield >= _MIN_YIELD_INTERVAL:
                # Plain text is throttled to avoid flooding event consumers.
                yield PipelineEvent(
                    Phase.INVESTIGATION,
                    f"{label} investigator analyzing...",
                    agent_role=agent_role,
                    delta=delta,
                )
                last_yield = now

        log.debug("[INVESTIGATION] %s investigator done. Output length: %d", label, len(full_output))
        investigation_reports[label] = full_output

    pipeline.update(investigation_reports=investigation_reports)
    yield PipelineEvent(
        Phase.INVESTIGATION,
        f"Investigation complete: {len(investigation_reports)} reports generated.",
        data={"reports": investigation_reports},
    )


def run_trial(config: TribunalConfig, pipeline: Pipeline, report, tools, target_dir):
    """Phase 5: Prosecutor vs Defense — non-streaming.

    Builds a sequential three-task crew (prosecution, defense, rebuttal),
    runs it with rate-limit retries, and stores the stitched transcript on
    the pipeline. On failure the transcript falls back to an error marker
    so downstream phases still receive input.

    Args:
        config: Tribunal configuration used to construct the agents.
        pipeline: Pipeline whose state receives ``trial_transcript``.
        report: EvidenceReport providing the raw evidence text.
        tools: Tools handed to the prosecution and defense agents.
        target_dir: Unused here; kept for a uniform phase-runner signature.
    """
    evidence_text = report.to_text()
    inv_reports = pipeline.state.investigation_reports if pipeline.state else {}
    investigation_text = "\n\n".join(
        f"=== {k.upper()} INVESTIGATION ===\n{v}"
        for k, v in (inv_reports or {}).items()
    )

    pros_agent = prosecutor(config, tools=tools)
    def_agent = defense_attorney(config, tools=tools)
    # A fresh prosecutor instance handles the rebuttal round.
    rebuttal_agent = prosecutor(config, tools=tools)

    prosecution_task = Task(
        description=(
            "PRESENT THE PROSECUTION'S CASE\n\n"
            "You are presenting evidence against a freelance developer who delivered this code.\n\n"
            "RAW EVIDENCE:\n" + evidence_text + "\n\n"
            "INVESTIGATION REPORTS:\n" + investigation_text + "\n\n"
            "Build your case. Be specific - cite file paths, line numbers, and vulnerability types. "
            "Argue that this code represents negligence, not mere imperfection."
        ),
        agent=pros_agent,
        expected_output="A compelling prosecution argument citing specific evidence.",
    )

    defense_task = Task(
        description=(
            "PRESENT THE DEFENSE\n\n"
            "The prosecution has presented their case. Mount your defense.\n\n"
            "RAW EVIDENCE:\n" + evidence_text + "\n\n"
            "INVESTIGATION REPORTS:\n" + investigation_text + "\n\n"
            "Challenge specific claims. Argue proportionality - not every issue is negligence. Be honest but vigorous."
        ),
        agent=def_agent,
        context=[prosecution_task],
        expected_output="A vigorous defense argument.",
    )

    rebuttal_task = Task(
        description=(
            "REBUTTAL\n\n"
            "The defense has responded. Address their strongest points. "
            "End with a closing argument for the judge."
        ),
        agent=rebuttal_agent,
        context=[prosecution_task, defense_task],
        expected_output="A sharp rebuttal and closing argument.",
    )

    crew = Crew(
        agents=[pros_agent, def_agent, rebuttal_agent],
        tasks=[prosecution_task, defense_task, rebuttal_task],
        process=Process.sequential,
        verbose=False,
    )

    trial_transcript = ""
    round_names = ["Prosecution", "Defense", "Rebuttal"]
    try:
        log.debug("[TRIAL] Running crew.kickoff()...")
        result = _crew_kickoff_with_retry(crew)
        task_outputs = getattr(result, "tasks_output", [])
        parts = []
        for i, name in enumerate(round_names):
            if i < len(task_outputs):
                out = task_outputs[i]
                raw = getattr(out, "raw", str(out))
                parts.append(f"=== {name} ===\n{raw}")
        trial_transcript = "\n\n".join(parts)
        log.debug("[TRIAL] crew.kickoff() done. Transcript length: %d", len(trial_transcript))
    except Exception as e:
        # BUG FIX: failures were logged at DEBUG level and thus invisible
        # under normal logging configuration; surface them as errors.
        log.error("[TRIAL] FAILED: %s", e, exc_info=True)
        trial_transcript = f"Trial fallback: {e}"

    pipeline.update(trial_transcript=trial_transcript)
    yield PipelineEvent(
        Phase.TRIAL,
        "Trial complete. The Judge is preparing to deliberate.",
        data={"transcript": trial_transcript},
    )


def run_verdict(config: TribunalConfig, pipeline: Pipeline, report, target_dir):
    """Phase 6: Judge delivers verdict — non-streaming.

    Compiles evidence, investigation reports, and the trial transcript into
    a single verdict task for the judge agent, then stores the verdict text
    on the pipeline (falling back to an error marker on failure).

    Args:
        config: Tribunal configuration used to construct the judge agent.
        pipeline: Pipeline whose state supplies prior-phase outputs and
            receives ``verdict``.
        report: EvidenceReport providing the raw evidence text.
        target_dir: Unused here; kept for a uniform phase-runner signature.
    """
    evidence_text = report.to_text()
    state = pipeline.state
    # BUG FIX: guard against a missing pipeline state, consistent with
    # run_trial — previously `state.investigation_reports` would raise
    # AttributeError if state was None.
    inv_reports = state.investigation_reports if state else {}
    inv_text = "\n\n".join(
        f"=== {k.upper()} INVESTIGATION ===\n{v}"
        for k, v in (inv_reports or {}).items()
    )
    transcript = (state.trial_transcript or "") if state else ""

    judge = judge_agent(config)

    verdict_task = Task(
        description=(
            "DELIVER YOUR VERDICT\n\n"
            "RAW EVIDENCE:\n" + evidence_text + "\n\n"
            "INVESTIGATION REPORTS:\n" + inv_text + "\n\n"
            "TRIAL TRANSCRIPT:\n" + transcript + "\n\n"
            "Deliver a structured verdict:\n"
            "## VERDICT\nOverall: [GUILTY / MIXED / NOT GUILTY]\n"
            "Reputational Risk Score: [0-100]\n\n"
            "## FINDINGS SUMMARY\n"
            "For each finding: severity, impact, remediation\n\n"
            "## SENTENCE\n"
            "Your final assessment and recommendations."
        ),
        agent=judge,
        expected_output="A structured verdict with overall ruling, risk score, findings summary, and sentence.",
    )

    crew = Crew(agents=[judge], tasks=[verdict_task], verbose=False)

    verdict_text = ""
    try:
        log.debug("[VERDICT] Running crew.kickoff()...")
        result = _crew_kickoff_with_retry(crew)
        verdict_text = getattr(result, "raw", str(result))
        log.debug("[VERDICT] crew.kickoff() done. Verdict length: %d", len(verdict_text))
    except Exception as e:
        # BUG FIX: failures were logged at DEBUG level and thus invisible
        # under normal logging configuration; surface them as errors.
        log.error("[VERDICT] FAILED: %s", e, exc_info=True)
        verdict_text = f"Verdict fallback: {e}"

    pipeline.update(verdict=verdict_text)
    yield PipelineEvent(Phase.VERDICT, "Verdict delivered.", data={"verdict": verdict_text})


def run_report(config: TribunalConfig, pipeline: Pipeline, report, target_dir):
    """Phase 7: Verdict Report Agent — non-streaming.

    Compiles evidence, investigation reports, transcript, and verdict into
    the final client-facing report, stores it on the pipeline, and marks the
    pipeline complete. Falls back to an error marker on failure.

    Args:
        config: Tribunal configuration used to construct the report agent.
        pipeline: Pipeline whose state supplies prior-phase outputs and
            receives ``report``; marked complete at the end.
        report: EvidenceReport providing the raw evidence text.
        target_dir: Unused here; kept for a uniform phase-runner signature.
    """
    state = pipeline.state
    report_ag = verdict_report_agent(config)

    # BUG FIX: guard against a missing pipeline state, consistent with
    # run_trial — previously `state.investigation_reports` would raise
    # AttributeError if state was None.
    inv_reports = state.investigation_reports if state else {}
    inv_text = "\n\n".join(
        f"=== {k.upper()} INVESTIGATION ===\n{v}"
        for k, v in (inv_reports or {}).items()
    )
    transcript = (state.trial_transcript or "") if state else ""
    verdict = (state.verdict or "") if state else ""

    report_task = Task(
        description=(
            "Generate the FINAL STRUCTURED REPORT from this trial.\n\n"
            "The Judge has delivered a verdict. Now compile everything into a clear, "
            "professional report for the client.\n\n"
            "EVIDENCE:\n" + report.to_text() + "\n\n"
            "INVESTIGATION REPORTS:\n" + inv_text + "\n\n"
            "TRIAL TRANSCRIPT:\n" + transcript + "\n\n"
            "VERDICT:\n" + verdict + "\n\n"
            "The report MUST include:\n"
            "1. **Executive Summary** - one paragraph overview\n"
            "2. **Findings Table** - all findings sorted by severity (CRITICAL first)\n"
            "3. **Per-Finding Analysis** - impact, remediation, estimated fix effort\n"
            "4. **Sentencing Recommendations** - what the client should do next\n"
            "5. **Reputational Risk Score** - breakdown of how the score was calculated"
        ),
        agent=report_ag,
        expected_output="A comprehensive structured report.",
    )

    crew = Crew(agents=[report_ag], tasks=[report_task], verbose=False)

    report_text = ""
    try:
        log.debug("[REPORT] Running crew.kickoff()...")
        result = _crew_kickoff_with_retry(crew)
        report_text = getattr(result, "raw", str(result))
        log.debug("[REPORT] crew.kickoff() done. Report length: %d", len(report_text))
    except Exception as e:
        # BUG FIX: failures were logged at DEBUG level and thus invisible
        # under normal logging configuration; surface them as errors.
        log.error("[REPORT] FAILED: %s", e, exc_info=True)
        report_text = f"Report generation fallback: {e}"

    pipeline.update(report=report_text)
    pipeline.complete()
    yield PipelineEvent(Phase.REPORT, "Report generated.", data={"report": report_text})