Spaces:

amine-yagoub
/

CodeTribunal

Running

File size: 12,139 Bytes

2e03441

"""Phase runners: investigation, trial, verdict, and report generators.

Each function is a standalone generator that yields PipelineEvent objects,
keeping courtroom.py thin as an orchestrator.
"""

import logging
import time

import litellm
from crewai import Crew, Process, Task

from code_tribunal.agents import (
    architecture_investigator,
    defense_attorney,
    judge as judge_agent,
    prosecutor,
    verdict_report_agent,
)
from code_tribunal.config import TribunalConfig
from code_tribunal.pipeline import Phase, PipelineEvent, Pipeline
from code_tribunal.react import react_loop_stream

log = logging.getLogger("code_tribunal")

_MAX_RETRIES = 5
_BASE_DELAY = 4.0
_MIN_YIELD_INTERVAL = 0.05


def _crew_kickoff_with_retry(crew: Crew) -> object:
    """Run crew.kickoff() with exponential backoff on rate-limit errors."""
    for attempt in range(_MAX_RETRIES):
        try:
            return crew.kickoff()
        except litellm.RateLimitError:
            if attempt == _MAX_RETRIES - 1:
                raise
            delay = _BASE_DELAY * (2 ** attempt)
            log.warning("[RETRY] Rate limited (attempt %d/%d), waiting %.0fs...", attempt + 1, _MAX_RETRIES, delay)
            time.sleep(delay)


def _domain_text(report, domain: str) -> str:
    """Extract findings for a specific domain from an EvidenceReport."""
    findings = report.findings_by_domain.get(domain, [])
    if not findings:
        return f"No {domain} findings detected."
    return "\n".join(str(f) for f in findings)


INVESTIGATION_AGENT_CONFIGS = [
    (
        "security",
        "Security Forensic Investigator",
        "You are a security forensic investigator. You find vulnerabilities, assess attack vectors, "
        "and determine severity. You cite specific file paths and line numbers.",
    ),
    (
        "quality",
        "Code Quality Forensic Investigator",
        "You are a code quality forensic investigator. You identify technical debt, dead code, "
        "missing error handling, and signs of rushed development.",
    ),
    (
        "architecture",
        "Architecture Forensic Investigator",
        "You are an architecture forensic investigator. You identify structural problems, "
        "tight coupling, missing abstractions, and scalability concerns.",
    ),
]


def run_investigation(config, pipeline: Pipeline, report, tools, target_dir):
    """Phase 4: Run 3 investigators using ReACT loop with real tool calling."""
    evidence_text = report.to_text()
    domain_evidence = {
        "security": _domain_text(report, "security"),
        "quality": _domain_text(report, "quality"),
        "architecture": _domain_text(report, "architecture"),
    }

    investigation_reports = {}

    for label, role, goal in INVESTIGATION_AGENT_CONFIGS:
        log.debug("[INVESTIGATION] Starting %s investigator...", label)
        domain_ev = domain_evidence.get(label, "No specific findings.")
        task_desc = (
            f"Investigate this codebase for {label} issues.\n\n"
            f"{label.upper()} EVIDENCE:\n{domain_ev}\n\n"
            f"FULL EVIDENCE REPORT:\n{evidence_text}\n\n"
            "Use your tools to read files, search for patterns, and trace call chains. "
            "Produce a detailed investigation report with specific findings, severities, and remediation."
        )

        last_yield = 0.0
        full_output = ""

        for agent_role, delta, is_tool in react_loop_stream(
            config=config,
            task_description=task_desc,
            agent_role=role,
            agent_goal=goal,
            tools=tools,
            max_iterations=8,
        ):
            if pipeline.is_cancelled:
                return

            now = time.time()
            if is_tool:
                yield PipelineEvent(
                    Phase.INVESTIGATION,
                    f"{label} investigator: {delta.strip()}",
                    agent_role=agent_role,
                    delta=delta,
                )
                full_output += delta
                last_yield = now
            elif now - last_yield >= _MIN_YIELD_INTERVAL:
                yield PipelineEvent(
                    Phase.INVESTIGATION,
                    f"{label} investigator analyzing...",
                    agent_role=agent_role,
                    delta=delta,
                )
                full_output += delta
                last_yield = now

        log.debug("[INVESTIGATION] %s investigator done. Output length: %d", label, len(full_output))
        investigation_reports[label] = full_output

    pipeline.update(investigation_reports=investigation_reports)
    yield PipelineEvent(
        Phase.INVESTIGATION,
        f"Investigation complete: {len(investigation_reports)} reports generated.",
        data={"reports": investigation_reports},
    )


def run_trial(config: TribunalConfig, pipeline: Pipeline, report, tools, target_dir):
    """Phase 5: Prosecutor vs Defense — non-streaming."""
    evidence_text = report.to_text()
    inv_reports = pipeline.state.investigation_reports if pipeline.state else {}
    investigation_text = "\n\n".join(
        f"=== {k.upper()} INVESTIGATION ===\n{v}"
        for k, v in (inv_reports or {}).items()
    )

    pros_agent = prosecutor(config, tools=tools)
    def_agent = defense_attorney(config, tools=tools)
    rebuttal_agent = prosecutor(config, tools=tools)

    prosecution_task = Task(
        description=(
            "PRESENT THE PROSECUTION'S CASE\n\n"
            "You are presenting evidence against a freelance developer who delivered this code.\n\n"
            "RAW EVIDENCE:\n" + evidence_text + "\n\n"
            "INVESTIGATION REPORTS:\n" + investigation_text + "\n\n"
            "Build your case. Be specific - cite file paths, line numbers, and vulnerability types. "
            "Argue that this code represents negligence, not mere imperfection."
        ),
        agent=pros_agent,
        expected_output="A compelling prosecution argument citing specific evidence.",
    )

    defense_task = Task(
        description=(
            "PRESENT THE DEFENSE\n\n"
            "The prosecution has presented their case. Mount your defense.\n\n"
            "RAW EVIDENCE:\n" + evidence_text + "\n\n"
            "INVESTIGATION REPORTS:\n" + investigation_text + "\n\n"
            "Challenge specific claims. Argue proportionality - not every issue is negligence. Be honest but vigorous."
        ),
        agent=def_agent,
        context=[prosecution_task],
        expected_output="A vigorous defense argument.",
    )

    rebuttal_task = Task(
        description=(
            "REBUTTAL\n\n"
            "The defense has responded. Address their strongest points. "
            "End with a closing argument for the judge."
        ),
        agent=rebuttal_agent,
        context=[prosecution_task, defense_task],
        expected_output="A sharp rebuttal and closing argument.",
    )

    crew = Crew(
        agents=[pros_agent, def_agent, rebuttal_agent],
        tasks=[prosecution_task, defense_task, rebuttal_task],
        process=Process.sequential,
        verbose=False,
    )

    trial_transcript = ""
    round_names = ["Prosecution", "Defense", "Rebuttal"]
    try:
        log.debug("[TRIAL] Running crew.kickoff()...")
        result = _crew_kickoff_with_retry(crew)
        task_outputs = result.tasks_output if hasattr(result, "tasks_output") else []
        parts = []
        for i, name in enumerate(round_names):
            if i < len(task_outputs):
                raw = task_outputs[i].raw if hasattr(task_outputs[i], "raw") else str(task_outputs[i])
                parts.append(f"=== {name} ===\n{raw}")
        trial_transcript = "\n\n".join(parts)
        log.debug("[TRIAL] crew.kickoff() done. Transcript length: %d", len(trial_transcript))
    except Exception as e:
        log.debug("[TRIAL] FAILED: %s", e, exc_info=True)
        trial_transcript = f"Trial fallback: {e}"

    pipeline.update(trial_transcript=trial_transcript)
    yield PipelineEvent(
        Phase.TRIAL,
        "Trial complete. The Judge is preparing to deliberate.",
        data={"transcript": trial_transcript},
    )


def run_verdict(config: TribunalConfig, pipeline: Pipeline, report, target_dir):
    """Phase 6: Judge delivers verdict — non-streaming."""
    evidence_text = report.to_text()
    state = pipeline.state
    inv_text = "\n\n".join(
        f"=== {k.upper()} INVESTIGATION ===\n{v}"
        for k, v in (state.investigation_reports or {}).items()
    )
    transcript = state.trial_transcript or ""

    judge = judge_agent(config)

    verdict_task = Task(
        description=(
            "DELIVER YOUR VERDICT\n\n"
            "RAW EVIDENCE:\n" + evidence_text + "\n\n"
            "INVESTIGATION REPORTS:\n" + inv_text + "\n\n"
            "TRIAL TRANSCRIPT:\n" + transcript + "\n\n"
            "Deliver a structured verdict:\n"
            "## VERDICT\nOverall: [GUILTY / MIXED / NOT GUILTY]\n"
            "Reputational Risk Score: [0-100]\n\n"
            "## FINDINGS SUMMARY\n"
            "For each finding: severity, impact, remediation\n\n"
            "## SENTENCE\n"
            "Your final assessment and recommendations."
        ),
        agent=judge,
        expected_output="A structured verdict with overall ruling, risk score, findings summary, and sentence.",
    )

    crew = Crew(agents=[judge], tasks=[verdict_task], verbose=False)

    verdict_text = ""
    try:
        log.debug("[VERDICT] Running crew.kickoff()...")
        result = _crew_kickoff_with_retry(crew)
        verdict_text = result.raw if hasattr(result, "raw") else str(result)
        log.debug("[VERDICT] crew.kickoff() done. Verdict length: %d", len(verdict_text))
    except Exception as e:
        log.debug("[VERDICT] FAILED: %s", e, exc_info=True)
        verdict_text = f"Verdict fallback: {e}"

    pipeline.update(verdict=verdict_text)
    yield PipelineEvent(Phase.VERDICT, "Verdict delivered.", data={"verdict": verdict_text})


def run_report(config: TribunalConfig, pipeline: Pipeline, report, target_dir):
    """Phase 7: Verdict Report Agent — non-streaming."""
    state = pipeline.state
    report_ag = verdict_report_agent(config)

    inv_text = "\n\n".join(
        f"=== {k.upper()} INVESTIGATION ===\n{v}"
        for k, v in (state.investigation_reports or {}).items()
    )

    report_task = Task(
        description=(
            "Generate the FINAL STRUCTURED REPORT from this trial.\n\n"
            "The Judge has delivered a verdict. Now compile everything into a clear, "
            "professional report for the client.\n\n"
            "EVIDENCE:\n" + report.to_text() + "\n\n"
            "INVESTIGATION REPORTS:\n" + inv_text + "\n\n"
            "TRIAL TRANSCRIPT:\n" + (state.trial_transcript or "") + "\n\n"
            "VERDICT:\n" + (state.verdict or "") + "\n\n"
            "The report MUST include:\n"
            "1. **Executive Summary** - one paragraph overview\n"
            "2. **Findings Table** - all findings sorted by severity (CRITICAL first)\n"
            "3. **Per-Finding Analysis** - impact, remediation, estimated fix effort\n"
            "4. **Sentencing Recommendations** - what the client should do next\n"
            "5. **Reputational Risk Score** - breakdown of how the score was calculated"
        ),
        agent=report_ag,
        expected_output="A comprehensive structured report.",
    )

    crew = Crew(agents=[report_ag], tasks=[report_task], verbose=False)

    report_text = ""
    try:
        log.debug("[REPORT] Running crew.kickoff()...")
        result = _crew_kickoff_with_retry(crew)
        report_text = result.raw if hasattr(result, "raw") else str(result)
        log.debug("[REPORT] crew.kickoff() done. Report length: %d", len(report_text))
    except Exception as e:
        log.debug("[REPORT] FAILED: %s", e, exc_info=True)
        report_text = f"Report generation fallback: {e}"

    pipeline.update(report=report_text)
    pipeline.complete()
    yield PipelineEvent(Phase.REPORT, "Report generated.", data={"report": report_text})