File size: 6,255 Bytes
0ee66d2
 
 
 
 
 
 
 
 
 
 
4bac574
0ee66d2
 
 
 
 
 
 
 
 
 
 
 
 
4bac574
 
 
 
 
 
 
 
 
9940e16
0ee66d2
 
 
 
f2ee2fc
0ee66d2
 
4bac574
 
 
 
 
 
b658e10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee66d2
 
9940e16
0ee66d2
 
 
 
f2ee2fc
 
0ee66d2
f2ee2fc
 
0ee66d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
AgentDebuggerEnv — FastAPI Server
===================================
Exposes the environment as REST endpoints:
  POST /reset  — Start a fresh episode
  POST /step   — Submit one action
  GET  /state  — Full internal state
  GET  /health — Deployment health check (must return 200)
"""

from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, RedirectResponse
from pydantic import BaseModel
from typing import Optional

from env.environment import DebuggerEnvironment
from env.models import Action
from env.tasks.registry import list_tasks

# Application object that every route in this module is registered on.
# title/description/version are the metadata handed to FastAPI itself.
app = FastAPI(
    title="AgentDebuggerEnv",
    description="An OpenEnv-compliant debugging environment for AI agents",
    version="1.0.0",
)

# ── Mount Gradio demo at /demo ────────────────────────────────────────────────
# The demo UI is optional. If either import below raises ImportError the server
# continues with the REST routes only; any other exception (for example one
# raised inside create_demo) is NOT caught here and will propagate.
# Note that `app` is rebound to whatever gr.mount_gradio_app returns, so the
# route decorators further down attach to that returned object.
try:
    import gradio as gr
    from demo.gradio_app import create_demo
    _gradio_demo = create_demo()
    app = gr.mount_gradio_app(app, _gradio_demo, path="/demo")
except ImportError:
    pass  # gradio not installed — API-only mode

# Single environment instance to manage the debugging lifecycle.
# It is created once at import time and is the object the /reset, /step and
# /state handlers below all call into, so every request shares it.
env = DebuggerEnvironment()


# Optional JSON body accepted by POST /reset.
class ResetRequest(BaseModel):
    # Identifier of the task to start; "easy" is used when the field is
    # omitted. The Optional annotation means an explicit null is also accepted.
    task_id: Optional[str] = "easy"


@app.get("/", include_in_schema=False)
async def root_redirect():
    """Redirect the bare root URL to a page that actually exists.

    Returns:
        RedirectResponse: to ``/demo`` when the Gradio demo was mounted at
        import time, otherwise to the JSON overview served at ``/api``.
    """
    # `_gradio_demo` is only bound at module level when the optional Gradio
    # imports succeeded. In API-only mode nothing is served at /demo, so an
    # unconditional redirect there would send every visitor of "/" to a 404.
    demo_mounted = "_gradio_demo" in globals()
    return RedirectResponse(url="/demo" if demo_mounted else "/api")


@app.get("/api")
async def root():
    """Return a machine-readable overview of the environment and its API.

    Returns:
        dict: static metadata (name, version, description, endpoint listing,
        reward type, action types) plus whatever ``list_tasks()`` returns
        under the ``"tasks"`` key.
    """
    return {
        "name": "AgentDebuggerEnv",
        "version": "1.0.0",
        "description": (
            "An OpenEnv-compliant environment where AI agents debug broken code "
            "through iterative hypothesis-test-fix cycles. Unlike static benchmarks, "
            "agents act in a live sandbox and observe real execution output each step."
        ),
        "openenv_compliant": True,
        "domain": "software_engineering",
        # Keep this listing in step with the routes registered in this module:
        # "/" is a redirect, and this overview itself is served at /api.
        "endpoints": {
            "GET  /":        "Redirects to the demo UI at /demo",
            "GET  /api":     "This overview",
            "GET  /health":  "Health check — returns 200 if server is live",
            "GET  /tasks":   "List all available tasks with metadata",
            "GET  /state":   "Current episode state",
            "POST /reset":   "Start a new episode. Body: {\"task_id\": \"easy\"|\"medium\"|\"hard\"}",
            "POST /step":    "Submit one action. Body: Action JSON",
        },
        "tasks": list_tasks(),
        "reward_type": "dense",
        "action_types": ["submit_fix", "query_context", "give_up"],
    }


@app.get("/tasks")
async def get_tasks():
    """List the built-in tasks together with their limits and descriptions.

    The metadata is spelled out literally in this handler; it is not read
    from the task registry.

    Returns:
        dict: ``{"tasks": [...]}`` with one entry each for the easy, medium
        and hard task, in that order.
    """
    easy_task = {
        "id": "easy",
        "name": "Single Function Off-By-One Bug",
        "difficulty": "easy",
        "max_attempts": 5,
        "max_steps": 8,
        "tests_total": 8,
        "description": (
            "Binary search with an off-by-one termination condition. "
            "Error message is clear and high-signal. 1-2 iterations expected."
        ),
    }
    medium_task = {
        "id": "medium",
        "name": "Red Herring Authentication Bug",
        "difficulty": "medium",
        "max_attempts": 7,
        "max_steps": 15,
        "tests_total": 10,
        "description": (
            "Authentication module where the error message points to the wrong "
            "function. Agent must trace data flow backwards from symptom to root cause "
            "and resist the red herring."
        ),
    }
    hard_task = {
        "id": "hard",
        "name": "Concurrency Race Condition",
        "difficulty": "hard",
        "max_attempts": 10,
        "max_steps": 25,
        "tests_total": 8,
        "description": (
            "Thread-safe counter with a race condition invisible to all sequential tests. "
            "Agent must recognize that passing tests are insufficient proof of correctness, "
            "design a concurrent stress test to surface the bug, then fix the atomicity issue."
        ),
    }
    return {"tasks": [easy_task, medium_task, hard_task]}


@app.get("/health")
async def health():
    """Liveness probe: answer with a small, fixed JSON payload."""
    payload = {
        "status": "ok",
        "environment": "agentdebugger-env",
        "version": "1.0.0",
    }
    return payload


@app.post("/reset")
async def reset(request: Optional[ResetRequest] = None):
    """Start a fresh episode and return the initial observation.

    Args:
        request: Optional JSON body. When the body is missing, or when it
            carries an explicit ``"task_id": null``, the "easy" task is used.

    Returns:
        JSONResponse: the observation with status 200 on success. If a
        ``ValueError`` is raised inside the try block (presumably an unknown
        task id; confirm against ``env.reset``) the status is 400 and the
        body holds the error text plus ``list_tasks()``. Any other exception
        is reported in an ``error`` field with status 200.
    """
    # task_id is declared Optional, so a JSON null reaches this point as None.
    # Treat it like an absent body instead of handing None to env.reset.
    task_id = "easy"
    if request is not None and request.task_id is not None:
        task_id = request.task_id

    try:
        observation = env.reset(task_id)
        # Built inside the try so a serialization failure is reported too.
        return JSONResponse(content=observation, status_code=200)
    except ValueError as e:
        return JSONResponse(
            content={"error": str(e), "available_tasks": list_tasks()},
            status_code=400,
        )
    except Exception as e:
        # Status 200 is kept on purpose: existing clients read the error
        # from the response body rather than from the status code.
        return JSONResponse(
            content={"error": f"Internal error during reset: {str(e)}"},
            status_code=200,
        )


@app.post("/step")
async def step(action: Action):
    """Apply one action to the shared environment.

    The reply always carries HTTP 200. On success the body is whatever
    ``env.step`` returned; on any exception it is a placeholder with an empty
    observation, an all-zero reward, ``done`` set to False and the error text
    under ``info``.
    """
    try:
        outcome = env.step(action)
        response = JSONResponse(content=outcome, status_code=200)
    except Exception as exc:
        # Never return 500 — failures are described in the body instead.
        zero_reward = {
            "step_reward": 0.0,
            "cumulative_reward": 0.0,
            "grader_score": 0.0,
            "breakdown": {},
        }
        fallback = {
            "observation": {},
            "reward": zero_reward,
            "done": False,
            "info": {"error": "Internal error: " + str(exc)},
        }
        response = JSONResponse(content=fallback, status_code=200)
    return response


@app.get("/state")
async def get_state():
    """Expose the environment's internal state.

    Returns:
        JSONResponse: the value of ``env.state()`` with status 200, or, if
        anything raises, an ``error`` message that is also sent with status 200.
    """
    try:
        snapshot = env.state()
        response = JSONResponse(content=snapshot, status_code=200)
    except Exception as exc:
        failure = {"error": "Internal error: " + str(exc)}
        response = JSONResponse(content=failure, status_code=200)
    return response