"""
AgentDebuggerEnv — FastAPI Server
===================================
Exposes the environment as REST endpoints:
POST /reset — Start a fresh episode
POST /step — Submit one action
GET /state — Full internal state
GET /health — Deployment health check (must return 200)
"""
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, RedirectResponse
from pydantic import BaseModel
from typing import Optional
from env.environment import DebuggerEnvironment
from env.models import Action
from env.tasks.registry import list_tasks
# Application instance; the metadata below is passed straight to FastAPI.
app = FastAPI(
    title="AgentDebuggerEnv",
    description="An OpenEnv-compliant debugging environment for AI agents",
    version="1.0.0",
)

# ── Mount Gradio demo at /demo ──────────────────────────────────────────────
# The demo UI is optional. If gradio or the demo module cannot be imported the
# server keeps running in API-only mode, but the reason is logged instead of
# being swallowed silently so a broken demo install is visible at start-up.
try:
    import gradio as gr
    from demo.gradio_app import create_demo

    _gradio_demo = create_demo()
    app = gr.mount_gradio_app(app, _gradio_demo, path="/demo")
except ImportError as exc:
    import logging

    logging.getLogger(__name__).warning(
        "Gradio demo not mounted at /demo (%s); running in API-only mode.", exc
    )

# Single environment instance to manage the debugging lifecycle.
env = DebuggerEnvironment()
# Request body for POST /reset.
# Documented with comments rather than a docstring so the generated schema
# for this model is left exactly as it was.
class ResetRequest(BaseModel):
    # Task identifier handed to the environment; "easy" when the field is omitted.
    task_id: Optional[str] = "easy"
@app.get("/", include_in_schema=False)
async def root_redirect():
    """Redirect the bare root URL to the demo UI, or to ``/api`` without it.

    The Gradio demo is only mounted when its import succeeds. In API-only
    mode a redirect to ``/demo`` would land on a 404, so fall back to the
    JSON overview served at ``/api``.
    """
    # A mounted sub-application shows up in the route table under its mount path.
    demo_mounted = any(
        getattr(route, "path", None) == "/demo" for route in app.routes
    )
    return RedirectResponse(url="/demo" if demo_mounted else "/api")
@app.get("/api")
async def root():
    """Return a JSON overview of the environment and its endpoints."""
    # Keep this map in sync with the routes registered in this module:
    # the overview itself is served at /api, while / is only a redirect.
    endpoints = {
        "GET /": "Redirects to the Gradio demo at /demo",
        "GET /api": "This overview",
        "GET /health": "Health check — returns 200 if server is live",
        "GET /tasks": "List all available tasks with metadata",
        "GET /state": "Current episode state",
        "POST /reset": "Start a new episode. Body: {\"task_id\": \"easy\"|\"medium\"|\"hard\"}",
        "POST /step": "Submit one action. Body: Action JSON",
    }
    return {
        "name": "AgentDebuggerEnv",
        "version": "1.0.0",
        "description": (
            "An OpenEnv-compliant environment where AI agents debug broken code "
            "through iterative hypothesis-test-fix cycles. Unlike static benchmarks, "
            "agents act in a live sandbox and observe real execution output each step."
        ),
        "openenv_compliant": True,
        "domain": "software_engineering",
        "endpoints": endpoints,
        "tasks": list_tasks(),
        "reward_type": "dense",
        "action_types": ["submit_fix", "query_context", "give_up"],
    }
@app.get("/tasks")
async def get_tasks():
    # Static catalogue of the three tasks, one dict per difficulty level.
    # The values are hard-coded here rather than read from the task registry.
    easy_task = {
        "id": "easy",
        "name": "Single Function Off-By-One Bug",
        "difficulty": "easy",
        "max_attempts": 5,
        "max_steps": 8,
        "tests_total": 8,
        "description": (
            "Binary search with an off-by-one termination condition. "
            "Error message is clear and high-signal. 1-2 iterations expected."
        ),
    }
    medium_task = {
        "id": "medium",
        "name": "Red Herring Authentication Bug",
        "difficulty": "medium",
        "max_attempts": 7,
        "max_steps": 15,
        "tests_total": 10,
        "description": (
            "Authentication module where the error message points to the wrong "
            "function. Agent must trace data flow backwards from symptom to root cause "
            "and resist the red herring."
        ),
    }
    hard_task = {
        "id": "hard",
        "name": "Concurrency Race Condition",
        "difficulty": "hard",
        "max_attempts": 10,
        "max_steps": 25,
        "tests_total": 8,
        "description": (
            "Thread-safe counter with a race condition invisible to all sequential tests. "
            "Agent must recognize that passing tests are insufficient proof of correctness, "
            "design a concurrent stress test to surface the bug, then fix the atomicity issue."
        ),
    }
    return {"tasks": [easy_task, medium_task, hard_task]}
@app.get("/health")
async def health():
    """Health check endpoint to verify server availability."""
    # Constant payload: no environment state is consulted.
    payload = {
        "status": "ok",
        "environment": "agentdebugger-env",
        "version": "1.0.0",
    }
    return payload
@app.post("/reset")
async def reset(request: Optional[ResetRequest] = None):
    """Start a fresh episode. Returns initial Observation. Defaults to the 'easy' task if the body or its task_id is missing."""
    # Status codes: 200 with the observation on success, 400 with the list of
    # available tasks when env.reset raises ValueError, and 200 with an
    # "error" field for any other failure (errors are reported in the body).
    try:
        # A missing body and an explicit {"task_id": null} both select the
        # default task, so None is never forwarded to the environment.
        task_id = "easy"
        if request is not None and request.task_id is not None:
            task_id = request.task_id
        observation = env.reset(task_id)
        return JSONResponse(content=observation, status_code=200)
    except ValueError as e:
        return JSONResponse(
            content={"error": str(e), "available_tasks": list_tasks()},
            status_code=400,
        )
    except Exception as e:
        return JSONResponse(
            content={"error": f"Internal error during reset: {str(e)}"},
            status_code=200,
        )
@app.post("/step")
async def step(action: Action):
    """Submit one action. Returns {observation, reward, done, info}. Always HTTP 200."""
    try:
        outcome = env.step(action)
        return JSONResponse(content=outcome, status_code=200)
    except Exception as exc:
        # Never return 500 — all errors go in response body.
        # The fallback keeps the usual four top-level keys, with an empty
        # observation and a zeroed reward.
        zero_reward = {
            "step_reward": 0.0,
            "cumulative_reward": 0.0,
            "grader_score": 0.0,
            "breakdown": {},
        }
        fallback = {
            "observation": {},
            "reward": zero_reward,
            "done": False,
            "info": {"error": f"Internal error: {exc!s}"},
        }
        return JSONResponse(content=fallback, status_code=200)
@app.get("/state")
async def get_state():
    """Return full internal environment state as a plain dict."""
    try:
        snapshot = env.state()
        return JSONResponse(content=snapshot, status_code=200)
    except Exception as exc:
        # Failures are reported in the body with HTTP 200, not as a 500.
        error_body = {"error": f"Internal error: {exc!s}"}
        return JSONResponse(content=error_body, status_code=200)
|