# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Code review environment with task-based grading and normalized rewards."""
import uuid
from typing import Any
from openenv.core.env_server.interfaces import Action, Environment, Observation
from ..models import CodeAction, CodeObservation, CodeState
from .task_bank import (
format_task_prompt,
get_task,
grade_action,
list_tasks,
record_episode_score,
)
class PythonCodeActEnv(Environment):
    """Task-driven code-review environment.

    Episodes are single-step:

    1. ``reset(task_id=...)`` selects a task and returns its prompt
       (code snippet, PR title/description, changed files).
    2. The agent submits a ``CodeAction`` describing the issue it found.
    3. ``step()`` grades that action against the task via ``grade_action``
       and returns an observation carrying the reward with ``done=True``.
       (The reward is intended to be normalized to [0.0, 1.0]; the actual
       range is determined by ``grade_action``.)
    """

    # Instruction text shown to the agent. Shared by reset() and step() so the
    # two observations cannot drift apart.
    _TASK_DESCRIPTION = (
        "Review this pull request and report the highest-impact issue "
        "with file_path, issue_type, severity, line_number, and evidence."
    )

    def __init__(
        self,
    ):
        """Create the environment with an empty state and a default task id."""
        super().__init__(transform=None)
        self._state = CodeState()
        # Task used by reset() when the caller does not request one.
        self._current_task_id = "task_easy_1"

    def _build_observation(
        self,
        task: Any,
        *,
        stdout: str,
        previous_feedback: str,
        reward: float,
        done: bool,
        **extra: Any,
    ) -> CodeObservation:
        """Build the observation for ``task`` with the task-derived fields filled in."""
        return CodeObservation(
            stdout=stdout,
            stderr="",
            exit_code=0,
            task_id=task.task_id,
            difficulty=task.difficulty,
            task_description=self._TASK_DESCRIPTION,
            code_snippet=format_task_prompt(task),
            pr_title=task.pr_title,
            pr_description=task.pr_description,
            changed_files="\n".join(task.changed_files),
            previous_feedback=previous_feedback,
            reward=reward,
            done=done,
            **extra,
        )

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs: Any,
    ) -> Observation:
        """Start a new episode for the requested (or most recent) task.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Identifier for the new episode. A random UUID4 string
                is generated when omitted or empty.
            **kwargs: ``task_id`` selects the task. When it is missing or
                falsy (e.g. ``None``), the task from the previous reset (or
                the constructor default) is reused.

        Returns:
            The initial observation (``done=False``, ``reward=0.0``) with the
            task prompt and, in ``metadata["available_tasks"]``, the result
            of ``list_tasks()``.
        """
        # Use `or` (as step() does) so an explicit task_id=None falls back to
        # the current task instead of being stringified to "None".
        requested_task_id = kwargs.get("task_id") or self._current_task_id
        task = get_task(str(requested_task_id))
        self._current_task_id = task.task_id

        self._state = CodeState(
            episode_id=episode_id or str(uuid.uuid4()),
            step_count=0,
            task_id=task.task_id,
            difficulty=task.difficulty,
            last_score=0.0,
        )
        self._state.last_exit_code = 0

        observation = self._build_observation(
            task,
            stdout="Task initialized.",
            previous_feedback="",
            reward=0.0,
            done=False,
            metadata={"available_tasks": list_tasks()},
        )
        return self._apply_transform(observation)

    def step(
        self,
        action: Action,
        timeout_s: float | None = None,
        **kwargs: Any,
    ) -> Observation:
        """Grade a submitted review and finish the episode.

        Args:
            action: The ``CodeAction`` holding the agent's review.
            timeout_s: Accepted for interface compatibility; not used here.
            **kwargs: Optional ``task_id`` and ``episode_id`` overrides. When
                absent or falsy, the values stored in the current state are
                used (falling back to the last reset task id and a fresh
                UUID4 string respectively).

        Returns:
            An observation with ``done=True`` whose ``reward`` is the score
            from ``grade_action`` and whose ``stdout`` and
            ``previous_feedback`` both carry the grader feedback.

        Raises:
            ValueError: If ``action`` is not a ``CodeAction`` instance.
        """
        if not isinstance(action, CodeAction):
            raise ValueError(f"Expected CodeAction, got {type(action)}")

        requested_task_id = kwargs.get("task_id")
        task_id = str(requested_task_id or self._state.task_id or self._current_task_id)
        task = get_task(task_id)
        episode_id = str(
            kwargs.get("episode_id") or self._state.episode_id or str(uuid.uuid4())
        )

        # Sync the state with the resolved task/episode, since either may
        # have been overridden through kwargs.
        self._state.task_id = task.task_id
        self._state.difficulty = task.difficulty
        self._state.episode_id = episode_id

        reward, feedback = grade_action(action, task)

        self._state.step_count += 1
        self._state.last_exit_code = 0
        self._state.last_score = reward
        record_episode_score(task.task_id, episode_id, reward)

        observation = self._build_observation(
            task,
            stdout=feedback,
            previous_feedback=feedback,
            reward=reward,
            done=True,
        )
        return self._apply_transform(observation)

    @property
    def state(self) -> CodeState:
        """Return the current ``CodeState`` (task, episode id, step count, last score)."""
        return self._state
|