# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Code review environment with task-based grading and normalized rewards."""

import uuid
from typing import Any

from openenv.core.env_server.interfaces import Action, Environment, Observation

from ..models import CodeAction, CodeObservation, CodeState
from .task_bank import (
    format_task_prompt,
    get_task,
    grade_action,
    list_tasks,
    record_episode_score,
)

# Instruction text attached to every observation; shared by reset() and step()
# so the two code paths cannot drift apart.
_TASK_DESCRIPTION = (
    "Review this pull request and report the highest-impact issue "
    "with file_path, issue_type, severity, line_number, and evidence."
)


def _build_observation(task: Any, **fields: Any) -> CodeObservation:
    """Create a CodeObservation pre-filled with the task-derived fields.

    Args:
        task: Task object returned by ``get_task``; its ``task_id``,
            ``difficulty``, ``pr_title``, ``pr_description`` and
            ``changed_files`` attributes are read here.
        **fields: Per-call fields (``stdout``, ``previous_feedback``,
            ``reward``, ``done`` and optionally ``metadata``) forwarded
            unchanged to ``CodeObservation``.

    Returns:
        The populated observation (before any transform is applied).
    """
    return CodeObservation(
        stderr="",
        exit_code=0,
        task_id=task.task_id,
        difficulty=task.difficulty,
        task_description=_TASK_DESCRIPTION,
        code_snippet=format_task_prompt(task),
        pr_title=task.pr_title,
        pr_description=task.pr_description,
        changed_files="\n".join(task.changed_files),
        **fields,
    )


class PythonCodeActEnv(Environment):
    """
    Task-driven code-review environment.

    Episodes are single-step:
    1. `reset(task_id=...)` returns the formatted task prompt together with
       the PR title, description and changed files.
    2. The agent submits a `CodeAction` describing the issue it found.
       NOTE(review): the exact CodeAction fields are declared in `..models`;
       the prompt asks for file_path, issue_type, severity, line_number and
       evidence -- confirm these match the model.
    3. `step()` returns the reward computed by `grade_action` and done=True.
       The reward is presumably normalized to [0.0, 1.0] by the task bank;
       this class does not clamp it.
    """

    def __init__(
        self,
    ):
        super().__init__(transform=None)
        self._state = CodeState()
        # Task used by reset() when the caller does not request one.
        self._current_task_id = "task_easy_1"

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs: Any,
    ) -> Observation:
        """
        Reset the environment and select a task.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Identifier for the new episode. A random UUID4 string
                is generated when omitted or empty.
            **kwargs: May contain ``task_id`` to choose the task. When it is
                missing, ``None`` or empty, the most recently used task id is
                reused.

        Returns:
            Initial observation with ``done=False``, ``reward=0.0`` and the
            list of available tasks under ``metadata["available_tasks"]``.
        """
        # `or` (not a .get default) so an explicit task_id=None falls back to
        # the current task instead of being stringified to "None"; this
        # mirrors the fallback used in step().
        requested_task_id = kwargs.get("task_id") or self._current_task_id
        task = get_task(str(requested_task_id))
        self._current_task_id = task.task_id

        self._state = CodeState(
            episode_id=episode_id or str(uuid.uuid4()),
            step_count=0,
            task_id=task.task_id,
            difficulty=task.difficulty,
            last_score=0.0,
        )
        self._state.last_exit_code = 0

        observation = _build_observation(
            task,
            stdout="Task initialized.",
            previous_feedback="",
            done=False,
            reward=0.0,
            metadata={"available_tasks": list_tasks()},
        )
        return self._apply_transform(observation)

    def step(
        self,
        action: Action,
        timeout_s: float | None = None,
        **kwargs: Any,
    ) -> Observation:
        """
        Grade the submitted review and finish the episode.

        Args:
            action: The agent's review; must be a ``CodeAction``.
            timeout_s: Accepted for interface compatibility; not used here.
            **kwargs: May contain ``task_id`` and/or ``episode_id`` to
                override the values stored in the current state.

        Returns:
            Observation whose ``stdout`` and ``previous_feedback`` carry the
            grader feedback, with the graded ``reward`` and ``done=True``.

        Raises:
            ValueError: If action is not a CodeAction instance
        """
        if not isinstance(action, CodeAction):
            raise ValueError(f"Expected CodeAction, got {type(action)}")

        # Resolution order: explicit kwarg, then current state, then the
        # last task chosen by reset().
        requested_task_id = kwargs.get("task_id")
        task_id = str(requested_task_id or self._state.task_id or self._current_task_id)
        task = get_task(task_id)
        episode_id = str(
            kwargs.get("episode_id") or self._state.episode_id or str(uuid.uuid4())
        )

        self._state.task_id = task.task_id
        self._state.difficulty = task.difficulty
        self._state.episode_id = episode_id

        reward, feedback = grade_action(action, task)

        self._state.step_count += 1
        self._state.last_exit_code = 0
        self._state.last_score = reward
        record_episode_score(task.task_id, episode_id, reward)

        observation = _build_observation(
            task,
            stdout=feedback,
            previous_feedback=feedback,
            reward=reward,
            done=True,
        )
        return self._apply_transform(observation)

    @property
    def state(self) -> CodeState:
        """Return the current state (episode id, task, step count, last score, last exit code)."""
        return self._state