Spaces:
Sleeping
Sleeping
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """ | |
| Code Review Environment Implementation. | |
| Supports three grader difficulty levels: "easy", "medium", "hard". | |
| Pass `grader_level` to the constructor to select the desired tier. | |
| """ | |
| from uuid import uuid4 | |
| from openenv.core.env_server.interfaces import Environment | |
| from openenv.core.env_server.types import State | |
| try: | |
| from ..models import ( | |
| CodeReviewAction, | |
| CodeReviewObservation, | |
| CodeReviewReward, | |
| CodeReviewPullRequest, | |
| CodeReviewStepResponse, | |
| ) | |
| except ImportError: | |
| from models import ( | |
| CodeReviewAction, | |
| CodeReviewObservation, | |
| CodeReviewReward, | |
| CodeReviewPullRequest, | |
| CodeReviewStepResponse, | |
| ) | |
| import json | |
| from pathlib import Path | |
| try: | |
| from .graders import get_grader | |
| except ImportError: | |
| from graders import get_grader | |
| # Location of the JSON dataset file: <directory above this module's directory>/dataset/dataset.json. | |
| dataset_path = Path(__file__).parent.parent / "dataset" / "dataset.json" | |
class CodeReviewEnvironment(Environment):
    """
    Code Review environment with configurable grading difficulty.

    Every :meth:`reset` moves on to the next dataset sample (wrapping around
    at the end of the dataset) and selects a grader for it: the sample's
    ``task_type`` is used when it names a known tier, otherwise the
    ``grader_level`` given to the constructor applies.

    Args:
        grader_level: Fallback grading difficulty — one of "easy", "medium",
            "hard". Defaults to "medium"; unrecognised values are treated as
            "medium" as well.

    Raises:
        ValueError: If the dataset file contains no samples.

    Example:
        >>> env = CodeReviewEnvironment(grader_level="hard")
        >>> obs = env.reset()
        >>> obs = env.step(CodeReviewAction(action_type="final_decision", decision="approve"))
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Difficulty tiers that a sample's ``task_type`` (or the constructor
    # argument) may select.
    _GRADER_LEVELS = ("easy", "medium", "hard")

    def __init__(self, grader_level: str = "medium"):
        """Initialise the environment with the chosen grader tier."""
        # Fallback tier for samples whose task_type is not a known tier.
        self._default_grader_level = (
            grader_level if grader_level in self._GRADER_LEVELS else "medium"
        )
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._reset_count = 0
        self.max_steps = 5
        self.task_index = 0
        with open(dataset_path, encoding="utf-8") as f:
            self.dataset = json.load(f)
        if not self.dataset:
            # Fail here with a clear message instead of a ZeroDivisionError
            # from the modulo in reset().
            raise ValueError(f"Dataset at {dataset_path} contains no samples")
        # Prime the first episode so step() can be called straight away.
        # NOTE(review): reset() increments task_index before selecting a
        # sample, so dataset[0] is only served after a full wrap-around —
        # confirm this is intended.
        self.reset()

    def reset(self) -> CodeReviewObservation:
        """Reset the environment and advance to the next task.

        Returns:
            The initial observation for the newly selected pull request, with
            no previous comments, zero reward and ``done=False``.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._reset_count += 1
        self.task_index += 1
        # Cycle through the dataset indefinitely.
        self.sample = self.dataset[self.task_index % len(self.dataset)]
        self.pr = CodeReviewPullRequest(**self.sample["pr"])
        self.gt = self.sample["ground_truth"]
        self.task_type = self.sample.get("task_type", "unknown")

        # The sample's own tier wins; otherwise use the constructor's tier.
        if self.task_type in self._GRADER_LEVELS:
            grader_level = self.task_type
        else:
            grader_level = self._default_grader_level
        self.grader = get_grader(grader_level)
        self.grader_level = grader_level

        self.history = []
        self.step_count = 0
        self.done = False
        self.issues_identified = []
        self.fix_attempted = False

        return CodeReviewObservation(
            pr=self.pr,
            # Fresh list: self.history later receives action objects and must
            # not be aliased by an observation already handed to the caller.
            previous_comments=[],
            step_count=self.step_count,
            max_steps=self.max_steps,
            reward=0.0,
            done=False,
        )

    def step(self, action: CodeReviewAction) -> CodeReviewStepResponse:  # type: ignore[override]
        """Execute one step: grade the action and return an observation + reward.

        Args:
            action: A ``CodeReviewAction``, a dict of its fields, or a
                list/tuple ordered as (action_type, comment, suggested_code,
                decision).

        Returns:
            A step response carrying the new observation, the numeric reward,
            the ``done`` flag and an ``info`` dict. An action that cannot be
            converted ends the episode with reward 0.0.
        """
        self._state.step_count += 1

        # ------------------------------------------------------------------
        # Normalise action into a CodeReviewAction object
        # ------------------------------------------------------------------
        try:
            action = self._normalise_action(action)
        except Exception as e:
            # Broad on purpose: the model constructor's failure type is not
            # visible here, and any malformed action must end the episode
            # gracefully rather than crash the environment.
            print(f"Error processing action: {e}")
            return self._invalid_step()

        # ------------------------------------------------------------------
        # Update state
        # ------------------------------------------------------------------
        self.step_count += 1
        self.history.append(action)
        if action.action_type == "comment" and action.comment:
            self.issues_identified.append(action.comment)
        if action.action_type == "suggest_fix":
            self.fix_attempted = True

        # ------------------------------------------------------------------
        # Score via the active grader
        # ------------------------------------------------------------------
        score = self.grader.grade_action(action, self.gt)
        bonus = self.grader.compute_step_bonus(action, self.step_count, self.history)
        score = max(0.01, min(score + bonus, 0.99))

        done = (
            action.action_type == "final_decision"
            or self.step_count >= self.max_steps
        )
        # Keep the instance flag in sync with what is reported to the caller.
        self.done = done
        if done:
            # The episode-level score replaces the per-step score.
            score = self.grader.compute_done_score(self.history, self.gt)

        # ------------------------------------------------------------------
        # Build response
        # ------------------------------------------------------------------
        obs = self._build_observation()
        rew = CodeReviewReward(score=score, feedback="graded")
        print(f"[{self.grader_level.upper()}] Step {self.step_count} — Score: {rew.score:.4f}")

        return CodeReviewStepResponse(
            observation=obs,
            reward=rew.score,
            done=done,
            info={
                "grader_level": self.grader_level,
                "task_type": self.task_type,
                "issues_identified": len(self.issues_identified),
                "fix_attempted": self.fix_attempted,
            },
        )

    # NOTE(review): the OpenEnv ``Environment`` base class appears to declare
    # ``state`` as a property; this is kept as a plain method so existing
    # ``env.state()`` callers keep working — confirm against the base class.
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state

    @staticmethod
    def _normalise_action(action) -> CodeReviewAction:
        """Coerce a dict, list/tuple or CodeReviewAction into a CodeReviewAction.

        Raises:
            ValueError: If ``action`` is none of the supported input types.
        """
        if isinstance(action, dict):
            return CodeReviewAction(**action)
        if isinstance(action, (list, tuple)):
            # Positional form; trailing fields are optional.
            return CodeReviewAction(
                action_type=action[0],
                comment=action[1] if len(action) > 1 else None,
                suggested_code=action[2] if len(action) > 2 else None,
                decision=action[3] if len(action) > 3 else None,
            )
        if isinstance(action, CodeReviewAction):
            return action
        raise ValueError(f"Unsupported action type: {type(action)}")

    def _build_observation(self) -> CodeReviewObservation:
        """Build an observation of the current PR and the comments made so far."""
        return CodeReviewObservation(
            pr=self.pr,
            previous_comments=[a.comment for a in self.history if a.comment],
            step_count=self.step_count,
            max_steps=self.max_steps,
        )

    def _invalid_step(self) -> CodeReviewStepResponse:
        """Return the terminal response used when an action cannot be parsed."""
        rew = CodeReviewReward(score=0.0, feedback="invalid action")
        self.done = True
        return CodeReviewStepResponse(
            observation=self._build_observation(),
            # Numeric score, matching what step() returns on the normal path.
            reward=rew.score,
            done=True,
            info={"error": "invalid_action"},
        )