code_review / server /code_review_environment.py
h1manshu's picture
Upload folder using huggingface_hub
a0ea022 verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Code Review Environment Implementation.
Supports three grader difficulty levels: "easy", "medium", "hard".
Pass `grader_level` to the constructor to select the desired tier.
"""
from uuid import uuid4
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
try:
from ..models import (
CodeReviewAction,
CodeReviewObservation,
CodeReviewReward,
CodeReviewPullRequest,
CodeReviewStepResponse,
)
except ImportError:
from models import (
CodeReviewAction,
CodeReviewObservation,
CodeReviewReward,
CodeReviewPullRequest,
CodeReviewStepResponse,
)
import json
from pathlib import Path
try:
from .graders import get_grader
except ImportError:
from graders import get_grader
dataset_path = Path(__file__).parent.parent / "dataset" / "dataset.json"
class CodeReviewEnvironment(Environment):
"""
Code Review environment with configurable grading difficulty.
Args:
grader_level: Grading difficulty — one of "easy", "medium", "hard".
Defaults to "medium".
Example:
>>> env = CodeReviewEnvironment(grader_level="hard")
>>> obs = env.reset()
>>> obs = env.step(CodeReviewAction(action_type="final_decision", decision="approve"))
"""
SUPPORTS_CONCURRENT_SESSIONS: bool = True
def __init__(self, grader_level: str = "medium"):
"""Initialise the environment with the chosen grader tier."""
self._state = State(episode_id=str(uuid4()), step_count=0)
self._reset_count = 0
self.max_steps = 5
self.task_index = 0
with open(dataset_path) as f:
self.dataset = json.load(f)
self.reset()
def reset(self) -> CodeReviewObservation:
"""Reset the environment and advance to the next task."""
self._state = State(episode_id=str(uuid4()), step_count=0)
self._reset_count += 1
self.task_index += 1
self.sample = self.dataset[self.task_index % len(self.dataset)]
self.pr = CodeReviewPullRequest(**self.sample["pr"])
self.gt = self.sample["ground_truth"]
self.task_type = self.sample.get("task_type", "unknown")
grader_level = self.task_type if self.task_type in ("easy", "medium", "hard") else "medium"
self.grader = get_grader(grader_level)
self.grader_level = grader_level
self.history = []
self.step_count = 0
self.done = False
self.issues_identified = []
self.fix_attempted = False
return CodeReviewObservation(
pr=self.pr,
previous_comments=self.history,
step_count=self.step_count,
max_steps=self.max_steps,
reward=0.0,
done=False,
)
def step(self, action: CodeReviewAction) -> CodeReviewStepResponse: # type: ignore[override]
"""Execute one step: grade the action and return an observation + reward."""
self._state.step_count += 1
# ------------------------------------------------------------------
# Normalise action into a CodeReviewAction object
# ------------------------------------------------------------------
try:
if isinstance(action, dict):
action = CodeReviewAction(**action)
elif isinstance(action, (list, tuple)):
action = CodeReviewAction(
action_type=action[0],
comment=action[1] if len(action) > 1 else None,
suggested_code=action[2] if len(action) > 2 else None,
decision=action[3] if len(action) > 3 else None,
)
elif isinstance(action, CodeReviewAction):
pass
else:
raise ValueError(f"Unsupported action type: {type(action)}")
except Exception as e:
print(f"Error processing action: {e}")
return self._invalid_step()
# ------------------------------------------------------------------
# Update state
# ------------------------------------------------------------------
self.step_count += 1
self.history.append(action)
if action.action_type == "comment" and action.comment:
self.issues_identified.append(action.comment)
if action.action_type == "suggest_fix":
self.fix_attempted = True
# ------------------------------------------------------------------
# Score via the active grader
# ------------------------------------------------------------------
score = self.grader.grade_action(action, self.gt)
bonus = self.grader.compute_step_bonus(action, self.step_count, self.history)
score = max(0.01, min(score + bonus, 0.99))
done = (
action.action_type == "final_decision"
or self.step_count >= self.max_steps
)
if done:
score = self.grader.compute_done_score(self.history, self.gt)
# ------------------------------------------------------------------
# Build response
# ------------------------------------------------------------------
obs = CodeReviewObservation(
pr=self.pr,
previous_comments=[a.comment for a in self.history if a.comment],
step_count=self.step_count,
max_steps=self.max_steps,
)
rew = CodeReviewReward(score=score, feedback="graded")
print(f"[{self.grader_level.upper()}] Step {self.step_count} — Score: {rew.score:.4f}")
return CodeReviewStepResponse(
observation=obs,
reward=rew.score,
done=done,
info={
"grader_level": self.grader_level,
"task_type": self.task_type,
"issues_identified": len(self.issues_identified),
"fix_attempted": self.fix_attempted,
},
)
@property
def state(self) -> State:
return self._state
def _invalid_step(self) -> CodeReviewStepResponse:
rew = CodeReviewReward(score=0.0, feedback="invalid action")
obs = CodeReviewObservation(
pr=self.pr,
previous_comments=[a.comment for a in self.history if a.comment],
step_count=self.step_count,
max_steps=self.max_steps,
)
return CodeReviewStepResponse(
observation=obs,
reward=rew,
done=True,
info={"error": "invalid_action"},
)