Spaces:

h1manshu
/

code_review

Sleeping

App Files Files Community

code_review / server /code_review_environment.py

h1manshu

Upload folder using huggingface_hub

a0ea022 verified 11 days ago

raw

history blame contribute delete

6.97 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""
	Code Review Environment Implementation.

	Supports three grader difficulty levels: "easy", "medium", "hard".
	The tier used for each episode is chosen in `reset()` from the task's
	`task_type`; the constructor also accepts a `grader_level` argument.
	"""

	from uuid import uuid4

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	try:
	from ..models import (
	CodeReviewAction,
	CodeReviewObservation,
	CodeReviewReward,
	CodeReviewPullRequest,
	CodeReviewStepResponse,
	)
	except ImportError:
	from models import (
	CodeReviewAction,
	CodeReviewObservation,
	CodeReviewReward,
	CodeReviewPullRequest,
	CodeReviewStepResponse,
	)

	import json
	from pathlib import Path

	try:
	from .graders import get_grader
	except ImportError:
	from graders import get_grader

	# Task dataset, located at ../dataset/dataset.json relative to this module's directory.
	dataset_path = Path(__file__).parent.parent.joinpath("dataset", "dataset.json")


	class CodeReviewEnvironment(Environment):
	    """
	    Code Review environment with configurable grading difficulty.

	    Every call to ``reset()`` moves on to the next task in the dataset. The
	    grader tier for an episode is the task's ``task_type`` when that is one
	    of "easy", "medium" or "hard"; otherwise the constructor's
	    ``grader_level`` is used.

	    Args:
	        grader_level: Fallback grading difficulty — one of "easy", "medium",
	            "hard". Defaults to "medium". Unrecognised values are treated
	            as "medium".

	    Example:
	        >>> env = CodeReviewEnvironment(grader_level="hard")
	        >>> obs = env.reset()
	        >>> result = env.step(CodeReviewAction(action_type="final_decision", decision="approve"))
	    """

	    SUPPORTS_CONCURRENT_SESSIONS: bool = True

	    # Grader tiers recognised when choosing a grader in reset().
	    _GRADER_LEVELS = ("easy", "medium", "hard")

	    def __init__(self, grader_level: str = "medium"):
	        """Load the dataset and start the first episode.

	        Args:
	            grader_level: Tier used for tasks whose ``task_type`` is not a
	                recognised tier.

	        Raises:
	            ValueError: If the dataset file contains no tasks.
	        """
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._reset_count = 0
	        self.max_steps = 5
	        self.task_index = 0
	        # Fallback tier for tasks without a recognised task_type.
	        self._default_grader_level = (
	            grader_level if grader_level in self._GRADER_LEVELS else "medium"
	        )

	        # JSON is UTF-8 by specification; do not depend on the locale default.
	        with open(dataset_path, encoding="utf-8") as f:
	            self.dataset = json.load(f)

	        # Fail with a clear message instead of a ZeroDivisionError in reset().
	        if not self.dataset:
	            raise ValueError(f"Dataset at {dataset_path} contains no tasks")

	        self.reset()

	    def reset(self) -> CodeReviewObservation:
	        """Start a new episode on the next task in the dataset.

	        Returns:
	            The initial observation for the new task: no previous comments,
	            a step count of zero, ``reward=0.0`` and ``done=False``.
	        """
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._reset_count += 1
	        # NOTE(review): the index is advanced before the lookup, so the first
	        # episode uses index 1 (modulo the dataset size) — confirm intended.
	        self.task_index += 1

	        self.sample = self.dataset[self.task_index % len(self.dataset)]

	        self.pr = CodeReviewPullRequest(**self.sample["pr"])
	        self.gt = self.sample["ground_truth"]
	        self.task_type = self.sample.get("task_type", "unknown")

	        # The task's own tier wins; the constructor argument is the fallback.
	        if self.task_type in self._GRADER_LEVELS:
	            grader_level = self.task_type
	        else:
	            grader_level = self._default_grader_level
	        self.grader = get_grader(grader_level)
	        self.grader_level = grader_level

	        self.history = []
	        self.step_count = 0
	        self.done = False
	        self.issues_identified = []
	        self.fix_attempted = False

	        return CodeReviewObservation(
	            pr=self.pr,
	            # A fresh list, so the observation never aliases self.history.
	            previous_comments=[],
	            step_count=self.step_count,
	            max_steps=self.max_steps,
	            reward=0.0,
	            done=False,
	        )

	    def step(self, action: CodeReviewAction) -> CodeReviewStepResponse:  # type: ignore[override]
	        """Grade one action and return the resulting observation and reward.

	        ``action`` may be a ``CodeReviewAction``, a dict of its fields, or a
	        list/tuple of ``(action_type, comment, suggested_code, decision)``
	        with trailing items optional. Any other input, or input that fails
	        to build a ``CodeReviewAction``, yields an "invalid_action" response
	        with ``done=True``.

	        Returns:
	            A ``CodeReviewStepResponse``. ``done`` is true after a
	            "final_decision" action or once ``max_steps`` steps were taken;
	            in that case the reward is the grader's final episode score.
	        """
	        # The framework-level counter also counts rejected actions.
	        self._state.step_count += 1

	        try:
	            action = self._coerce_action(action)
	        except Exception as e:
	            print(f"Error processing action: {e}")
	            return self._invalid_step()

	        # ------------------------------------------------------------------
	        # Update state
	        # ------------------------------------------------------------------
	        self.step_count += 1
	        self.history.append(action)

	        if action.action_type == "comment" and action.comment:
	            self.issues_identified.append(action.comment)

	        if action.action_type == "suggest_fix":
	            self.fix_attempted = True

	        # ------------------------------------------------------------------
	        # Score via the active grader
	        # ------------------------------------------------------------------
	        score = self.grader.grade_action(action, self.gt)
	        bonus = self.grader.compute_step_bonus(action, self.step_count, self.history)

	        # Per-step rewards are clamped to [0.01, 0.99].
	        score = max(0.01, min(score + bonus, 0.99))

	        done = (
	            action.action_type == "final_decision"
	            or self.step_count >= self.max_steps
	        )
	        # Keep the attribute initialised in reset() in sync with the response.
	        self.done = done

	        if done:
	            # The final episode score replaces the per-step score.
	            score = self.grader.compute_done_score(self.history, self.gt)

	        # ------------------------------------------------------------------
	        # Build response
	        # ------------------------------------------------------------------
	        obs = CodeReviewObservation(
	            pr=self.pr,
	            previous_comments=self._comment_history(),
	            step_count=self.step_count,
	            max_steps=self.max_steps,
	        )

	        rew = CodeReviewReward(score=score, feedback="graded")
	        print(f"[{self.grader_level.upper()}] Step {self.step_count} — Score: {rew.score:.4f}")

	        return CodeReviewStepResponse(
	            observation=obs,
	            reward=rew.score,
	            done=done,
	            info={
	                "grader_level": self.grader_level,
	                "task_type": self.task_type,
	                "issues_identified": len(self.issues_identified),
	                "fix_attempted": self.fix_attempted,
	            },
	        )

	    @property
	    def state(self) -> State:
	        """Return the current ``State`` (episode id and step count)."""
	        return self._state

	    @staticmethod
	    def _coerce_action(action) -> CodeReviewAction:
	        """Build a CodeReviewAction from a dict, list/tuple or existing action.

	        Raises:
	            ValueError: If ``action`` is none of the supported kinds.
	        """
	        if isinstance(action, dict):
	            return CodeReviewAction(**action)
	        if isinstance(action, (list, tuple)):
	            # Positional form; missing trailing fields default to None.
	            return CodeReviewAction(
	                action_type=action[0],
	                comment=action[1] if len(action) > 1 else None,
	                suggested_code=action[2] if len(action) > 2 else None,
	                decision=action[3] if len(action) > 3 else None,
	            )
	        if isinstance(action, CodeReviewAction):
	            return action
	        raise ValueError(f"Unsupported action type: {type(action)}")

	    def _comment_history(self) -> list:
	        """Return the non-empty comments of all actions taken this episode."""
	        return [a.comment for a in self.history if a.comment]

	    def _invalid_step(self) -> CodeReviewStepResponse:
	        """Return the response for a rejected action: zero reward, ``done=True``."""
	        rew = CodeReviewReward(score=0.0, feedback="invalid action")
	        obs = CodeReviewObservation(
	            pr=self.pr,
	            previous_comments=self._comment_history(),
	            step_count=self.step_count,
	            max_steps=self.max_steps,
	        )
	        self.done = True
	        return CodeReviewStepResponse(
	            observation=obs,
	            # Pass the numeric score, as step() does, not the reward object.
	            reward=rew.score,
	            done=True,
	            info={"error": "invalid_action"},
	        )