from typing import Optional
from uuid import uuid4

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

from environment.graders import grade
from environment.models import CodeReviewAction, CodeReviewObservation
from environment.tasks import get_task

# Task loaded at construction time and on every reset.
_DEFAULT_TASK_ID = "easy_syntax"

# A graded reward at or above this value ends the episode immediately.
_SUCCESS_REWARD_THRESHOLD = 0.95


class CodeReviewEnv(Environment):
    """Single-task code-review environment.

    Each episode presents the task returned by ``get_task`` for
    ``_DEFAULT_TASK_ID``. Every ``step`` grades the submitted action against
    the task's ``ground_truth`` and reports the reward and grader feedback
    in the returned observation.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = False

    def __init__(self):
        """Create the environment with a fresh episode and the default task."""
        # NOTE(review): assumes the base initialiser takes no required
        # arguments -- confirm against the installed openenv version.
        super().__init__()
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._task = get_task(_DEFAULT_TASK_ID)
        self._last_feedback: Optional[str] = None

    def reset(self) -> CodeReviewObservation:
        """Start a new episode and return its initial observation.

        Reloads the default task, replaces the state with a new episode id
        and a zeroed step counter, and clears the stored feedback.

        Returns:
            The step-0 observation: no previous feedback, ``done=False``
            and ``reward=0.0``.
        """
        self._task = get_task(_DEFAULT_TASK_ID)
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._last_feedback = None
        return self._build_observation(
            step_number=0,
            previous_feedback=None,
            done=False,
            reward=0.0,
        )

    def step(self, action: CodeReviewAction) -> CodeReviewObservation:
        """Grade ``action`` and advance the episode by one step.

        Args:
            action: The review to grade. Its ``submit`` attribute ends the
                episode when truthy.

        Returns:
            An observation carrying the grader's reward and feedback.
            ``done`` is set when the action was submitted, the reward
            reached the success threshold, or the task's ``max_steps``
            was reached.
        """
        self._state.step_count += 1

        reward, feedback = grade(
            self._task["task_id"], action, self._task["ground_truth"]
        )
        self._last_feedback = feedback

        done = (
            action.submit
            or reward >= _SUCCESS_REWARD_THRESHOLD
            or self._state.step_count >= self._task["max_steps"]
        )
        return self._build_observation(
            step_number=self._state.step_count,
            previous_feedback=feedback,
            done=done,
            reward=reward,
        )

    @property
    def state(self) -> State:
        """Return the current episode state object."""
        return self._state

    def _build_observation(
        self,
        *,
        step_number: int,
        previous_feedback: Optional[str],
        done,
        reward,
    ) -> CodeReviewObservation:
        """Combine the current task's static fields with per-step values."""
        task = self._task
        return CodeReviewObservation(
            task_id=task["task_id"],
            task_name=task["task_name"],
            difficulty=task["difficulty"],
            language=task["language"],
            code_snippet=task["code_snippet"],
            context=task["context"],
            step_number=step_number,
            max_steps=task["max_steps"],
            previous_feedback=previous_feedback,
            done=done,
            reward=reward,
        )