SimranShaikh committed on
Commit
0ef4206
·
verified ·
1 Parent(s): 8ccf46e
Files changed (1) hide show
  1. environment/env.py +135 -0
environment/env.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CodeReviewEnv — main environment logic.
3
+ Manages state, episode flow, reward accumulation, and per-step grading.
4
+ """
5
+ from typing import Optional, List, Dict, Any
6
+
7
+ from environment.models import (
8
+ CodeReviewAction,
9
+ CodeReviewObservation,
10
+ StepResult,
11
+ ResetResult,
12
+ StateResult,
13
+ )
14
+ from environment.tasks import get_task, list_tasks as _list_tasks
15
+ from environment.graders import grade
16
+
17
+
18
class CodeReviewEnv:
    """
    OpenEnv-compliant code-review environment.

    Episode flow
    ────────────
      reset(task_id)  → observation
      step(action)    → (observation, reward, done, info)  [repeated ≤ max_steps]
      get_state()     → current state snapshot
    """

    # Task selected by reset() when the caller does not name one.
    DEFAULT_TASK_ID = "easy_syntax"
    # A single-step reward at or above this value ends the episode early.
    PERFECT_REWARD_THRESHOLD = 0.95
    # Number of decimal places used when reporting reward figures.
    _REWARD_PRECISION = 4

    def __init__(self) -> None:
        """Create an environment with no active episode; call reset() to start one."""
        self._task: Optional[dict] = None
        self._step_number: int = 0
        self._total_reward: float = 0.0
        self._actions_history: List[Dict[str, Any]] = []
        self._done: bool = False
        self._initialized: bool = False
        self._last_feedback: Optional[str] = None

    # ── public properties ────────────────────────────────────────────────────

    @property
    def is_initialized(self) -> bool:
        """True once reset() has been called at least once."""
        return self._initialized

    # ── core API ─────────────────────────────────────────────────────────────

    def reset(self, task_id: Optional[str] = None) -> ResetResult:
        """Start a new episode and return its first observation.

        Args:
            task_id: Identifier passed to ``get_task``. When ``None`` the
                default task (``DEFAULT_TASK_ID``) is used.

        Returns:
            A ``ResetResult`` wrapping the initial observation.
        """
        if task_id is None:
            task_id = self.DEFAULT_TASK_ID

        # Look the task up first: if the lookup raises, the previous episode
        # state is left exactly as it was.
        self._task = get_task(task_id)
        self._step_number = 0
        self._total_reward = 0.0
        self._actions_history = []
        self._done = False
        self._initialized = True
        self._last_feedback = None

        return ResetResult(observation=self._make_observation())

    def step(self, action: CodeReviewAction) -> StepResult:
        """Process one agent action and return (observation, reward, done, info).

        The episode ends when the agent sets ``action.done``, when the step
        reward reaches ``PERFECT_REWARD_THRESHOLD``, or when the task's
        ``max_steps`` is reached.

        Args:
            action: The agent's review for this step.

        Returns:
            A ``StepResult`` holding the next observation, this step's reward,
            the done flag and an ``info`` dict with ``feedback``, ``step``,
            ``total_reward`` and ``cumulative_score`` (mean reward per step).

        Raises:
            RuntimeError: If reset() has not been called or the episode is over.
        """
        if not self._initialized or self._done:
            raise RuntimeError("Call reset() before stepping, or episode is over.")

        task = self._task

        # Compute everything that can fail *before* touching episode state, so
        # an exception while grading or inspecting the action cannot leave the
        # step counter, reward total and history out of sync with each other.
        reward, feedback = grade(task["task_id"], action, task["ground_truth"])
        step_number = self._step_number + 1
        total_reward = self._total_reward + reward
        history_entry = {
            "step": step_number,
            "num_issues_reported": len(action.identified_issues),
            "has_fix": action.suggested_fix is not None,
            "reward": reward,
        }
        # Episode ends when: agent says done, reward is perfect, or max steps reached
        done = (
            action.done
            or reward >= self.PERFECT_REWARD_THRESHOLD
            or step_number >= task["max_steps"]
        )

        # Commit the new state in one go.
        self._step_number = step_number
        self._total_reward = total_reward
        self._last_feedback = feedback
        self._actions_history.append(history_entry)
        self._done = done

        return StepResult(
            observation=self._make_observation(),
            reward=reward,
            done=done,
            info={
                "feedback": feedback,
                "step": step_number,
                "total_reward": round(total_reward, self._REWARD_PRECISION),
                "cumulative_score": round(
                    total_reward / max(step_number, 1), self._REWARD_PRECISION
                ),
            },
        )

    def get_state(self) -> StateResult:
        """Return a snapshot of the current episode state.

        Safe to call before reset(): ``task_id`` is then the empty string.
        The returned history is a copy, so later steps (or edits made by the
        caller) do not alter the environment's own record or older snapshots.
        """
        return StateResult(
            task_id=self._task["task_id"] if self._task else "",
            step_number=self._step_number,
            total_reward=round(self._total_reward, self._REWARD_PRECISION),
            actions_history=[dict(entry) for entry in self._actions_history],
            done=self._done,
            initialized=self._initialized,
        )

    def list_tasks(self) -> list:
        """Return the available tasks as reported by ``environment.tasks.list_tasks``."""
        return _list_tasks()

    # ── internal helpers ─────────────────────────────────────────────────────

    def _make_observation(self) -> CodeReviewObservation:
        """Build the observation for the active task at the current step.

        Raises:
            RuntimeError: If no task is loaded (reset() has not been called).
        """
        task = self._task
        if task is None:
            # Fail with a clear message instead of "'NoneType' is not subscriptable".
            raise RuntimeError("Call reset() before requesting an observation.")

        return CodeReviewObservation(
            task_id=task["task_id"],
            task_name=task["task_name"],
            difficulty=task["difficulty"],
            language=task["language"],
            code_snippet=task["code_snippet"],
            context=task["context"],
            step_number=self._step_number,
            max_steps=task["max_steps"],
            previous_feedback=self._last_feedback,
        )