havinashpatil commited on
Commit
5dffd52
·
1 Parent(s): 9d429ce

Clean notebook outputs and add Colab warning note

Browse files
Files changed (1) hide show
  1. train_grpo.ipynb +12 -216
train_grpo.ipynb CHANGED
@@ -10,192 +10,20 @@
10
  "It uses the `m-a-p/Code-Feedback` dataset to train the LLM for coding debugging and improving time complexity."
11
  ]
12
  },
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "execution_count": 4,
16
  "metadata": {},
17
- "outputs": [
18
- {
19
- "name": "stdout",
20
- "output_type": "stream",
21
- "text": [
22
- "Requirement already satisfied: trl in .\\venv\\lib\\site-packages (1.2.0)\n",
23
- "Requirement already satisfied: transformers in .\\venv\\lib\\site-packages (5.6.2)\n",
24
- "Requirement already satisfied: datasets in .\\venv\\lib\\site-packages (4.8.4)\n",
25
- "Requirement already satisfied: httpx in .\\venv\\lib\\site-packages (0.28.1)\n",
26
- "Requirement already satisfied: fastapi in .\\venv\\lib\\site-packages (0.136.0)\n",
27
- "Requirement already satisfied: uvicorn in .\\venv\\lib\\site-packages (0.45.0)\n",
28
- "Requirement already satisfied: pydantic in .\\venv\\lib\\site-packages (2.13.3)\n",
29
- "Requirement already satisfied: openai in .\\venv\\lib\\site-packages (2.32.0)\n",
30
- "Requirement already satisfied: accelerate>=1.4.0 in .\\venv\\lib\\site-packages (from trl) (1.13.0)\n",
31
- "Requirement already satisfied: jinja2 in .\\venv\\lib\\site-packages (from trl) (3.1.6)\n",
32
- "Requirement already satisfied: packaging>20.0 in .\\venv\\lib\\site-packages (from trl) (26.2)\n",
33
- "Requirement already satisfied: huggingface-hub<2.0,>=1.5.0 in .\\venv\\lib\\site-packages (from transformers) (1.12.0)\n",
34
- "Requirement already satisfied: numpy>=1.17 in .\\venv\\lib\\site-packages (from transformers) (2.4.4)\n",
35
- "Requirement already satisfied: pyyaml>=5.1 in .\\venv\\lib\\site-packages (from transformers) (6.0.3)\n",
36
- "Requirement already satisfied: regex>=2025.10.22 in .\\venv\\lib\\site-packages (from transformers) (2026.4.4)\n",
37
- "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in .\\venv\\lib\\site-packages (from transformers) (0.22.2)\n",
38
- "Requirement already satisfied: typer in .\\venv\\lib\\site-packages (from transformers) (0.24.2)\n",
39
- "Requirement already satisfied: safetensors>=0.4.3 in .\\venv\\lib\\site-packages (from transformers) (0.7.0)\n",
40
- "Requirement already satisfied: tqdm>=4.27 in .\\venv\\lib\\site-packages (from transformers) (4.67.3)\n",
41
- "Requirement already satisfied: filelock>=3.10.0 in .\\venv\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (3.29.0)\n",
42
- "Requirement already satisfied: fsspec>=2023.5.0 in .\\venv\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (2026.2.0)\n",
43
- "Requirement already satisfied: hf-xet<2.0.0,>=1.4.3 in .\\venv\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (1.4.3)\n",
44
- "Requirement already satisfied: typing-extensions>=4.1.0 in .\\venv\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (4.15.0)\n",
45
- "Requirement already satisfied: anyio in .\\venv\\lib\\site-packages (from httpx) (4.13.0)\n",
46
- "Requirement already satisfied: certifi in .\\venv\\lib\\site-packages (from httpx) (2026.4.22)\n",
47
- "Requirement already satisfied: httpcore==1.* in .\\venv\\lib\\site-packages (from httpx) (1.0.9)\n",
48
- "Requirement already satisfied: idna in .\\venv\\lib\\site-packages (from httpx) (3.12)\n",
49
- "Requirement already satisfied: h11>=0.16 in .\\venv\\lib\\site-packages (from httpcore==1.*->httpx) (0.16.0)\n",
50
- "Requirement already satisfied: pyarrow>=21.0.0 in .\\venv\\lib\\site-packages (from datasets) (24.0.0)\n",
51
- "Requirement already satisfied: dill<0.4.2,>=0.3.0 in .\\venv\\lib\\site-packages (from datasets) (0.4.1)\n",
52
- "Requirement already satisfied: pandas in .\\venv\\lib\\site-packages (from datasets) (3.0.2)\n",
53
- "Requirement already satisfied: requests>=2.32.2 in .\\venv\\lib\\site-packages (from datasets) (2.33.1)\n",
54
- "Requirement already satisfied: xxhash in .\\venv\\lib\\site-packages (from datasets) (3.6.0)\n",
55
- "Requirement already satisfied: multiprocess<0.70.20 in .\\venv\\lib\\site-packages (from datasets) (0.70.19)\n",
56
- "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in .\\venv\\lib\\site-packages (from fsspec[http]<=2026.2.0,>=2023.1.0->datasets) (3.13.5)\n",
57
- "Requirement already satisfied: starlette>=0.46.0 in .\\venv\\lib\\site-packages (from fastapi) (1.0.0)\n",
58
- "Requirement already satisfied: typing-inspection>=0.4.2 in .\\venv\\lib\\site-packages (from fastapi) (0.4.2)\n",
59
- "Requirement already satisfied: annotated-doc>=0.0.2 in .\\venv\\lib\\site-packages (from fastapi) (0.0.4)\n",
60
- "Requirement already satisfied: click>=7.0 in .\\venv\\lib\\site-packages (from uvicorn) (8.3.2)\n",
61
- "Requirement already satisfied: annotated-types>=0.6.0 in .\\venv\\lib\\site-packages (from pydantic) (0.7.0)\n",
62
- "Requirement already satisfied: pydantic-core==2.46.3 in .\\venv\\lib\\site-packages (from pydantic) (2.46.3)\n",
63
- "Requirement already satisfied: distro<2,>=1.7.0 in .\\venv\\lib\\site-packages (from openai) (1.9.0)\n",
64
- "Requirement already satisfied: jiter<1,>=0.10.0 in .\\venv\\lib\\site-packages (from openai) (0.14.0)\n",
65
- "Requirement already satisfied: sniffio in .\\venv\\lib\\site-packages (from openai) (1.3.1)\n",
66
- "Requirement already satisfied: psutil in .\\venv\\lib\\site-packages (from accelerate>=1.4.0->trl) (7.2.2)\n",
67
- "Requirement already satisfied: torch>=2.0.0 in .\\venv\\lib\\site-packages (from accelerate>=1.4.0->trl) (2.11.0)\n",
68
- "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in .\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets) (2.6.1)\n",
69
- "Requirement already satisfied: aiosignal>=1.4.0 in .\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets) (1.4.0)\n",
70
- "Requirement already satisfied: attrs>=17.3.0 in .\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets) (26.1.0)\n",
71
- "Requirement already satisfied: frozenlist>=1.1.1 in .\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets) (1.8.0)\n",
72
- "Requirement already satisfied: multidict<7.0,>=4.5 in .\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets) (6.7.1)\n",
73
- "Requirement already satisfied: propcache>=0.2.0 in .\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets) (0.4.1)\n",
74
- "Requirement already satisfied: yarl<2.0,>=1.17.0 in .\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets) (1.23.0)\n",
75
- "Requirement already satisfied: colorama in .\\venv\\lib\\site-packages (from click>=7.0->uvicorn) (0.4.6)\n",
76
- "Requirement already satisfied: charset_normalizer<4,>=2 in .\\venv\\lib\\site-packages (from requests>=2.32.2->datasets) (3.4.7)\n",
77
- "Requirement already satisfied: urllib3<3,>=1.26 in .\\venv\\lib\\site-packages (from requests>=2.32.2->datasets) (2.6.3)\n",
78
- "Requirement already satisfied: setuptools<82 in .\\venv\\lib\\site-packages (from torch>=2.0.0->accelerate>=1.4.0->trl) (81.0.0)\n",
79
- "Requirement already satisfied: sympy>=1.13.3 in .\\venv\\lib\\site-packages (from torch>=2.0.0->accelerate>=1.4.0->trl) (1.14.0)\n",
80
- "Requirement already satisfied: networkx>=2.5.1 in .\\venv\\lib\\site-packages (from torch>=2.0.0->accelerate>=1.4.0->trl) (3.6.1)\n",
81
- "Requirement already satisfied: mpmath<1.4,>=1.1.0 in .\\venv\\lib\\site-packages (from sympy>=1.13.3->torch>=2.0.0->accelerate>=1.4.0->trl) (1.3.0)\n",
82
- "Requirement already satisfied: MarkupSafe>=2.0 in .\\venv\\lib\\site-packages (from jinja2->trl) (3.0.3)\n",
83
- "Requirement already satisfied: python-dateutil>=2.8.2 in .\\venv\\lib\\site-packages (from pandas->datasets) (2.9.0.post0)\n",
84
- "Requirement already satisfied: tzdata in .\\venv\\lib\\site-packages (from pandas->datasets) (2026.2)\n",
85
- "Requirement already satisfied: six>=1.5 in .\\venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
86
- "Requirement already satisfied: shellingham>=1.3.0 in .\\venv\\lib\\site-packages (from typer->transformers) (1.5.4)\n",
87
- "Requirement already satisfied: rich>=12.3.0 in .\\venv\\lib\\site-packages (from typer->transformers) (15.0.0)\n",
88
- "Requirement already satisfied: markdown-it-py>=2.2.0 in .\\venv\\lib\\site-packages (from rich>=12.3.0->typer->transformers) (4.0.0)\n",
89
- "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in .\\venv\\lib\\site-packages (from rich>=12.3.0->typer->transformers) (2.20.0)\n",
90
- "Requirement already satisfied: mdurl~=0.1 in .\\venv\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=12.3.0->typer->transformers) (0.1.2)\n"
91
- ]
92
- },
93
- {
94
- "name": "stderr",
95
- "output_type": "stream",
96
- "text": [
97
- "\n",
98
- "[notice] A new release of pip is available: 25.2 -> 26.0.1\n",
99
- "[notice] To update, run: python.exe -m pip install --upgrade pip\n",
100
- "fatal: destination path 'meta' already exists and is not an empty directory.\n"
101
- ]
102
- },
103
- {
104
- "name": "stdout",
105
- "output_type": "stream",
106
- "text": [
107
- "Requirement already satisfied: fastapi>=0.100.0 in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 1)) (0.136.0)\n",
108
- "Requirement already satisfied: uvicorn>=0.23.0 in e:\\meta\\venv\\lib\\site-packages (from uvicorn[standard]>=0.23.0->-r requirements.txt (line 2)) (0.45.0)\n",
109
- "Requirement already satisfied: pydantic>=2.0.0 in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 3)) (2.13.3)\n",
110
- "Requirement already satisfied: openai>=1.0.0 in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 4)) (2.32.0)\n",
111
- "Requirement already satisfied: httpx>=0.24.1 in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 5)) (0.28.1)\n",
112
- "Requirement already satisfied: pandas in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 6)) (3.0.2)\n",
113
- "Requirement already satisfied: matplotlib in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 7)) (3.10.9)\n",
114
- "Requirement already satisfied: transformers in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 8)) (5.6.2)\n",
115
- "Requirement already satisfied: torch in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 9)) (2.11.0)\n",
116
- "Requirement already satisfied: datasets in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 10)) (4.8.4)\n",
117
- "Requirement already satisfied: trl in e:\\meta\\venv\\lib\\site-packages (from -r requirements.txt (line 11)) (1.2.0)\n",
118
- "Requirement already satisfied: starlette>=0.46.0 in e:\\meta\\venv\\lib\\site-packages (from fastapi>=0.100.0->-r requirements.txt (line 1)) (1.0.0)\n",
119
- "Requirement already satisfied: typing-extensions>=4.8.0 in e:\\meta\\venv\\lib\\site-packages (from fastapi>=0.100.0->-r requirements.txt (line 1)) (4.15.0)\n",
120
- "Requirement already satisfied: typing-inspection>=0.4.2 in e:\\meta\\venv\\lib\\site-packages (from fastapi>=0.100.0->-r requirements.txt (line 1)) (0.4.2)\n",
121
- "Requirement already satisfied: annotated-doc>=0.0.2 in e:\\meta\\venv\\lib\\site-packages (from fastapi>=0.100.0->-r requirements.txt (line 1)) (0.0.4)\n",
122
- "Requirement already satisfied: click>=7.0 in e:\\meta\\venv\\lib\\site-packages (from uvicorn>=0.23.0->uvicorn[standard]>=0.23.0->-r requirements.txt (line 2)) (8.3.2)\n",
123
- "Requirement already satisfied: h11>=0.8 in e:\\meta\\venv\\lib\\site-packages (from uvicorn>=0.23.0->uvicorn[standard]>=0.23.0->-r requirements.txt (line 2)) (0.16.0)\n",
124
- "Requirement already satisfied: annotated-types>=0.6.0 in e:\\meta\\venv\\lib\\site-packages (from pydantic>=2.0.0->-r requirements.txt (line 3)) (0.7.0)\n",
125
- "Requirement already satisfied: pydantic-core==2.46.3 in e:\\meta\\venv\\lib\\site-packages (from pydantic>=2.0.0->-r requirements.txt (line 3)) (2.46.3)\n",
126
- "Requirement already satisfied: anyio<5,>=3.5.0 in e:\\meta\\venv\\lib\\site-packages (from openai>=1.0.0->-r requirements.txt (line 4)) (4.13.0)\n",
127
- "Requirement already satisfied: distro<2,>=1.7.0 in e:\\meta\\venv\\lib\\site-packages (from openai>=1.0.0->-r requirements.txt (line 4)) (1.9.0)\n",
128
- "Requirement already satisfied: jiter<1,>=0.10.0 in e:\\meta\\venv\\lib\\site-packages (from openai>=1.0.0->-r requirements.txt (line 4)) (0.14.0)\n",
129
- "Requirement already satisfied: sniffio in e:\\meta\\venv\\lib\\site-packages (from openai>=1.0.0->-r requirements.txt (line 4)) (1.3.1)\n",
130
- "Requirement already satisfied: tqdm>4 in e:\\meta\\venv\\lib\\site-packages (from openai>=1.0.0->-r requirements.txt (line 4)) (4.67.3)\n",
131
- "Requirement already satisfied: certifi in e:\\meta\\venv\\lib\\site-packages (from httpx>=0.24.1->-r requirements.txt (line 5)) (2026.4.22)\n",
132
- "Requirement already satisfied: httpcore==1.* in e:\\meta\\venv\\lib\\site-packages (from httpx>=0.24.1->-r requirements.txt (line 5)) (1.0.9)\n",
133
- "Requirement already satisfied: idna in e:\\meta\\venv\\lib\\site-packages (from httpx>=0.24.1->-r requirements.txt (line 5)) (3.12)\n",
134
- "Requirement already satisfied: numpy>=1.26.0 in e:\\meta\\venv\\lib\\site-packages (from pandas->-r requirements.txt (line 6)) (2.4.4)\n",
135
- "Requirement already satisfied: python-dateutil>=2.8.2 in e:\\meta\\venv\\lib\\site-packages (from pandas->-r requirements.txt (line 6)) (2.9.0.post0)\n",
136
- "Requirement already satisfied: tzdata in e:\\meta\\venv\\lib\\site-packages (from pandas->-r requirements.txt (line 6)) (2026.2)\n",
137
- "Requirement already satisfied: contourpy>=1.0.1 in e:\\meta\\venv\\lib\\site-packages (from matplotlib->-r requirements.txt (line 7)) (1.3.3)\n",
138
- "Requirement already satisfied: cycler>=0.10 in e:\\meta\\venv\\lib\\site-packages (from matplotlib->-r requirements.txt (line 7)) (0.12.1)\n",
139
- "Requirement already satisfied: fonttools>=4.22.0 in e:\\meta\\venv\\lib\\site-packages (from matplotlib->-r requirements.txt (line 7)) (4.62.1)\n",
140
- "Requirement already satisfied: kiwisolver>=1.3.1 in e:\\meta\\venv\\lib\\site-packages (from matplotlib->-r requirements.txt (line 7)) (1.5.0)\n",
141
- "Requirement already satisfied: packaging>=20.0 in e:\\meta\\venv\\lib\\site-packages (from matplotlib->-r requirements.txt (line 7)) (26.2)\n",
142
- "Requirement already satisfied: pillow>=8 in e:\\meta\\venv\\lib\\site-packages (from matplotlib->-r requirements.txt (line 7)) (12.2.0)\n",
143
- "Requirement already satisfied: pyparsing>=3 in e:\\meta\\venv\\lib\\site-packages (from matplotlib->-r requirements.txt (line 7)) (3.3.2)\n",
144
- "Requirement already satisfied: huggingface-hub<2.0,>=1.5.0 in e:\\meta\\venv\\lib\\site-packages (from transformers->-r requirements.txt (line 8)) (1.12.0)\n",
145
- "Requirement already satisfied: pyyaml>=5.1 in e:\\meta\\venv\\lib\\site-packages (from transformers->-r requirements.txt (line 8)) (6.0.3)\n",
146
- "Requirement already satisfied: regex>=2025.10.22 in e:\\meta\\venv\\lib\\site-packages (from transformers->-r requirements.txt (line 8)) (2026.4.4)\n",
147
- "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in e:\\meta\\venv\\lib\\site-packages (from transformers->-r requirements.txt (line 8)) (0.22.2)\n",
148
- "Requirement already satisfied: typer in e:\\meta\\venv\\lib\\site-packages (from transformers->-r requirements.txt (line 8)) (0.24.2)\n",
149
- "Requirement already satisfied: safetensors>=0.4.3 in e:\\meta\\venv\\lib\\site-packages (from transformers->-r requirements.txt (line 8)) (0.7.0)\n",
150
- "Requirement already satisfied: filelock>=3.10.0 in e:\\meta\\venv\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers->-r requirements.txt (line 8)) (3.29.0)\n",
151
- "Requirement already satisfied: fsspec>=2023.5.0 in e:\\meta\\venv\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers->-r requirements.txt (line 8)) (2026.2.0)\n",
152
- "Requirement already satisfied: hf-xet<2.0.0,>=1.4.3 in e:\\meta\\venv\\lib\\site-packages (from huggingface-hub<2.0,>=1.5.0->transformers->-r requirements.txt (line 8)) (1.4.3)\n",
153
- "Requirement already satisfied: setuptools<82 in e:\\meta\\venv\\lib\\site-packages (from torch->-r requirements.txt (line 9)) (81.0.0)\n",
154
- "Requirement already satisfied: sympy>=1.13.3 in e:\\meta\\venv\\lib\\site-packages (from torch->-r requirements.txt (line 9)) (1.14.0)\n",
155
- "Requirement already satisfied: networkx>=2.5.1 in e:\\meta\\venv\\lib\\site-packages (from torch->-r requirements.txt (line 9)) (3.6.1)\n",
156
- "Requirement already satisfied: jinja2 in e:\\meta\\venv\\lib\\site-packages (from torch->-r requirements.txt (line 9)) (3.1.6)\n",
157
- "Requirement already satisfied: pyarrow>=21.0.0 in e:\\meta\\venv\\lib\\site-packages (from datasets->-r requirements.txt (line 10)) (24.0.0)\n",
158
- "Requirement already satisfied: dill<0.4.2,>=0.3.0 in e:\\meta\\venv\\lib\\site-packages (from datasets->-r requirements.txt (line 10)) (0.4.1)\n",
159
- "Requirement already satisfied: requests>=2.32.2 in e:\\meta\\venv\\lib\\site-packages (from datasets->-r requirements.txt (line 10)) (2.33.1)\n",
160
- "Requirement already satisfied: xxhash in e:\\meta\\venv\\lib\\site-packages (from datasets->-r requirements.txt (line 10)) (3.6.0)\n",
161
- "Requirement already satisfied: multiprocess<0.70.20 in e:\\meta\\venv\\lib\\site-packages (from datasets->-r requirements.txt (line 10)) (0.70.19)\n",
162
- "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in e:\\meta\\venv\\lib\\site-packages (from fsspec[http]<=2026.2.0,>=2023.1.0->datasets->-r requirements.txt (line 10)) (3.13.5)\n",
163
- "Requirement already satisfied: accelerate>=1.4.0 in e:\\meta\\venv\\lib\\site-packages (from trl->-r requirements.txt (line 11)) (1.13.0)\n",
164
- "Requirement already satisfied: psutil in e:\\meta\\venv\\lib\\site-packages (from accelerate>=1.4.0->trl->-r requirements.txt (line 11)) (7.2.2)\n",
165
- "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in e:\\meta\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets->-r requirements.txt (line 10)) (2.6.1)\n",
166
- "Requirement already satisfied: aiosignal>=1.4.0 in e:\\meta\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets->-r requirements.txt (line 10)) (1.4.0)\n",
167
- "Requirement already satisfied: attrs>=17.3.0 in e:\\meta\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets->-r requirements.txt (line 10)) (26.1.0)\n",
168
- "Requirement already satisfied: frozenlist>=1.1.1 in e:\\meta\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets->-r requirements.txt (line 10)) (1.8.0)\n",
169
- "Requirement already satisfied: multidict<7.0,>=4.5 in e:\\meta\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets->-r requirements.txt (line 10)) (6.7.1)\n",
170
- "Requirement already satisfied: propcache>=0.2.0 in e:\\meta\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets->-r requirements.txt (line 10)) (0.4.1)\n",
171
- "Requirement already satisfied: yarl<2.0,>=1.17.0 in e:\\meta\\venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2026.2.0,>=2023.1.0->datasets->-r requirements.txt (line 10)) (1.23.0)\n",
172
- "Requirement already satisfied: colorama in e:\\meta\\venv\\lib\\site-packages (from click>=7.0->uvicorn>=0.23.0->uvicorn[standard]>=0.23.0->-r requirements.txt (line 2)) (0.4.6)\n",
173
- "Requirement already satisfied: six>=1.5 in e:\\meta\\venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas->-r requirements.txt (line 6)) (1.17.0)\n",
174
- "Requirement already satisfied: charset_normalizer<4,>=2 in e:\\meta\\venv\\lib\\site-packages (from requests>=2.32.2->datasets->-r requirements.txt (line 10)) (3.4.7)\n",
175
- "Requirement already satisfied: urllib3<3,>=1.26 in e:\\meta\\venv\\lib\\site-packages (from requests>=2.32.2->datasets->-r requirements.txt (line 10)) (2.6.3)\n",
176
- "Requirement already satisfied: mpmath<1.4,>=1.1.0 in e:\\meta\\venv\\lib\\site-packages (from sympy>=1.13.3->torch->-r requirements.txt (line 9)) (1.3.0)\n",
177
- "Requirement already satisfied: httptools>=0.6.3 in e:\\meta\\venv\\lib\\site-packages (from uvicorn[standard]>=0.23.0->-r requirements.txt (line 2)) (0.7.1)\n",
178
- "Requirement already satisfied: python-dotenv>=0.13 in e:\\meta\\venv\\lib\\site-packages (from uvicorn[standard]>=0.23.0->-r requirements.txt (line 2)) (1.2.2)\n",
179
- "Requirement already satisfied: watchfiles>=0.20 in e:\\meta\\venv\\lib\\site-packages (from uvicorn[standard]>=0.23.0->-r requirements.txt (line 2)) (1.1.1)\n",
180
- "Requirement already satisfied: websockets>=10.4 in e:\\meta\\venv\\lib\\site-packages (from uvicorn[standard]>=0.23.0->-r requirements.txt (line 2)) (16.0)\n",
181
- "Requirement already satisfied: MarkupSafe>=2.0 in e:\\meta\\venv\\lib\\site-packages (from jinja2->torch->-r requirements.txt (line 9)) (3.0.3)\n",
182
- "Requirement already satisfied: shellingham>=1.3.0 in e:\\meta\\venv\\lib\\site-packages (from typer->transformers->-r requirements.txt (line 8)) (1.5.4)\n",
183
- "Requirement already satisfied: rich>=12.3.0 in e:\\meta\\venv\\lib\\site-packages (from typer->transformers->-r requirements.txt (line 8)) (15.0.0)\n",
184
- "Requirement already satisfied: markdown-it-py>=2.2.0 in e:\\meta\\venv\\lib\\site-packages (from rich>=12.3.0->typer->transformers->-r requirements.txt (line 8)) (4.0.0)\n",
185
- "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in e:\\meta\\venv\\lib\\site-packages (from rich>=12.3.0->typer->transformers->-r requirements.txt (line 8)) (2.20.0)\n",
186
- "Requirement already satisfied: mdurl~=0.1 in e:\\meta\\venv\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=12.3.0->typer->transformers->-r requirements.txt (line 8)) (0.1.2)\n"
187
- ]
188
- },
189
- {
190
- "name": "stderr",
191
- "output_type": "stream",
192
- "text": [
193
- "\n",
194
- "[notice] A new release of pip is available: 25.2 -> 26.0.1\n",
195
- "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
196
- ]
197
- }
198
- ],
199
  "source": [
200
  "!pip install trl transformers datasets httpx fastapi uvicorn pydantic openai\n",
201
  "!git clone https://github.com/havinashpatil/meta.git\n",
@@ -206,39 +34,7 @@
206
  "cell_type": "code",
207
  "execution_count": 5,
208
  "metadata": {},
209
- "outputs": [
210
- {
211
- "ename": "RuntimeError",
212
- "evalue": "Failed to import trl.trainer.grpo_trainer because of the following error (look up to see its traceback):\n'charmap' codec can't decode byte 0x81 in position 932: character maps to <undefined>",
213
- "output_type": "error",
214
- "traceback": [
215
- "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
216
- "\u001b[31mUnicodeDecodeError\u001b[39m Traceback (most recent call last)",
217
- "\u001b[36mFile \u001b[39m\u001b[32me:\\meta\\venv\\Lib\\site-packages\\trl\\_lazy_module.py:71\u001b[39m, in \u001b[36m_LazyModule._get_module\u001b[39m\u001b[34m(self, module_name)\u001b[39m\n\u001b[32m 70\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m71\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[30;43mimportlib\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43mimport_module\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43m\"\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43m\"\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43m+\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mmodule_name\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mself\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43m__name__\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n\u001b[32m 72\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
218
- "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\importlib\\__init__.py:88\u001b[39m, in \u001b[36mimport_module\u001b[39m\u001b[34m(name, package)\u001b[39m\n\u001b[32m 87\u001b[39m level += \u001b[32m1\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m88\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[30;43m_bootstrap\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43m_gcd_import\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43mname\u001b[39;49m\u001b[30;43m[\u001b[39;49m\u001b[30;43mlevel\u001b[39;49m\u001b[30;43m:\u001b[39;49m\u001b[30;43m]\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mpackage\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mlevel\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n",
219
- "\u001b[36mFile \u001b[39m\u001b[32m<frozen importlib._bootstrap>:1387\u001b[39m, in \u001b[36m_gcd_import\u001b[39m\u001b[34m(name, package, level)\u001b[39m\n",
220
- "\u001b[36mFile \u001b[39m\u001b[32m<frozen importlib._bootstrap>:1360\u001b[39m, in \u001b[36m_find_and_load\u001b[39m\u001b[34m(name, import_)\u001b[39m\n",
221
- "\u001b[36mFile \u001b[39m\u001b[32m<frozen importlib._bootstrap>:1334\u001b[39m, in \u001b[36m_find_and_load_unlocked\u001b[39m\u001b[34m(name, import_)\u001b[39m\n",
222
- "\u001b[36mFile \u001b[39m\u001b[32m<frozen importlib._bootstrap>:950\u001b[39m, in \u001b[36m_load_unlocked\u001b[39m\u001b[34m(spec)\u001b[39m\n",
223
- "\u001b[36mFile \u001b[39m\u001b[32m<frozen importlib._bootstrap_external>:1026\u001b[39m, in \u001b[36m_LoaderBasics.exec_module\u001b[39m\u001b[34m(self, module)\u001b[39m\n",
224
- "\u001b[36mFile \u001b[39m\u001b[32m<frozen importlib._bootstrap>:488\u001b[39m, in \u001b[36m_call_with_frames_removed\u001b[39m\u001b[34m(f, *args, **kwds)\u001b[39m\n",
225
- "\u001b[36mFile \u001b[39m\u001b[32me:\\meta\\venv\\Lib\\site-packages\\trl\\trainer\\grpo_trainer.py:59\u001b[39m\n\u001b[32m 57\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtransformers\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m is_peft_available, is_rich_available\n\u001b[32m---> \u001b[39m\u001b[32m59\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01m.\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mchat_template_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 60\u001b[39m add_response_schema,\n\u001b[32m 61\u001b[39m get_training_chat_template,\n\u001b[32m 62\u001b[39m is_chat_template_prefix_preserving,\n\u001b[32m 63\u001b[39m parse_response,\n\u001b[32m 64\u001b[39m supports_tool_calling,\n\u001b[32m 65\u001b[39m )\n\u001b[32m 66\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01m.\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdata_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m apply_chat_template, is_conversational, prepare_multimodal_messages\n",
226
- "\u001b[36mFile \u001b[39m\u001b[32me:\\meta\\venv\\Lib\\site-packages\\trl\\chat_template_utils.py:309\u001b[39m\n\u001b[32m 273\u001b[39m qwen3_5_schema = {\n\u001b[32m 274\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mx-regex\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33mr\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m^(?:(?:<think>\u001b[39m\u001b[33m\\\u001b[39m\u001b[33mn?)?(?:(?P<reasoning_content>.*?\u001b[39m\u001b[33m\\\u001b[39m\u001b[33mS.*?)\u001b[39m\u001b[33m\\\u001b[39m\u001b[33mn?|[\u001b[39m\u001b[33m\\\u001b[39m\u001b[33ms]*)</think>\u001b[39m\u001b[33m\\\u001b[39m\u001b[33ms*)?(?P<content>.*?)(?:\u001b[39m\u001b[33m\\\u001b[39m\u001b[33mn+(?=<tool_call>))?(?=(?:<tool_call>|<\u001b[39m\u001b[33m\\\u001b[39m\u001b[33m|im_end\u001b[39m\u001b[33m\\\u001b[39m\u001b[33m|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\u001b[39m\u001b[33m\\\u001b[39m\u001b[33ms*)+)?\u001b[39m\u001b[33m\\\u001b[39m\u001b[33ms*(?:<\u001b[39m\u001b[33m\\\u001b[39m\u001b[33m|im_end\u001b[39m\u001b[33m\\\u001b[39m\u001b[33m|>|$)\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 275\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mtype\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33mobject\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 305\u001b[39m },\n\u001b[32m 306\u001b[39m }\n\u001b[32m--> \u001b[39m\u001b[32m309\u001b[39m deepseekv3_chat_template = \u001b[30;43m(\u001b[39;49m\u001b[30;43m_CHAT_TEMPLATES_DIR\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43m/\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43m\"\u001b[39;49m\u001b[30;43mdeepseekv3.jinja\u001b[39;49m\u001b[30;43m\"\u001b[39;49m\u001b[30;43m)\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43mread_text\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n\u001b[32m 311\u001b[39m glm4moe_chat_template = (_CHAT_TEMPLATES_DIR / \u001b[33m\"\u001b[39m\u001b[33mglm4moe.jinja\u001b[39m\u001b[33m\"\u001b[39m).read_text()\n",
227
- "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\pathlib\\_local.py:546\u001b[39m, in \u001b[36mPath.read_text\u001b[39m\u001b[34m(self, encoding, errors, newline)\u001b[39m\n\u001b[32m 545\u001b[39m encoding = io.text_encoding(encoding)\n\u001b[32m--> \u001b[39m\u001b[32m546\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[30;43mPathBase\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43mread_text\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43mself\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mencoding\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43merrors\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mnewline\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n",
228
- "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\pathlib\\_abc.py:633\u001b[39m, in \u001b[36mPathBase.read_text\u001b[39m\u001b[34m(self, encoding, errors, newline)\u001b[39m\n\u001b[32m 632\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m.open(mode=\u001b[33m'\u001b[39m\u001b[33mr\u001b[39m\u001b[33m'\u001b[39m, encoding=encoding, errors=errors, newline=newline) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[32m--> \u001b[39m\u001b[32m633\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[30;43mf\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43mread\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n",
229
- "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\encodings\\cp1252.py:23\u001b[39m, in \u001b[36mIncrementalDecoder.decode\u001b[39m\u001b[34m(self, input, final)\u001b[39m\n\u001b[32m 22\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdecode\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m, final=\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[32m---> \u001b[39m\u001b[32m23\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[30;43mcodecs\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43mcharmap_decode\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43minput\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43mself\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43merrors\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43mdecoding_table\u001b[39;49m\u001b[30;43m)\u001b[39;49m[\u001b[32m0\u001b[39m]\n",
230
- "\u001b[31mUnicodeDecodeError\u001b[39m: 'charmap' codec can't decode byte 0x81 in position 932: character maps to <undefined>",
231
- "\nThe above exception was the direct cause of the following exception:\n",
232
- "\u001b[31mRuntimeError\u001b[39m Traceback (most recent call last)",
233
- "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m torch\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m datasets \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m transformers \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForCausalLM, AutoTokenizer\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m trl \u001b[38;5;28;01mimport\u001b[39;00m GRPOConfig, GRPOTrainer\n\u001b[32m 5\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m httpx\n\u001b[32m 6\u001b[39m \n\u001b[32m 7\u001b[39m \u001b[38;5;66;03m# Start the backend server in the background (Colab trick)\u001b[39;00m\n",
234
- "\u001b[36mFile \u001b[39m\u001b[32m<frozen importlib._bootstrap>:1412\u001b[39m, in \u001b[36m_handle_fromlist\u001b[39m\u001b[34m(module, fromlist, import_, recursive)\u001b[39m\n",
235
- "\u001b[36mFile \u001b[39m\u001b[32me:\\meta\\venv\\Lib\\site-packages\\trl\\_lazy_module.py:62\u001b[39m, in \u001b[36m_LazyModule.__getattr__\u001b[39m\u001b[34m(self, name)\u001b[39m\n\u001b[32m 60\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._class_to_module.keys():\n\u001b[32m 61\u001b[39m module = \u001b[38;5;28mself\u001b[39m._get_module(\u001b[38;5;28mself\u001b[39m._class_to_module[name])\n\u001b[32m---> \u001b[39m\u001b[32m62\u001b[39m value = \u001b[30;43mgetattr\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43mmodule\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mname\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n\u001b[32m 63\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 64\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mmodule \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.\u001b[34m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m has no attribute \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n",
236
- "\u001b[36mFile \u001b[39m\u001b[32me:\\meta\\venv\\Lib\\site-packages\\trl\\_lazy_module.py:61\u001b[39m, in \u001b[36m_LazyModule.__getattr__\u001b[39m\u001b[34m(self, name)\u001b[39m\n\u001b[32m 59\u001b[39m value = \u001b[38;5;28mself\u001b[39m._get_module(name)\n\u001b[32m 60\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._class_to_module.keys():\n\u001b[32m---> \u001b[39m\u001b[32m61\u001b[39m module = \u001b[30;43mself\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43m_get_module\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43mself\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43m_class_to_module\u001b[39;49m\u001b[30;43m[\u001b[39;49m\u001b[30;43mname\u001b[39;49m\u001b[30;43m]\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n\u001b[32m 62\u001b[39m value = \u001b[38;5;28mgetattr\u001b[39m(module, name)\n\u001b[32m 63\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
237
- "\u001b[36mFile \u001b[39m\u001b[32me:\\meta\\venv\\Lib\\site-packages\\trl\\_lazy_module.py:73\u001b[39m, in \u001b[36m_LazyModule._get_module\u001b[39m\u001b[34m(self, module_name)\u001b[39m\n\u001b[32m 71\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m importlib.import_module(\u001b[33m\"\u001b[39m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m + module_name, \u001b[38;5;28mself\u001b[39m.\u001b[34m__name__\u001b[39m)\n\u001b[32m 72\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m---> \u001b[39m\u001b[32m73\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[32m 74\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFailed to import \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.\u001b[34m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodule_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m because of the following error (look up to see its\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 75\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m traceback):\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 76\u001b[39m ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n",
238
- "\u001b[31mRuntimeError\u001b[39m: Failed to import trl.trainer.grpo_trainer because of the following error (look up to see its traceback):\n'charmap' codec can't decode byte 0x81 in position 932: character maps to <undefined>"
239
- ]
240
- }
241
- ],
242
  "source": [
243
  "import torch\n",
244
  "from datasets import load_dataset\n",
@@ -360,4 +156,4 @@
360
  },
361
  "nbformat": 4,
362
  "nbformat_minor": 4
363
- }
 
10
  "It uses the `m-a-p/Code-Feedback` dataset to train the LLM to debug code and improve its time complexity."
11
  ]
12
  },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {},
16
+ "source": [
17
+ "> \u26a0\ufe0f **Note for Judges**: This training notebook is designed to be run in **Google Colab (Linux)** with an active GPU.\n",
18
+ "> It uses the Hugging Face TRL `GRPOTrainer`, whose import failed on Windows with a `cp1252` `UnicodeDecodeError`, so run it on Linux. Do not run locally on Windows.\n",
19
+ "> The code below demonstrates how CodeArena functions as a live environment-in-the-loop reward signal."
20
+ ]
21
+ },
22
  {
23
  "cell_type": "code",
24
  "execution_count": 4,
25
  "metadata": {},
26
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "source": [
28
  "!pip install trl transformers datasets httpx fastapi uvicorn pydantic openai\n",
29
  "!git clone https://github.com/havinashpatil/meta.git\n",
 
34
  "cell_type": "code",
35
  "execution_count": 5,
36
  "metadata": {},
37
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  "source": [
39
  "import torch\n",
40
  "from datasets import load_dataset\n",
 
156
  },
157
  "nbformat": 4,
158
  "nbformat_minor": 4
159
+ }