import React, { useState, useEffect, useRef, useCallback } from "react";
/* ─────────────────────────────────────────────
GOOGLE FONTS
───────────────────────────────────────────── */
/**
 * Injects the Google Fonts stylesheet (JetBrains Mono + Rajdhani) into the
 * document <head> on mount and removes it again on unmount. Renders nothing.
 */
const FontLoader = () => {
  useEffect(() => {
    const fontLink = document.createElement("link");
    fontLink.rel = "stylesheet";
    fontLink.href =
      "https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700&family=Rajdhani:wght@400;500;600;700&display=swap";
    document.head.appendChild(fontLink);
    return () => {
      document.head.removeChild(fontLink);
    };
  }, []);
  return null;
};
/* ─────────────────────────────────────────────
GLOBAL STYLES
───────────────────────────────────────────── */
/*
 * NOTE(review): the original JSX body of this component (presumably a
 * <style> element carrying the app's global CSS) was stripped from this
 * copy, leaving `() => ( )` — a syntax error. Returning null keeps the file
 * parseable; restore the original stylesheet markup from version control.
 */
const GlobalStyles = () => null;
/* ─────────────────────────────────────────────
TASKS (mirrors server tasks — display only)
───────────────────────────────────────────── */
// Static task catalog keyed by task id. Display-only mirror of the server's
// task definitions — the authoritative buggy code and tests live on the
// OpenEnv server; `buggy_code` here is only a fallback shown before /reset
// returns the real observation (see the render at the bottom of the file).
const TASKS = {
// Syntax-level bugs: missing colon after `def`, nonexistent builtin `length()`.
"easy-1": {
id: "easy-1", label: "Easy", name: "Fix average_list()", difficulty: "easy",
description: "Fix syntax errors: missing colon after def and uses length() instead of len().",
hints: ["Missing colon after def", "length() → len()"],
buggy_code: `def average_list(numbers)\n if length(numbers) == 0:\n return 0\n return sum(numbers) / length(numbers)`,
},
// Logic bugs in an otherwise well-formed binary search (boundary + pointer).
"medium-1": {
id: "medium-1", label: "Medium", name: "Fix binary_search()", difficulty: "medium",
description: "Fix logical bugs: loop condition skips last element; left pointer causes infinite loop.",
hints: ["left < right → left <= right", "left = mid → left = mid + 1"],
buggy_code: `def binary_search(arr, target):\n left, right = 0, len(arr) - 1\n while left < right:\n mid = (left + right) // 2\n if arr[mid] == target:\n return mid\n elif arr[mid] < target:\n left = mid\n else:\n right = mid - 1\n return -1`,
},
// Performance task: correct but cubic implementation to be rewritten as Kadane's.
"hard-1": {
id: "hard-1", label: "Hard", name: "Optimize max_subarray_sum()", difficulty: "hard",
description: "Replace O(N³) brute-force with Kadane's O(N) algorithm.",
hints: ["Use Kadane's algorithm", "Eliminate triple nested loop"],
buggy_code: `def max_subarray_sum(arr):\n if not arr:\n return 0\n max_sum = arr[0]\n for i in range(len(arr)):\n for j in range(i, len(arr)):\n for k in range(i, j+1):\n current = sum(arr[i:j+1])\n if current > max_sum:\n max_sum = current\n return max_sum`,
},
};
/* ─────────────────────────────────────────────
REWARD COLOR
───────────────────────────────────────────── */
/**
 * Map a scalar reward to a display color.
 * @param {number} r - reward, roughly in [0, 1]
 * @returns {string} hex color: green for r >= 0.85, amber for r >= 0.5, red otherwise
 */
function rewardColor(r) {
  const GREEN = "#00ff88";
  const AMBER = "#ffaa00";
  const RED = "#ff4455";
  if (r >= 0.85) {
    return GREEN;
  }
  return r >= 0.5 ? AMBER : RED;
}
/* ─────────────────────────────────────────────
ANSI LINE RENDERER
───────────────────────────────────────────── */
/**
 * Renders one line of terminal output, honoring a minimal subset of ANSI SGR
 * color codes: 32 (green), 33 (amber), 31 (red), 0 (reset).
 *
 * Fixes: the stripped-JSX residue pushed `{p}` object literals (which render
 * nothing) and computed `color` without ever applying it; it also matched
 * SGR codes by string content, so literal text "32" would change the color.
 * Codes are now identified by split parity (odd indices are captures).
 *
 * NOTE(review): the original JSX markup was lost in this copy; the
 * React.createElement calls below are a reconstruction — confirm against
 * version control.
 */
function AnsiLine({ text }) {
  // split on /\x1b\[(\d+)m/ alternates: even indices are literal text,
  // odd indices are the captured SGR code digits.
  const parts = text.split(/\x1b\[(\d+)m/);
  let color = null;
  const els = [];
  parts.forEach((p, i) => {
    if (i % 2 === 1) {
      // SGR code — update the active color state.
      if (p === "32") color = "#00ff88";
      else if (p === "33") color = "#ffaa00";
      else if (p === "31") color = "#ff4455";
      else if (p === "0") color = null;
    } else if (p !== "") {
      // Literal text — render in the currently active color (if any).
      els.push(
        React.createElement(
          "span",
          { key: i, style: color ? { color } : undefined },
          p
        )
      );
    }
  });
  return React.createElement("span", null, els);
}
/* ─────────────────────────────────────────────
REWARD CHART
───────────────────────────────────────────── */
/**
 * Small sparkline chart of per-step rewards on a fixed 260x100 canvas,
 * with an area fill under the line and one dot per reward colored by
 * rewardColor().
 *
 * Fixes: the x-spacing divisor was the constant `Math.max(4, 1)` (always 4),
 * which laid points out correctly only for exactly five rewards; it now
 * scales by the actual number of segments. The original `return ( )` was a
 * syntax error left by JSX stripping.
 *
 * NOTE(review): the original SVG markup was lost in this copy; the
 * React.createElement tree below is a reconstruction — confirm against
 * version control.
 */
function RewardChart({ rewards }) {
  const W = 260, H = 100, PAD = 20;
  // At least 1 so a single point doesn't divide by zero.
  const segments = Math.max(rewards.length - 1, 1);
  const pts = rewards.map((r, i) => ({
    x: PAD + (i / segments) * (W - PAD * 2),
    y: PAD + (1 - r) * (H - PAD * 2), // higher reward → higher on screen
    r,
  }));
  const pathD = pts.length > 1
    ? pts.map((p, i) => `${i === 0 ? "M" : "L"}${p.x},${p.y}`).join(" ")
    : "";
  const areaD = pts.length > 1
    ? `${pathD} L${pts[pts.length - 1].x},${H - PAD} L${pts[0].x},${H - PAD} Z`
    : "";
  return React.createElement(
    "svg",
    { width: W, height: H, viewBox: `0 0 ${W} ${H}` },
    areaD
      ? React.createElement("path", { d: areaD, fill: "rgba(0,255,136,0.08)", stroke: "none" })
      : null,
    pathD
      ? React.createElement("path", { d: pathD, fill: "none", stroke: "#00ff88", strokeWidth: 2 })
      : null,
    ...pts.map((p, i) =>
      React.createElement("circle", { key: i, cx: p.x, cy: p.y, r: 3, fill: rewardColor(p.r) })
    )
  );
}
/* ─────────────────────────────────────────────
MAIN APP
───────────────────────────────────────────── */
/**
 * CodeArenaRL — top-level app component.
 *
 * Drives an RL-style debugging loop: an Ollama-served LLM (or the user, in
 * manual mode) proposes fixes for a buggy Python task, and an OpenEnv
 * (FastAPI) server scores each proposal via /reset and /step. Everything
 * below is UI/episode state for that loop.
 */
export default function CodeArenaRL() {
/* ── Ollama config ── */
const [ollamaUrl, setOllamaUrl] = useState("http://localhost:11434");
const [ollamaModel, setOllamaModel] = useState("codellama");
const [availableModels, setAvailableModels] = useState([]);
const [ollamaStatus, setOllamaStatus] = useState("checking"); // checking | online | offline
/* ── OpenEnv (FastAPI) config ── */
const [envUrl, setEnvUrl] = useState("http://localhost:7860");
const [envStatus, setEnvStatus] = useState("checking"); // checking | online | offline
/* ── Task & episode state ── */
const [selectedTask, setSelectedTask] = useState("easy-1"); // key into TASKS
const [envState, setEnvState] = useState(null); // observation from server
const [uiMode, setUiMode] = useState("idle"); // idle|resetting|agent_thinking|executing|done
const [episodeLog, setEpisodeLog] = useState([]); // newest entry first (prepended in runStep)
const [rewards, setRewards] = useState([]); // per-step rewards, chronological
const [stepCount, setStepCount] = useState(0);
const [isDone, setIsDone] = useState(false);
/* ── Code display ── */
const [streamingCode, setStreamingCode] = useState(""); // typewriter-animated agent output
const [agentCode, setAgentCode] = useState(""); // final code for the current step
const [isTyping, setIsTyping] = useState(false);
const [execOutput, setExecOutput] = useState([]); // lines from /step response
/* ── Manual mode ── */
const [manualMode, setManualMode] = useState(false); // user supplies the fix instead of the LLM
const [manualCode, setManualCode] = useState("");
/* ── Speed ── */
const [speed, setSpeed] = useState("normal");
const speedMap = { slow: 3, normal: 1, fast: 0.25 }; // delay multipliers (smaller = faster)
/* ── Misc ── */
const [tokenEst, setTokenEst] = useState(0); // rough prompt-token estimate (chars / 4)
const [collapsedEntries, setCollapsedEntries] = useState(new Set()); // log entries folded shut
const [copied, setCopied] = useState(false); // transient "copied to clipboard" flag
const [errorBanner, setErrorBanner] = useState("");
const runningRef = useRef(false); // run-loop kill switch; async steps bail out when false
const logRef = useRef(null); // scroll container for the episode log
const task = TASKS[selectedTask]; // static display metadata for the selected task
/* ──────────────────────────────────────────
STATUS PROBES
─────────────────────────────────────────── */
/**
 * Health-check the Ollama server and refresh the model list.
 * Marks status "checking" while in flight; on success stores the served
 * model names (falling back to a default list when none are reported).
 * A 3-second timeout treats a slow server as offline.
 */
const probeOllama = useCallback(async () => {
  setOllamaStatus("checking");
  const fallbackModels = ["codellama", "llama3", "mistral", "deepseek-coder"];
  try {
    const res = await fetch(`${ollamaUrl}/api/tags`, { signal: AbortSignal.timeout(3000) });
    if (!res.ok) {
      setOllamaStatus("offline");
      return;
    }
    const data = await res.json();
    const names = (data.models || []).map((m) => m.name);
    setAvailableModels(names.length > 0 ? names : fallbackModels);
    setOllamaStatus("online");
  } catch {
    setOllamaStatus("offline");
    setAvailableModels(fallbackModels);
  }
}, [ollamaUrl]);
/**
 * Health-check the OpenEnv FastAPI server root endpoint (3s timeout).
 */
const probeEnv = useCallback(async () => {
  setEnvStatus("checking");
  try {
    const res = await fetch(`${envUrl}/`, { signal: AbortSignal.timeout(3000) });
    if (res.ok) {
      setEnvStatus("online");
    } else {
      setEnvStatus("offline");
    }
  } catch {
    setEnvStatus("offline");
  }
}, [envUrl]);
// Re-probe whenever a base URL changes (the callbacks are recreated then).
useEffect(() => { probeOllama(); }, [probeOllama]);
useEffect(() => { probeEnv(); }, [probeEnv]);
/* ──────────────────────────────────────────
OPENENV API CALLS
─────────────────────────────────────────── */
/**
 * POST /reset on the OpenEnv server to begin an episode for `taskId`.
 * @param {string} taskId - key of the task to load (e.g. "easy-1")
 * @returns initial observation:
 *   { buggy_code, error_log, test_results, previous_attempts }
 * @throws {Error} when the server responds with a non-2xx status.
 */
const envReset = useCallback(async (taskId) => {
  const response = await fetch(`${envUrl}/reset`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ task_id: taskId }),
  });
  if (!response.ok) {
    throw new Error(`/reset failed: ${response.status}`);
  }
  const payload = await response.json();
  return payload.observation;
}, [envUrl]);
/**
 * POST /step on the OpenEnv server with the agent's proposed fix.
 * @param {string} proposedFix - full replacement source code for the task
 * @returns step result: { observation, reward, done, info }
 * @throws {Error} when the server responds with a non-2xx status.
 */
const envStep = useCallback(async (proposedFix) => {
  const response = await fetch(`${envUrl}/step`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ proposed_fix: proposedFix }),
  });
  if (!response.ok) {
    throw new Error(`/step failed: ${response.status}`);
  }
  const payload = await response.json();
  return payload;
}, [envUrl]);
/* ──────────────────────────────────────────
OLLAMA CALL
─────────────────────────────────────────── */
/**
 * Ask the Ollama model for a fixed version of the code described by `obs`.
 *
 * Builds a debugging prompt from the current observation, tries the
 * /api/generate endpoint first, and falls back to /api/chat if the server
 * reports 404/405 for it. Responses are stripped of markdown code fences.
 * Also updates the rough token estimate shown in the UI.
 *
 * Fix: the /api/chat options used `max_tokens`, which Ollama does not
 * recognize — its option name is `num_predict` (matching the
 * /api/generate call below).
 *
 * @param obs - observation { buggy_code, error_log, test_results, previous_attempts }
 * @returns {Promise<string>} the cleaned code suggestion
 * @throws {Error} on HTTP errors or an empty model response
 */
const callOllama = useCallback(async (obs) => {
const systemPrompt = `You are an expert Python debugging agent in a reinforcement learning environment. Return ONLY the fixed Python code — no explanation, no markdown, no code fences.`;
const prompt = [
`Task: ${task.description}`,
``,
`BUGGY CODE:`,
obs.buggy_code,
``,
`ERROR LOG:`,
obs.error_log || "No errors yet",
``,
`TEST RESULTS:`,
obs.test_results || "No tests run yet",
``,
`PREVIOUS FAILED ATTEMPTS (${(obs.previous_attempts || []).length}):`,
(obs.previous_attempts || []).length > 0
? obs.previous_attempts.join("\n---\n")
: "None",
``,
`Return ONLY the corrected Python code:`,
].join("\n");
// Strip surrounding whitespace and any ```python fences the model emits
// despite the instructions.
const cleanCode = (text) =>
(text || "")
.trim()
.replace(/^```(?:python)?\n?/gm, "")
.replace(/```\s*$/gm, "")
.trim();
// Rough heuristic: ~4 characters per token.
setTokenEst(Math.ceil(prompt.length / 4));
// Primary path: /api/generate. Returns null (not an error) on 404/405 so
// the caller can fall back to /api/chat.
const requestGenerate = async () => {
const res = await fetch(`${ollamaUrl}/api/generate`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
model: ollamaModel,
prompt,
stream: false,
options: { temperature: 0.2, num_predict: 1024 },
}),
});
if (!res.ok) {
if (res.status === 404 || res.status === 405) {
return null;
}
const errText = await res.text();
throw new Error(`Ollama error ${res.status}: ${errText}`);
}
const data = await res.json();
return cleanCode(data.response || data.text || "");
};
// Fallback path: /api/chat with an explicit system message.
const requestChat = async () => {
const res = await fetch(`${ollamaUrl}/api/chat`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
model: ollamaModel,
messages: [
{ role: "system", content: systemPrompt },
{ role: "user", content: prompt },
],
stream: false,
// `num_predict` is Ollama's output-length option (was `max_tokens`,
// which Ollama silently ignores).
options: { temperature: 0.2, num_predict: 1024, top_p: 0.9 },
}),
});
if (!res.ok) {
const errText = await res.text();
throw new Error(`Ollama chat error ${res.status}: ${errText}`);
}
const data = await res.json();
return cleanCode(data.message?.content || data.response || data.text || "");
};
let code = await requestGenerate();
if (code === null) {
code = await requestChat();
}
if (!code) {
throw new Error("Ollama returned an empty response. Check the Ollama model endpoint and model name.");
}
return code;
}, [ollamaUrl, ollamaModel, task]);
/* ──────────────────────────────────────────
STREAM TEXT (typing animation)
─────────────────────────────────────────── */
/**
 * Typewriter animation: feeds `text` into `setter` one character per tick.
 * Tick length scales with the UI speed setting. Aborts silently (without
 * calling `onDone`) when the episode is stopped via runningRef; otherwise
 * fires `onDone` once the full text has been emitted.
 */
const streamText = useCallback((text, setter, onDone) => {
  const tickMs = Math.max(4, 18 * speedMap[speed]);
  let shown = 0;
  setter("");
  setIsTyping(true);
  const timer = setInterval(() => {
    if (!runningRef.current) {
      clearInterval(timer);
      return;
    }
    shown += 1;
    setter(text.slice(0, shown));
    if (shown >= text.length) {
      clearInterval(timer);
      setIsTyping(false);
      if (onDone) onDone();
    }
  }, tickMs);
}, [speed]);
/* ──────────────────────────────────────────
BUILD EXECUTION OUTPUT LINES from /step
─────────────────────────────────────────── */
/**
 * Translate a /step response into pseudo-unittest terminal lines (with ANSI
 * color escapes) for the streaming output pane. Display-only: the elapsed
 * time is randomly generated, not measured.
 * @param stepResp - { observation, reward, done, info }
 * @returns {string[]} lines to stream into the terminal view
 */
const buildOutputLines = (stepResp) => {
  const { reward, done, info, observation } = stepResp;
  const meta = info?.execution_metadata || {};
  const passed = meta.test_passed ?? "?";
  const total = meta.test_total ?? "?";
  const elapsed = (Math.random() * 0.003 + 0.001).toFixed(3);
  const errors = meta.runtime_errors || observation?.error_log || "";
  const rule = `----------------------------------------------------------------------`;
  const summary = `Ran ${total} tests in ${elapsed}s`;
  // When the server didn't report a count, pretend there were 3 tests.
  const knownTotal = typeof total === "number" ? total : 3;
  const out = [];
  const fullPass = reward >= 0.99 || (passed !== "?" && passed >= total);
  if (fullPass) {
    // All tests green.
    out.push(`\x1b[32m${".".repeat(knownTotal)}\x1b[0m`, rule, summary, ``, `\x1b[32mOK\x1b[0m`);
  } else if (passed > 0) {
    // Partial pass: some F's, some dots.
    const fail = total - passed;
    out.push(`\x1b[33m${"F".repeat(fail)}${".".repeat(passed)}\x1b[0m`, ``);
    out.push(`FAIL: partial solution — ${fail} test(s) failed`);
    if (errors) out.push(`RuntimeError: ${errors.split("\n")[0]}`);
    out.push(rule, summary, ``, `\x1b[33mFAILED (failures=${fail})\x1b[0m`);
  } else {
    // Nothing passed: show up to three error lines if we have them.
    out.push(`\x1b[31m${"F".repeat(knownTotal)}\x1b[0m`, ``);
    if (errors) {
      for (const errLine of errors.split("\n").slice(0, 3)) out.push(errLine);
    } else {
      out.push(`FAIL: all tests failed`);
    }
    out.push(rule, summary, ``, `\x1b[31mFAILED (errors=${typeof total === "number" ? total : "?"})\x1b[0m`);
  }
  return out;
};
/* ──────────────────────────────────────────
STREAM OUTPUT LINES
─────────────────────────────────────────── */
/**
 * Reveal `lines` in the execution-output pane one line per tick. Aborts
 * silently when the episode is stopped (runningRef cleared); otherwise
 * fires `onDone` after the last line is shown.
 */
const streamLines = useCallback((lines, onDone) => {
  const tickMs = Math.max(60, 180 * speedMap[speed]);
  let shown = 0;
  setExecOutput([]);
  const timer = setInterval(() => {
    if (!runningRef.current) {
      clearInterval(timer);
      return;
    }
    shown += 1;
    setExecOutput(lines.slice(0, shown));
    if (shown >= lines.length) {
      clearInterval(timer);
      if (onDone) onDone();
    }
  }, tickMs);
}, [speed]);
/* ──────────────────────────────────────────
RESET EPISODE
─────────────────────────────────────────── */
/**
 * Hard-reset all episode/UI state to its initial values and stop any
 * in-flight run loop (runningRef is the async kill switch the step/stream
 * helpers poll).
 */
const resetEpisode = useCallback(() => {
  runningRef.current = false;
  setEnvState(null);
  setUiMode("idle");
  setEpisodeLog([]);
  setRewards([]);
  setStepCount(0);
  setIsDone(false);
  setStreamingCode("");
  setAgentCode("");
  setExecOutput([]);
  setIsTyping(false);
  setManualCode("");
  setTokenEst(0);
  setCollapsedEntries(new Set());
  setErrorBanner("");
}, []);
/* ──────────────────────────────────────────
RUN ONE STEP
─────────────────────────────────────────── */
/**
 * Execute one full agent step, then recurse until the episode is done or
 * aborted. Each phase re-checks runningRef so that a reset mid-step stops
 * the loop cleanly. Phases: get a fix (LLM or manual) → typewriter-stream
 * it → POST /step → stream the execution output → record state/log →
 * either finish or schedule the next step.
 *
 * @param currentObs - the latest observation from /reset or /step
 * @param currentStepCount - steps completed before this one
 */
const runStep = useCallback(async (currentObs, currentStepCount) => {
if (!runningRef.current) return;
const mult = speedMap[speed];
const interStepDelay = Math.max(400, 1200 * mult);
/* 1. Agent thinking */
setUiMode("agent_thinking");
let fixedCode;
try {
// Manual mode: submit the user's code (or the unchanged buggy code if
// the textarea is empty). Otherwise ask Ollama.
fixedCode = manualMode
? (manualCode.trim() || currentObs.buggy_code)
: await callOllama(currentObs);
} catch (err) {
if (!runningRef.current) return;
setErrorBanner(`🦙 Ollama Error: ${err.message}`);
setUiMode("idle");
runningRef.current = false;
return;
}
if (!runningRef.current) return;
/* 2. Stream agent code */
await new Promise(resolve => streamText(fixedCode, setStreamingCode, resolve));
if (!runningRef.current) return;
setAgentCode(fixedCode);
/* 3. Call OpenEnv /step */
setUiMode("executing");
let stepResult;
try {
stepResult = await envStep(fixedCode);
} catch (err) {
if (!runningRef.current) return;
setErrorBanner(`🌐 OpenEnv Error: ${err.message}`);
setUiMode("idle");
runningRef.current = false;
return;
}
if (!runningRef.current) return;
const { observation: newObs, reward, done } = stepResult;
const meta = stepResult.info?.execution_metadata || {};
const passed = meta.test_passed ?? 0;
// Fallback test count when the server omits it — derived from the hint
// count, purely for display.
const total = meta.test_total ?? task.hints.length + 1;
const newStep = currentStepCount + 1;
/* 4. Stream execution output */
const outputLines = buildOutputLines(stepResult);
await new Promise(resolve => streamLines(outputLines, resolve));
if (!runningRef.current) return;
/* 5. Update state */
setEnvState(newObs);
setStepCount(newStep);
setRewards(prev => [...prev, reward]);
setIsDone(done);
const logEntry = {
step: newStep,
code_submitted: fixedCode,
reward, done, passed, total,
error_log: newObs?.error_log || "",
test_results: newObs?.test_results || "",
timestamp: new Date().toISOString(),
};
// Prepend: the log is rendered newest-first.
setEpisodeLog(prev => [logEntry, ...prev]);
/* 6. Done or continue */
if (done) {
setUiMode("done");
runningRef.current = false;
return;
}
/* Wait then continue */
await new Promise(r => setTimeout(r, interStepDelay));
if (!runningRef.current) return;
// Tail-recurse into the next step; depth is bounded by the episode length.
runStep(newObs, newStep);
}, [speed, manualMode, manualCode, callOllama, streamText, streamLines, envStep, task]);
/* ──────────────────────────────────────────
START EPISODE
─────────────────────────────────────────── */
/**
 * Begin a fresh episode: verify the required backends are reachable, clear
 * all prior state, call /reset on the OpenEnv server, then kick off the
 * first step shortly afterwards.
 */
const startEpisode = useCallback(async () => {
  // Guard clauses: the LLM is only required outside manual mode.
  if (!manualMode && ollamaStatus !== "online") {
    setErrorBanner("🦙 Ollama is offline. Start Ollama or enable Manual Mode.");
    return;
  }
  if (envStatus !== "online") {
    setErrorBanner("🌐 OpenEnv server is offline. Run: uvicorn server.app:app --port 7860");
    return;
  }
  resetEpisode();
  setErrorBanner("");
  // Brief pause so the reset renders before the run flag flips back on.
  await new Promise((resolve) => setTimeout(resolve, 60));
  runningRef.current = true;
  setUiMode("resetting");
  let firstObs;
  try {
    firstObs = await envReset(selectedTask);
  } catch (err) {
    setErrorBanner(`🌐 OpenEnv /reset Error: ${err.message}`);
    setUiMode("idle");
    runningRef.current = false;
    return;
  }
  setEnvState(firstObs);
  setTimeout(() => runStep(firstObs, 0), 400);
}, [ollamaStatus, envStatus, manualMode, resetEpisode, envReset, selectedTask, runStep]);
/* ──────────────────────────────────────────
COPY EPISODE JSON
─────────────────────────────────────────── */
/**
 * Serialize the episode (in chronological order) to pretty-printed JSON and
 * copy it to the clipboard, flashing a "copied" indicator on success.
 *
 * Fix: the clipboard promise previously had no rejection handler, so a
 * denied clipboard permission produced an unhandled rejection and silent
 * failure; errors now surface in the error banner.
 */
const copyJSON = useCallback(() => {
  const data = {
    task: selectedTask,
    model: ollamaModel,
    timestamp: new Date().toISOString(),
    total_steps: episodeLog.length,
    final_reward: rewards[rewards.length - 1] ?? 0,
    success: (rewards[rewards.length - 1] ?? 0) >= 0.99,
    // episodeLog is stored newest-first; export oldest-first.
    episode_log: [...episodeLog].reverse(),
  };
  navigator.clipboard
    .writeText(JSON.stringify(data, null, 2))
    .then(() => {
      setCopied(true);
      setTimeout(() => setCopied(false), 2200);
    })
    .catch((err) => {
      setErrorBanner(`📋 Clipboard Error: ${err.message}`);
    });
}, [selectedTask, ollamaModel, episodeLog, rewards]);
// Keep the episode log scrolled to the newest entry (entries are prepended).
useEffect(() => { if (logRef.current) logRef.current.scrollTop = 0; }, [episodeLog.length]);
const totalReward = rewards.reduce((a, b) => a + b, 0);
const finalReward = rewards[rewards.length - 1] ?? 0;
// Success threshold mirrors the full-pass reward cutoff used in runStep.
const success = finalReward >= 0.99;
const isRunning = ["agent_thinking", "executing", "resetting"].includes(uiMode);
/**
 * Pick a display color for one terminal output line based on its content:
 * green for "OK", red for failure/error text, muted gray for ruler and
 * "Ran N tests" summary lines, light slate otherwise. Checks run in that
 * order, so a line containing both "OK" and "FAIL" renders green.
 */
const termColor = line => {
  if (line.includes("OK")) return "#00ff88";
  const looksLikeFailure =
    line.includes("FAIL") || line.includes("Error") || line.includes("error");
  if (looksLikeFailure) return "#ff4455";
  const isMeta = line.includes("---") || line.includes("Ran");
  return isMeta ? "#64748b" : "#94a3b8";
};
/* ──────────────────────────────────────────
RENDER
─────────────────────────────────────────── */
/*
 * NOTE(review): the JSX render tree below was stripped/garbled during
 * extraction — only a few interpolated expressions survive and the block
 * does not parse. The original markup must be restored from version
 * control; the residue is kept only as a record of which values the view
 * consumed (task.description, envState?.buggy_code with the TASKS
 * fallback, and per-entry reward-threshold coloring of code_submitted).
 */
return (
<>
{task.description}
{/* Show real buggy code from env if available, else fallback to hardcoded */}
{(envState?.buggy_code) || task.buggy_code}
= 0.99 ? "#a8d8a8" : entry.reward >= 0.5 ? "#ffd580" : "#f8c8c8",
}}>{entry.code_submitted}
)}
>
)}