import React, { useState, useEffect, useRef, useCallback } from "react"; /* ───────────────────────────────────────────── GOOGLE FONTS ───────────────────────────────────────────── */ const FontLoader = () => { useEffect(() => { const link = document.createElement("link"); link.rel = "stylesheet"; link.href = "https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700&family=Rajdhani:wght@400;500;600;700&display=swap"; document.head.appendChild(link); return () => document.head.removeChild(link); }, []); return null; }; /* ───────────────────────────────────────────── GLOBAL STYLES ───────────────────────────────────────────── */ const GlobalStyles = () => ( ); /* ───────────────────────────────────────────── TASKS (mirrors server tasks — display only) ───────────────────────────────────────────── */ const TASKS = { "easy-1": { id: "easy-1", label: "Easy", name: "Fix average_list()", difficulty: "easy", description: "Fix syntax errors: missing colon after def and uses length() instead of len().", hints: ["Missing colon after def", "length() → len()"], buggy_code: `def average_list(numbers)\n if length(numbers) == 0:\n return 0\n return sum(numbers) / length(numbers)`, }, "medium-1": { id: "medium-1", label: "Medium", name: "Fix binary_search()", difficulty: "medium", description: "Fix logical bugs: loop condition skips last element; left pointer causes infinite loop.", hints: ["left < right → left <= right", "left = mid → left = mid + 1"], buggy_code: `def binary_search(arr, target):\n left, right = 0, len(arr) - 1\n while left < right:\n mid = (left + right) // 2\n if arr[mid] == target:\n return mid\n elif arr[mid] < target:\n left = mid\n else:\n right = mid - 1\n return -1`, }, "hard-1": { id: "hard-1", label: "Hard", name: "Optimize max_subarray_sum()", difficulty: "hard", description: "Replace O(N³) brute-force with Kadane's O(N) algorithm.", hints: ["Use Kadane's algorithm", "Eliminate triple nested loop"], buggy_code: `def max_subarray_sum(arr):\n if not arr:\n return 0\n max_sum = arr[0]\n for i in range(len(arr)):\n for j in range(i, len(arr)):\n for k in range(i, j+1):\n current = sum(arr[i:j+1])\n if current > max_sum:\n max_sum = current\n return max_sum`, }, }; /* ───────────────────────────────────────────── REWARD COLOR ───────────────────────────────────────────── */ function rewardColor(r) { if (r >= 0.85) return "#00ff88"; if (r >= 0.5) return "#ffaa00"; return "#ff4455"; } /* ───────────────────────────────────────────── ANSI LINE RENDERER ───────────────────────────────────────────── */ function AnsiLine({ text }) { const parts = text.split(/\x1b\[(\d+)m/); let color = null; const els = []; parts.forEach((p, i) => { if (p === "32") color = "#00ff88"; else if (p === "33") color = "#ffaa00"; else if (p === "31") color = "#ff4455"; else if (p === "0") color = null; else els.push({p}); }); return {els}; } /* ───────────────────────────────────────────── REWARD CHART ───────────────────────────────────────────── */ function RewardChart({ rewards }) { const W = 260, H = 100, PAD = 20; const pts = rewards.map((r, i) => ({ x: PAD + (i / Math.max(4, 1)) * (W - PAD * 2), y: PAD + (1 - r) * (H - PAD * 2), r, })); const pathD = pts.length > 1 ? pts.reduce((a, p, i) => i === 0 ? `M${p.x},${p.y}` : a + ` L${p.x},${p.y}`, "") : ""; const areaD = pts.length > 1 ? `${pathD} L${pts[pts.length - 1].x},${H - PAD} L${pts[0].x},${H - PAD} Z` : ""; return ( {[0, 0.5, 1].map(v => { const y = PAD + (1 - v) * (H - PAD * 2); return ; })} {[1, 2, 3, 4, 5].map(s => ( {s} ))} {areaD && } {pathD && } {pts.map((p, i) => )} ); } /* ───────────────────────────────────────────── MAIN APP ───────────────────────────────────────────── */ export default function CodeArenaRL() { /* ── Ollama config ── */ const [ollamaUrl, setOllamaUrl] = useState("http://localhost:11434"); const [ollamaModel, setOllamaModel] = useState("codellama"); const [availableModels, setAvailableModels] = useState([]); const [ollamaStatus, setOllamaStatus] = useState("checking"); // checking | online | offline /* ── OpenEnv (FastAPI) config ── */ const [envUrl, setEnvUrl] = useState("http://localhost:7860"); const [envStatus, setEnvStatus] = useState("checking"); /* ── Task & episode state ── */ const [selectedTask, setSelectedTask] = useState("easy-1"); const [envState, setEnvState] = useState(null); // observation from server const [uiMode, setUiMode] = useState("idle"); // idle|resetting|agent_thinking|executing|done const [episodeLog, setEpisodeLog] = useState([]); const [rewards, setRewards] = useState([]); const [stepCount, setStepCount] = useState(0); const [isDone, setIsDone] = useState(false); /* ── Code display ── */ const [streamingCode, setStreamingCode] = useState(""); const [agentCode, setAgentCode] = useState(""); const [isTyping, setIsTyping] = useState(false); const [execOutput, setExecOutput] = useState([]); // lines from /step response /* ── Manual mode ── */ const [manualMode, setManualMode] = useState(false); const [manualCode, setManualCode] = useState(""); /* ── Speed ── */ const [speed, setSpeed] = useState("normal"); const speedMap = { slow: 3, normal: 1, fast: 0.25 }; /* ── Misc ── */ const [tokenEst, setTokenEst] = useState(0); const [collapsedEntries, setCollapsedEntries] = useState(new Set()); const [copied, setCopied] = useState(false); const [errorBanner, setErrorBanner] = useState(""); const runningRef = useRef(false); const logRef = useRef(null); const task = TASKS[selectedTask]; /* ────────────────────────────────────────── STATUS PROBES ─────────────────────────────────────────── */ const probeOllama = useCallback(async () => { setOllamaStatus("checking"); try { const res = await fetch(`${ollamaUrl}/api/tags`, { signal: AbortSignal.timeout(3000) }); if (res.ok) { const data = await res.json(); const names = (data.models || []).map(m => m.name); setAvailableModels(names.length > 0 ? names : ["codellama", "llama3", "mistral", "deepseek-coder"]); setOllamaStatus("online"); } else { setOllamaStatus("offline"); } } catch { setOllamaStatus("offline"); setAvailableModels(["codellama", "llama3", "mistral", "deepseek-coder"]); } }, [ollamaUrl]); const probeEnv = useCallback(async () => { setEnvStatus("checking"); try { const res = await fetch(`${envUrl}/`, { signal: AbortSignal.timeout(3000) }); setEnvStatus(res.ok ? "online" : "offline"); } catch { setEnvStatus("offline"); } }, [envUrl]); useEffect(() => { probeOllama(); }, [probeOllama]); useEffect(() => { probeEnv(); }, [probeEnv]); /* ────────────────────────────────────────── OPENENV API CALLS ─────────────────────────────────────────── */ const envReset = useCallback(async (taskId) => { const res = await fetch(`${envUrl}/reset`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ task_id: taskId }), }); if (!res.ok) throw new Error(`/reset failed: ${res.status}`); const data = await res.json(); return data.observation; // { buggy_code, error_log, test_results, previous_attempts } }, [envUrl]); const envStep = useCallback(async (proposedFix) => { const res = await fetch(`${envUrl}/step`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ proposed_fix: proposedFix }), }); if (!res.ok) throw new Error(`/step failed: ${res.status}`); const data = await res.json(); // { observation, reward, done, info } return data; }, [envUrl]); /* ────────────────────────────────────────── OLLAMA CALL ─────────────────────────────────────────── */ const callOllama = useCallback(async (obs) => { const systemPrompt = `You are an expert Python debugging agent in a reinforcement learning environment. Return ONLY the fixed Python code — no explanation, no markdown, no code fences.`; const prompt = [ `Task: ${task.description}`, ``, `BUGGY CODE:`, obs.buggy_code, ``, `ERROR LOG:`, obs.error_log || "No errors yet", ``, `TEST RESULTS:`, obs.test_results || "No tests run yet", ``, `PREVIOUS FAILED ATTEMPTS (${(obs.previous_attempts || []).length}):`, (obs.previous_attempts || []).length > 0 ? obs.previous_attempts.join("\n---\n") : "None", ``, `Return ONLY the corrected Python code:`, ].join("\n"); const cleanCode = (text) => (text || "") .trim() .replace(/^```(?:python)?\n?/gm, "") .replace(/```\s*$/gm, "") .trim(); setTokenEst(Math.ceil(prompt.length / 4)); const requestGenerate = async () => { const res = await fetch(`${ollamaUrl}/api/generate`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ model: ollamaModel, prompt, stream: false, options: { temperature: 0.2, num_predict: 1024 }, }), }); if (!res.ok) { if (res.status === 404 || res.status === 405) { return null; } const errText = await res.text(); throw new Error(`Ollama error ${res.status}: ${errText}`); } const data = await res.json(); return cleanCode(data.response || data.text || ""); }; const requestChat = async () => { const res = await fetch(`${ollamaUrl}/api/chat`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ model: ollamaModel, messages: [ { role: "system", content: systemPrompt }, { role: "user", content: prompt }, ], stream: false, options: { temperature: 0.2, max_tokens: 1024, top_p: 0.9 }, }), }); if (!res.ok) { const errText = await res.text(); throw new Error(`Ollama chat error ${res.status}: ${errText}`); } const data = await res.json(); return cleanCode(data.message?.content || data.response || data.text || ""); }; let code = await requestGenerate(); if (code === null) { code = await requestChat(); } if (!code) { throw new Error("Ollama returned an empty response. Check the Ollama model endpoint and model name."); } return code; }, [ollamaUrl, ollamaModel, task]); /* ────────────────────────────────────────── STREAM TEXT (typing animation) ─────────────────────────────────────────── */ const streamText = useCallback((text, setter, onDone) => { const delay = Math.max(4, 18 * speedMap[speed]); let i = 0; setter(""); setIsTyping(true); const iv = setInterval(() => { if (!runningRef.current) { clearInterval(iv); return; } i++; setter(text.slice(0, i)); if (i >= text.length) { clearInterval(iv); setIsTyping(false); if (onDone) onDone(); } }, delay); }, [speed]); /* ────────────────────────────────────────── BUILD EXECUTION OUTPUT LINES from /step ─────────────────────────────────────────── */ const buildOutputLines = (stepResp) => { const { reward, done, info, observation } = stepResp; const meta = info?.execution_metadata || {}; const passed = meta.test_passed ?? "?"; const total = meta.test_total ?? "?"; const elapsed = (Math.random() * 0.003 + 0.001).toFixed(3); const errors = meta.runtime_errors || observation?.error_log || ""; const lines = []; if (reward >= 0.99 || (passed !== "?" && passed >= total)) { lines.push(`\x1b[32m${"." .repeat(typeof total === "number" ? total : 3)}\x1b[0m`); lines.push(`----------------------------------------------------------------------`); lines.push(`Ran ${total} tests in ${elapsed}s`); lines.push(``); lines.push(`\x1b[32mOK\x1b[0m`); } else if (passed > 0) { const fail = total - passed; lines.push(`\x1b[33m${"F".repeat(fail)}${"." .repeat(passed)}\x1b[0m`); lines.push(``); lines.push(`FAIL: partial solution — ${fail} test(s) failed`); if (errors) lines.push(`RuntimeError: ${errors.split("\n")[0]}`); lines.push(`----------------------------------------------------------------------`); lines.push(`Ran ${total} tests in ${elapsed}s`); lines.push(``); lines.push(`\x1b[33mFAILED (failures=${fail})\x1b[0m`); } else { lines.push(`\x1b[31m${"F".repeat(typeof total === "number" ? total : 3)}\x1b[0m`); lines.push(``); if (errors) { errors.split("\n").slice(0, 3).forEach(l => lines.push(l)); } else { lines.push(`FAIL: all tests failed`); } lines.push(`----------------------------------------------------------------------`); lines.push(`Ran ${total} tests in ${elapsed}s`); lines.push(``); lines.push(`\x1b[31mFAILED (errors=${typeof total === "number" ? total : "?"})\x1b[0m`); } return lines; }; /* ────────────────────────────────────────── STREAM OUTPUT LINES ─────────────────────────────────────────── */ const streamLines = useCallback((lines, onDone) => { const delay = Math.max(60, 180 * speedMap[speed]); let i = 0; setExecOutput([]); const iv = setInterval(() => { if (!runningRef.current) { clearInterval(iv); return; } i++; setExecOutput(lines.slice(0, i)); if (i >= lines.length) { clearInterval(iv); if (onDone) onDone(); } }, delay); }, [speed]); /* ────────────────────────────────────────── RESET EPISODE ─────────────────────────────────────────── */ const resetEpisode = useCallback(() => { runningRef.current = false; setEnvState(null); setUiMode("idle"); setEpisodeLog([]); setRewards([]); setStepCount(0); setIsDone(false); setStreamingCode(""); setAgentCode(""); setExecOutput([]); setIsTyping(false); setManualCode(""); setTokenEst(0); setCollapsedEntries(new Set()); setErrorBanner(""); }, []); /* ────────────────────────────────────────── RUN ONE STEP ─────────────────────────────────────────── */ const runStep = useCallback(async (currentObs, currentStepCount) => { if (!runningRef.current) return; const mult = speedMap[speed]; const interStepDelay = Math.max(400, 1200 * mult); /* 1. Agent thinking */ setUiMode("agent_thinking"); let fixedCode; try { fixedCode = manualMode ? (manualCode.trim() || currentObs.buggy_code) : await callOllama(currentObs); } catch (err) { if (!runningRef.current) return; setErrorBanner(`🦙 Ollama Error: ${err.message}`); setUiMode("idle"); runningRef.current = false; return; } if (!runningRef.current) return; /* 2. Stream agent code */ await new Promise(resolve => streamText(fixedCode, setStreamingCode, resolve)); if (!runningRef.current) return; setAgentCode(fixedCode); /* 3. Call OpenEnv /step */ setUiMode("executing"); let stepResult; try { stepResult = await envStep(fixedCode); } catch (err) { if (!runningRef.current) return; setErrorBanner(`🌐 OpenEnv Error: ${err.message}`); setUiMode("idle"); runningRef.current = false; return; } if (!runningRef.current) return; const { observation: newObs, reward, done } = stepResult; const meta = stepResult.info?.execution_metadata || {}; const passed = meta.test_passed ?? 0; const total = meta.test_total ?? task.hints.length + 1; const newStep = currentStepCount + 1; /* 4. Stream execution output */ const outputLines = buildOutputLines(stepResult); await new Promise(resolve => streamLines(outputLines, resolve)); if (!runningRef.current) return; /* 5. Update state */ setEnvState(newObs); setStepCount(newStep); setRewards(prev => [...prev, reward]); setIsDone(done); const logEntry = { step: newStep, code_submitted: fixedCode, reward, done, passed, total, error_log: newObs?.error_log || "", test_results: newObs?.test_results || "", timestamp: new Date().toISOString(), }; setEpisodeLog(prev => [logEntry, ...prev]); /* 6. Done or continue */ if (done) { setUiMode("done"); runningRef.current = false; return; } /* Wait then continue */ await new Promise(r => setTimeout(r, interStepDelay)); if (!runningRef.current) return; runStep(newObs, newStep); }, [speed, manualMode, manualCode, callOllama, streamText, streamLines, envStep, task]); /* ────────────────────────────────────────── START EPISODE ─────────────────────────────────────────── */ const startEpisode = useCallback(async () => { if (ollamaStatus !== "online" && !manualMode) { setErrorBanner("🦙 Ollama is offline. Start Ollama or enable Manual Mode."); return; } if (envStatus !== "online") { setErrorBanner("🌐 OpenEnv server is offline. Run: uvicorn server.app:app --port 7860"); return; } resetEpisode(); setErrorBanner(""); await new Promise(r => setTimeout(r, 60)); runningRef.current = true; setUiMode("resetting"); let initialObs; try { initialObs = await envReset(selectedTask); } catch (err) { setErrorBanner(`🌐 OpenEnv /reset Error: ${err.message}`); setUiMode("idle"); runningRef.current = false; return; } setEnvState(initialObs); setTimeout(() => runStep(initialObs, 0), 400); }, [ollamaStatus, envStatus, manualMode, resetEpisode, envReset, selectedTask, runStep]); /* ────────────────────────────────────────── COPY EPISODE JSON ─────────────────────────────────────────── */ const copyJSON = useCallback(() => { const data = { task: selectedTask, model: ollamaModel, timestamp: new Date().toISOString(), total_steps: episodeLog.length, final_reward: rewards[rewards.length - 1] ?? 0, success: (rewards[rewards.length - 1] ?? 0) >= 0.99, episode_log: [...episodeLog].reverse(), }; navigator.clipboard.writeText(JSON.stringify(data, null, 2)).then(() => { setCopied(true); setTimeout(() => setCopied(false), 2200); }); }, [selectedTask, ollamaModel, episodeLog, rewards]); useEffect(() => { if (logRef.current) logRef.current.scrollTop = 0; }, [episodeLog.length]); const totalReward = rewards.reduce((a, b) => a + b, 0); const finalReward = rewards[rewards.length - 1] ?? 0; const success = finalReward >= 0.99; const isRunning = ["agent_thinking", "executing", "resetting"].includes(uiMode); const termColor = line => { if (line.includes("OK")) return "#00ff88"; if (line.includes("FAIL") || line.includes("Error") || line.includes("error")) return "#ff4455"; if (line.includes("---") || line.includes("Ran")) return "#64748b"; return "#94a3b8"; }; /* ────────────────────────────────────────── RENDER ─────────────────────────────────────────── */ return ( <>
{/* ── NAVBAR ── */} {/* ── ERROR BANNER ── */} {errorBanner && (
{errorBanner}
)} {/* ── MAIN GRID ── */}
{/* ════════════════════ LEFT PANEL ════════════════════ */}
{/* Ollama Config */}
🦙  Ollama Config
Base URL
setOllamaUrl(e.target.value)} placeholder="http://localhost:11434" />
Model
{availableModels.length > 0 ? ( ) : ( setOllamaModel(e.target.value)} placeholder="codellama" /> )}
{ollamaStatus === "offline" && (
💡 Run: ollama serve
Then pull a model:
ollama pull codellama
)}
{/* OpenEnv Config */}
🌐  OpenEnv Server
FastAPI URL
setEnvUrl(e.target.value)} placeholder="http://localhost:7860" />
{envStatus === "offline" && (
⚠ Start server:
uvicorn server.app:app --port 7860
)}
{/* Task Selector */}
🎯  Select Task
{Object.values(TASKS).map(t => { const isActive = selectedTask === t.id; const ac = t.difficulty === "easy" ? "active" : t.difficulty === "medium" ? "active-medium" : "active-hard"; return ( ); })}
{/* Task Info */}
📋  Task Info

{task.description}

{task.hints.map((h, i) => (
💡 {h}
))}
{/* Options */}
⚙️  Options
Manual Mode
Type fix yourself
Speed {speed.toUpperCase()}
{["slow", "normal", "fast"].map(s => ( ))}
{/* Episode Controls */}
▶  Episode Controls
{/* Episode Stats */} {stepCount > 0 && (
📊  Episode Stats
Step
{stepCount} /5
Cumulative Reward
{totalReward.toFixed(3)}
Status
{{ idle: "IDLE", resetting: "RESETTING", agent_thinking: "THINKING", executing: "EXECUTING", done: success ? "✓ SUCCESS" : "✗ FAILED" }[uiMode]}
{tokenEst > 0 && (
~{tokenEst} prompt tokens
)}
)}
{/* ════════════════════ CENTER PANEL ════════════════════ */}
{/* Buggy Code */}
 Buggy Code {task.id}
                  {/* Show real buggy code from env if available, else fallback to hardcoded */}
                  {(envState?.buggy_code) || task.buggy_code}
                
{/* Agent's Fix */}
🦙 Agent's Fix {uiMode === "agent_thinking" && ( OLLAMA THINKING... )} {ollamaModel}
{manualMode && uiMode === "idle" ? (