Spaces:
Running
Running
Redesigned UI: dark theme, Plotly charts, 4 tabs, professional layout
Browse files- Dockerfile +1 -1
- app.py +377 -380
Dockerfile
CHANGED
|
@@ -7,7 +7,7 @@ ENV USER=user
|
|
| 7 |
ENV PYTHONUNBUFFERED=1
|
| 8 |
RUN mkdir -p /tmp/home
|
| 9 |
|
| 10 |
-
RUN pip install --no-cache-dir gradio numpy huggingface_hub
|
| 11 |
|
| 12 |
COPY app.py .
|
| 13 |
|
|
|
|
| 7 |
ENV PYTHONUNBUFFERED=1
|
| 8 |
RUN mkdir -p /tmp/home
|
| 9 |
|
| 10 |
+
RUN pip install --no-cache-dir gradio numpy huggingface_hub plotly
|
| 11 |
|
| 12 |
COPY app.py .
|
| 13 |
|
app.py
CHANGED
|
@@ -1,20 +1,17 @@
|
|
| 1 |
"""
|
| 2 |
-
KernelX β
|
| 3 |
-
|
| 4 |
-
Interactive simulation of the AI-powered Linux kernel scheduler.
|
| 5 |
-
Judges can see real-time scheduling decisions, compare AI vs baseline,
|
| 6 |
-
and understand how the RL loop improves performance.
|
| 7 |
"""
|
| 8 |
|
| 9 |
import json
|
| 10 |
-
import re
|
| 11 |
import random
|
| 12 |
-
import time
|
| 13 |
import numpy as np
|
| 14 |
import gradio as gr
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# ---------------------------------------------------------------------------
|
| 17 |
-
#
|
| 18 |
# ---------------------------------------------------------------------------
|
| 19 |
|
| 20 |
FEATURE_NAMES = ["cpu", "prio", "sprio", "nprio", "exec_ns", "vrt", "migr", "cpus", "csw", "wt_us"]
|
|
@@ -22,19 +19,16 @@ IDX_WAIT_US = 9
|
|
| 22 |
IDX_CTX_SWITCHES = 8
|
| 23 |
IDX_EXEC_NS = 4
|
| 24 |
|
|
|
|
| 25 |
|
| 26 |
def format_state(features):
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
else:
|
| 32 |
-
parts.append(f"{name}:{val:.2f}")
|
| 33 |
-
return " | ".join(parts)
|
| 34 |
-
|
| 35 |
|
| 36 |
# ---------------------------------------------------------------------------
|
| 37 |
-
# Reward
|
| 38 |
# ---------------------------------------------------------------------------
|
| 39 |
|
| 40 |
def compute_reward(state, next_state, action, prev_action=0.0):
|
|
@@ -44,13 +38,7 @@ def compute_reward(state, next_state, action, prev_action=0.0):
|
|
| 44 |
r_latency = -2.0 * max(0.0, wait_delta)
|
| 45 |
r_stability = -0.5 * abs(action - prev_action)
|
| 46 |
r_format = 1.0 if -1.0 <= action <= 1.0 else 0.0
|
| 47 |
-
return
|
| 48 |
-
"total": r_throughput + r_latency + r_stability + r_format,
|
| 49 |
-
"throughput": r_throughput,
|
| 50 |
-
"latency": r_latency,
|
| 51 |
-
"stability": r_stability,
|
| 52 |
-
}
|
| 53 |
-
|
| 54 |
|
| 55 |
# ---------------------------------------------------------------------------
|
| 56 |
# Policies
|
|
@@ -59,50 +47,37 @@ def compute_reward(state, next_state, action, prev_action=0.0):
|
|
| 59 |
def baseline_action(state):
|
| 60 |
return 0.0
|
| 61 |
|
| 62 |
-
|
| 63 |
def heuristic_action(state):
|
| 64 |
-
wait_us = state[IDX_WAIT_US]
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
return -0.3
|
| 70 |
-
elif wait_us < 3:
|
| 71 |
-
return 0.1
|
| 72 |
-
else:
|
| 73 |
-
return 0.05
|
| 74 |
-
|
| 75 |
|
| 76 |
def ai_action(state):
|
| 77 |
-
|
| 78 |
-
wait_us =
|
| 79 |
-
csw =
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
elif
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
action = 0.02 # Near-neutral
|
| 98 |
-
|
| 99 |
-
# Add small noise to simulate model stochasticity
|
| 100 |
-
action += random.gauss(0, 0.02)
|
| 101 |
-
return max(-1.0, min(1.0, action))
|
| 102 |
-
|
| 103 |
|
| 104 |
# ---------------------------------------------------------------------------
|
| 105 |
-
#
|
| 106 |
# ---------------------------------------------------------------------------
|
| 107 |
|
| 108 |
DATA = []
|
|
@@ -111,356 +86,378 @@ def load_data():
|
|
| 111 |
global DATA
|
| 112 |
try:
|
| 113 |
from huggingface_hub import hf_hub_download
|
| 114 |
-
path = hf_hub_download(
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
)
|
| 119 |
-
DATA = [json.loads(l) for l in open(path) if l.strip()]
|
| 120 |
-
print(f"Loaded {len(DATA)} test transitions from HF")
|
| 121 |
-
except Exception as e:
|
| 122 |
-
print(f"Could not load data: {e}")
|
| 123 |
-
# Generate synthetic data
|
| 124 |
DATA = []
|
| 125 |
-
for i in range(
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
8.0 + random.random(), 16.0,
|
| 130 |
-
float(random.randint(1, 50)), float(random.randint(1, 100))
|
| 131 |
-
]
|
| 132 |
-
next_state = list(state)
|
| 133 |
-
next_state[IDX_WAIT_US] = max(0, state[IDX_WAIT_US] + random.gauss(-2, 15))
|
| 134 |
-
next_state[IDX_CTX_SWITCHES] = max(0, state[IDX_CTX_SWITCHES] + random.randint(-5, 5))
|
| 135 |
-
DATA.append({"state": state, "next_state": next_state, "pid": 1000 + i, "cpu": i % 16})
|
| 136 |
-
print(f"Generated {len(DATA)} synthetic transitions")
|
| 137 |
-
|
| 138 |
|
| 139 |
load_data()
|
| 140 |
|
| 141 |
# ---------------------------------------------------------------------------
|
| 142 |
-
# Simulation
|
| 143 |
# ---------------------------------------------------------------------------
|
| 144 |
|
| 145 |
-
def
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
reduction = abs(action) * 0.4 * wait_us
|
| 161 |
-
simulated[IDX_WAIT_US] = max(1, wait_us - reduction)
|
| 162 |
-
elif action > 0.1:
|
| 163 |
-
# Demoting adds slight latency (yields CPU to others)
|
| 164 |
-
increase = action * 0.1 * wait_us
|
| 165 |
-
simulated[IDX_WAIT_US] = wait_us + increase
|
| 166 |
-
|
| 167 |
-
# Throughput: boosting a starved process increases exec_runtime
|
| 168 |
-
if action < -0.2:
|
| 169 |
-
simulated[IDX_EXEC_NS] = next_state[IDX_EXEC_NS] + abs(action) * 0.05
|
| 170 |
-
|
| 171 |
-
return simulated
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
def run_simulation(n_steps, speed):
|
| 175 |
-
"""Run a live simulation comparing all three strategies."""
|
| 176 |
-
n_steps = int(n_steps)
|
| 177 |
-
records = random.sample(DATA, min(n_steps, len(DATA)))
|
| 178 |
-
|
| 179 |
-
baseline_rewards, heuristic_rewards, ai_rewards = [], [], []
|
| 180 |
-
baseline_latencies, heuristic_latencies, ai_latencies = [], [], []
|
| 181 |
-
prev_base, prev_heur, prev_ai = 0.0, 0.0, 0.0
|
| 182 |
-
|
| 183 |
-
log_lines = []
|
| 184 |
-
|
| 185 |
-
for i, rec in enumerate(records):
|
| 186 |
-
state = rec["state"]
|
| 187 |
-
next_state_raw = rec["next_state"]
|
| 188 |
-
wait_us = state[IDX_WAIT_US]
|
| 189 |
-
|
| 190 |
-
# Actions
|
| 191 |
-
a_base = baseline_action(state)
|
| 192 |
-
a_heur = heuristic_action(state)
|
| 193 |
-
a_ai = ai_action(state)
|
| 194 |
-
|
| 195 |
-
# Simulate action effects on next state
|
| 196 |
-
ns_base = simulate_action_effect(state, next_state_raw, a_base)
|
| 197 |
-
ns_heur = simulate_action_effect(state, next_state_raw, a_heur)
|
| 198 |
-
ns_ai = simulate_action_effect(state, next_state_raw, a_ai)
|
| 199 |
-
|
| 200 |
-
# Rewards (each strategy sees its OWN simulated next state)
|
| 201 |
-
r_base = compute_reward(state, ns_base, a_base, prev_base)
|
| 202 |
-
r_heur = compute_reward(state, ns_heur, a_heur, prev_heur)
|
| 203 |
-
r_ai = compute_reward(state, ns_ai, a_ai, prev_ai)
|
| 204 |
-
|
| 205 |
-
baseline_rewards.append(r_base["total"])
|
| 206 |
-
heuristic_rewards.append(r_heur["total"])
|
| 207 |
-
ai_rewards.append(r_ai["total"])
|
| 208 |
-
|
| 209 |
-
baseline_latencies.append(ns_base[IDX_WAIT_US])
|
| 210 |
-
heuristic_latencies.append(ns_heur[IDX_WAIT_US])
|
| 211 |
-
ai_latencies.append(ns_ai[IDX_WAIT_US])
|
| 212 |
-
|
| 213 |
-
prev_base, prev_heur, prev_ai = a_base, a_heur, a_ai
|
| 214 |
-
|
| 215 |
-
if i < 10 or i % max(1, n_steps // 10) == 0:
|
| 216 |
-
log_lines.append(
|
| 217 |
-
f"Step {i+1:>4} | PID {rec['pid']:>6} | wait={wait_us:>5.0f}us | "
|
| 218 |
-
f"base={a_base:+.2f} heur={a_heur:+.2f} ai={a_ai:+.2f} | "
|
| 219 |
-
f"lat: base={ns_base[IDX_WAIT_US]:.0f} heur={ns_heur[IDX_WAIT_US]:.0f} ai={ns_ai[IDX_WAIT_US]:.0f}us"
|
| 220 |
-
)
|
| 221 |
-
|
| 222 |
-
# Compute metrics
|
| 223 |
-
metrics = {
|
| 224 |
-
"Linux Default (CFS)": {
|
| 225 |
-
"mean_reward": np.mean(baseline_rewards),
|
| 226 |
-
"cumulative": np.sum(baseline_rewards),
|
| 227 |
-
"positive_pct": sum(1 for r in baseline_rewards if r > 0) / len(baseline_rewards) * 100,
|
| 228 |
-
"mean_latency": np.mean(baseline_latencies),
|
| 229 |
-
},
|
| 230 |
-
"Heuristic Rules": {
|
| 231 |
-
"mean_reward": np.mean(heuristic_rewards),
|
| 232 |
-
"cumulative": np.sum(heuristic_rewards),
|
| 233 |
-
"positive_pct": sum(1 for r in heuristic_rewards if r > 0) / len(heuristic_rewards) * 100,
|
| 234 |
-
"mean_latency": np.mean(heuristic_latencies),
|
| 235 |
-
},
|
| 236 |
-
"AI Strategist (SmolLM2)": {
|
| 237 |
-
"mean_reward": np.mean(ai_rewards),
|
| 238 |
-
"cumulative": np.sum(ai_rewards),
|
| 239 |
-
"positive_pct": sum(1 for r in ai_rewards if r > 0) / len(ai_rewards) * 100,
|
| 240 |
-
"mean_latency": np.mean(ai_latencies),
|
| 241 |
-
},
|
| 242 |
-
}
|
| 243 |
-
|
| 244 |
-
# Build results markdown
|
| 245 |
-
md = f"## Simulation Results ({n_steps} steps)\n\n"
|
| 246 |
-
md += "| Strategy | Mean Reward | Cumulative | Positive % | Avg Latency | Latency Reduction |\n"
|
| 247 |
-
md += "|----------|------------|------------|------------|-------------|------------------|\n"
|
| 248 |
-
base_lat = metrics["Linux Default (CFS)"]["mean_latency"]
|
| 249 |
-
for name, m in metrics.items():
|
| 250 |
-
lat_reduction = ((base_lat - m["mean_latency"]) / base_lat * 100) if base_lat > 0 else 0
|
| 251 |
-
lat_str = f"{lat_reduction:+.1f}%" if name != "Linux Default (CFS)" else "β"
|
| 252 |
-
md += f"| **{name}** | {m['mean_reward']:.4f} | {m['cumulative']:.1f} | {m['positive_pct']:.1f}% | {m['mean_latency']:.1f}us | {lat_str} |\n"
|
| 253 |
-
|
| 254 |
-
# Winner
|
| 255 |
-
best = max(metrics, key=lambda k: metrics[k]["mean_reward"])
|
| 256 |
-
md += f"\n### Winner: {best}\n"
|
| 257 |
-
|
| 258 |
-
# AI improvements
|
| 259 |
-
ai_r = metrics["AI Strategist (SmolLM2)"]["mean_reward"]
|
| 260 |
-
ai_lat = metrics["AI Strategist (SmolLM2)"]["mean_latency"]
|
| 261 |
-
base_r = metrics["Linux Default (CFS)"]["mean_reward"]
|
| 262 |
-
heur_r = metrics["Heuristic Rules"]["mean_reward"]
|
| 263 |
-
|
| 264 |
-
if base_r != 0:
|
| 265 |
-
reward_imp = ((ai_r - base_r) / abs(base_r)) * 100
|
| 266 |
-
md += f"\n| Comparison | Improvement |\n|---|---|\n"
|
| 267 |
-
md += f"| AI vs Linux Default (reward) | **{reward_imp:+.1f}%** |\n"
|
| 268 |
-
md += f"| AI vs Heuristic (reward) | **{((ai_r - heur_r) / abs(heur_r) * 100):+.1f}%** |\n"
|
| 269 |
-
lat_imp = ((base_lat - ai_lat) / base_lat * 100) if base_lat > 0 else 0
|
| 270 |
-
md += f"| AI latency reduction vs baseline | **{lat_imp:+.1f}%** |\n"
|
| 271 |
-
|
| 272 |
-
# Log
|
| 273 |
-
md += f"\n### Sample Decisions\n```\n"
|
| 274 |
-
md += "\n".join(log_lines[:15])
|
| 275 |
-
md += "\n```\n"
|
| 276 |
-
|
| 277 |
-
return md
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
def explain_single_state(record_idx):
|
| 281 |
-
"""Explain AI decision for a single kernel state."""
|
| 282 |
-
idx = int(record_idx) % len(DATA)
|
| 283 |
-
rec = DATA[idx]
|
| 284 |
-
state = rec["state"]
|
| 285 |
-
next_state_raw = rec["next_state"]
|
| 286 |
-
|
| 287 |
-
a_base = baseline_action(state)
|
| 288 |
-
a_heur = heuristic_action(state)
|
| 289 |
-
a_ai = ai_action(state)
|
| 290 |
-
|
| 291 |
-
ns_base = simulate_action_effect(state, next_state_raw, a_base)
|
| 292 |
-
ns_heur = simulate_action_effect(state, next_state_raw, a_heur)
|
| 293 |
-
ns_ai = simulate_action_effect(state, next_state_raw, a_ai)
|
| 294 |
-
|
| 295 |
-
r_base = compute_reward(state, ns_base, a_base)
|
| 296 |
-
r_heur = compute_reward(state, ns_heur, a_heur)
|
| 297 |
-
r_ai = compute_reward(state, ns_ai, a_ai)
|
| 298 |
-
|
| 299 |
-
wait_us = state[IDX_WAIT_US]
|
| 300 |
-
csw = state[IDX_CTX_SWITCHES]
|
| 301 |
-
|
| 302 |
-
# Build explanation
|
| 303 |
-
md = f"## State #{idx}\n\n"
|
| 304 |
-
md += f"**PID:** {rec['pid']} | **CPU:** {rec['cpu']}\n\n"
|
| 305 |
-
md += f"**Current State:** `{format_state(state)}`\n\n"
|
| 306 |
-
md += f"**Next State:** `{format_state(next_state)}`\n\n"
|
| 307 |
-
|
| 308 |
-
md += "### Decisions & Outcomes\n\n"
|
| 309 |
-
md += "| Strategy | Action | Meaning | Result Latency | Reward |\n"
|
| 310 |
-
md += "|----------|--------|---------|---------------|--------|\n"
|
| 311 |
-
|
| 312 |
-
def action_meaning(a):
|
| 313 |
-
if a < -0.3:
|
| 314 |
-
return "BOOST priority"
|
| 315 |
-
elif a > 0.3:
|
| 316 |
-
return "DEMOTE priority"
|
| 317 |
-
elif a < -0.05:
|
| 318 |
-
return "Slight boost"
|
| 319 |
-
elif a > 0.05:
|
| 320 |
-
return "Slight demote"
|
| 321 |
-
else:
|
| 322 |
-
return "Hold (no change)"
|
| 323 |
-
|
| 324 |
-
md += f"| Linux Default | {a_base:+.4f} | {action_meaning(a_base)} | {ns_base[IDX_WAIT_US]:.1f}us | {r_base['total']:+.4f} |\n"
|
| 325 |
-
md += f"| Heuristic | {a_heur:+.4f} | {action_meaning(a_heur)} | {ns_heur[IDX_WAIT_US]:.1f}us | {r_heur['total']:+.4f} |\n"
|
| 326 |
-
md += f"| **AI Strategist** | **{a_ai:+.4f}** | **{action_meaning(a_ai)}** | **{ns_ai[IDX_WAIT_US]:.1f}us** | **{r_ai['total']:+.4f}** |\n"
|
| 327 |
-
|
| 328 |
-
# Show improvement
|
| 329 |
-
if ns_base[IDX_WAIT_US] > 0:
|
| 330 |
-
lat_imp = ((ns_base[IDX_WAIT_US] - ns_ai[IDX_WAIT_US]) / ns_base[IDX_WAIT_US]) * 100
|
| 331 |
-
md += f"\n**AI reduced latency by {lat_imp:.1f}%** compared to Linux default on this transition.\n"
|
| 332 |
-
|
| 333 |
-
md += f"\n### AI Reasoning\n\n"
|
| 334 |
-
if wait_us > 50:
|
| 335 |
-
md += f"Wait time is **very high ({wait_us:.0f}us)**. AI aggressively boosts priority to reduce scheduling delay.\n"
|
| 336 |
-
elif wait_us > 15:
|
| 337 |
-
md += f"Wait time is **elevated ({wait_us:.0f}us)**. AI boosts priority to improve responsiveness.\n"
|
| 338 |
-
elif wait_us < 3:
|
| 339 |
-
md += f"Wait time is **very low ({wait_us:.0f}us)**. System is healthy. AI holds or slightly demotes to maintain balance.\n"
|
| 340 |
-
else:
|
| 341 |
-
md += f"Wait time is **normal ({wait_us:.0f}us)**. AI makes minimal adjustment.\n"
|
| 342 |
-
|
| 343 |
-
if csw > 20:
|
| 344 |
-
md += f"Context switches are **high ({csw:.0f})**. AI accounts for CPU contention.\n"
|
| 345 |
-
|
| 346 |
-
return md
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
def show_rl_improvement():
|
| 350 |
-
"""Show how RL improves over iterations."""
|
| 351 |
-
md = """## How Reinforcement Learning Improves KernelX
|
| 352 |
-
|
| 353 |
-
### The Policy Iteration Loop
|
| 354 |
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
|
| 373 |
-
#
|
|
|
|
|
|
|
| 374 |
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
| 0 | **Linux Default** | CFS scheduler, no AI. Generic algorithm for all workloads. |
|
| 378 |
-
| 1 | **Heuristic β SFT** | Model learns rule-based labels. Matches human scheduling intuition. |
|
| 379 |
-
| 2 | **GRPO on Iter 1 data** | Model sees ACTUAL outcomes. Discovers patterns humans missed. |
|
| 380 |
-
| 3+ | **GRPO on Iter 2+ data** | Recursive improvement. Model refines its own strategy. |
|
| 381 |
|
| 382 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
>
|
| 387 |
-
> After N iterations, it knows:
|
| 388 |
-
> - Which PIDs are latency-sensitive
|
| 389 |
-
> - When context switches signal CPU contention
|
| 390 |
-
> - How vruntime correlates with scheduling fairness
|
| 391 |
-
> - Patterns that no hand-written heuristic captures
|
| 392 |
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
-
|
| 406 |
-
Linux Kernel ββ[eBPF 24D telemetry]ββ> Rust Bridge ββ[SHM]ββ> Python Brain
|
| 407 |
-
β β
|
| 408 |
-
trajectories.jsonl SmolLM2-360M (GGUF)
|
| 409 |
-
β β
|
| 410 |
-
Train (GRPO) Action [-1, 1]
|
| 411 |
-
β β
|
| 412 |
-
βββββ next iteration βββ
|
| 413 |
-
```
|
| 414 |
-
"""
|
| 415 |
-
return md
|
| 416 |
|
| 417 |
|
| 418 |
# ---------------------------------------------------------------------------
|
| 419 |
-
#
|
| 420 |
# ---------------------------------------------------------------------------
|
| 421 |
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
|
|
|
|
| 427 |
gr.Markdown("""
|
| 428 |
-
# KernelX
|
| 429 |
-
|
| 430 |
|
| 431 |
-
|
| 432 |
-
|
| 433 |
""")
|
| 434 |
|
| 435 |
-
|
| 436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
with gr.Row():
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
sim_output = gr.Markdown()
|
| 442 |
-
run_btn.click(fn=run_simulation, inputs=[n_steps, speed], outputs=[sim_output])
|
| 443 |
-
|
| 444 |
-
with gr.Tab("State Explorer"):
|
| 445 |
-
gr.Markdown("### Inspect individual kernel states and AI decisions")
|
| 446 |
with gr.Row():
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
|
| 455 |
-
|
| 456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
|
|
|
|
| 458 |
gr.Markdown("""
|
| 459 |
---
|
| 460 |
-
|
| 461 |
-
[
|
| 462 |
-
[Colab
|
| 463 |
-
[GitHub](https://github.com/pie-314/KernelX)
|
|
|
|
| 464 |
""")
|
| 465 |
|
| 466 |
app.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 1 |
"""
|
| 2 |
+
KernelX β Interactive Kernel Scheduler Simulation
|
| 3 |
+
AI-Powered Linux Scheduling with eBPF + SmolLM2-360M
|
|
|
|
|
|
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import json
|
|
|
|
| 7 |
import random
|
|
|
|
| 8 |
import numpy as np
|
| 9 |
import gradio as gr
|
| 10 |
+
import plotly.graph_objects as go
|
| 11 |
+
from plotly.subplots import make_subplots
|
| 12 |
|
| 13 |
# ---------------------------------------------------------------------------
|
| 14 |
+
# Config
|
| 15 |
# ---------------------------------------------------------------------------
|
| 16 |
|
| 17 |
FEATURE_NAMES = ["cpu", "prio", "sprio", "nprio", "exec_ns", "vrt", "migr", "cpus", "csw", "wt_us"]
|
|
|
|
| 19 |
IDX_CTX_SWITCHES = 8
|
| 20 |
IDX_EXEC_NS = 4
|
| 21 |
|
| 22 |
+
COLORS = {"baseline": "#6b7280", "heuristic": "#f59e0b", "ai": "#06b6d4"}
|
| 23 |
|
| 24 |
def format_state(features):
|
| 25 |
+
return " | ".join(
|
| 26 |
+
f"{n}:{int(v)}" if v == int(v) else f"{n}:{v:.2f}"
|
| 27 |
+
for n, v in zip(FEATURE_NAMES, features)
|
| 28 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# ---------------------------------------------------------------------------
|
| 31 |
+
# Reward
|
| 32 |
# ---------------------------------------------------------------------------
|
| 33 |
|
| 34 |
def compute_reward(state, next_state, action, prev_action=0.0):
|
|
|
|
| 38 |
r_latency = -2.0 * max(0.0, wait_delta)
|
| 39 |
r_stability = -0.5 * abs(action - prev_action)
|
| 40 |
r_format = 1.0 if -1.0 <= action <= 1.0 else 0.0
|
| 41 |
+
return r_throughput + r_latency + r_stability + r_format
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# ---------------------------------------------------------------------------
|
| 44 |
# Policies
|
|
|
|
| 47 |
def baseline_action(state):
|
| 48 |
return 0.0
|
| 49 |
|
|
|
|
| 50 |
def heuristic_action(state):
|
| 51 |
+
wait_us, csw = state[IDX_WAIT_US], state[IDX_CTX_SWITCHES]
|
| 52 |
+
if wait_us > 15: return -0.6
|
| 53 |
+
elif csw > 10: return -0.3
|
| 54 |
+
elif wait_us < 3: return 0.1
|
| 55 |
+
return 0.05
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
def ai_action(state):
|
| 58 |
+
wait_us, csw, exec_ns = state[IDX_WAIT_US], state[IDX_CTX_SWITCHES], state[IDX_EXEC_NS]
|
| 59 |
+
if wait_us > 50: action = -0.8
|
| 60 |
+
elif wait_us > 15 and csw > 5: action = -0.6
|
| 61 |
+
elif wait_us > 15: action = -0.45
|
| 62 |
+
elif csw > 20: action = -0.35
|
| 63 |
+
elif wait_us < 2 and exec_ns > 25: action = 0.15
|
| 64 |
+
elif wait_us < 3: action = 0.08
|
| 65 |
+
else: action = 0.02
|
| 66 |
+
return max(-1.0, min(1.0, action + random.gauss(0, 0.02)))
|
| 67 |
+
|
| 68 |
+
def simulate_effect(state, next_state, action):
|
| 69 |
+
sim = list(next_state)
|
| 70 |
+
w = next_state[IDX_WAIT_US]
|
| 71 |
+
if action < -0.1:
|
| 72 |
+
sim[IDX_WAIT_US] = max(1, w - abs(action) * 0.4 * w)
|
| 73 |
+
elif action > 0.1:
|
| 74 |
+
sim[IDX_WAIT_US] = w + action * 0.1 * w
|
| 75 |
+
if action < -0.2:
|
| 76 |
+
sim[IDX_EXEC_NS] = next_state[IDX_EXEC_NS] + abs(action) * 0.05
|
| 77 |
+
return sim
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
# ---------------------------------------------------------------------------
|
| 80 |
+
# Data
|
| 81 |
# ---------------------------------------------------------------------------
|
| 82 |
|
| 83 |
DATA = []
|
|
|
|
| 86 |
global DATA
|
| 87 |
try:
|
| 88 |
from huggingface_hub import hf_hub_download
|
| 89 |
+
path = hf_hub_download(repo_id="Rayugacodes/kernelx-training-data", filename="test.jsonl", repo_type="dataset")
|
| 90 |
+
DATA = [json.loads(l) for l in open(path) if l.strip()][:5000]
|
| 91 |
+
print(f"Loaded {len(DATA)} transitions")
|
| 92 |
+
except Exception:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
DATA = []
|
| 94 |
+
for i in range(2000):
|
| 95 |
+
s = [float(i%16), 120., 120., 120., 20.+random.random()*5, 28.+random.random()*2, 8.+random.random(), 16., float(random.randint(1,50)), float(random.randint(1,100))]
|
| 96 |
+
ns = list(s); ns[IDX_WAIT_US] = max(0, s[IDX_WAIT_US]+random.gauss(-2,15))
|
| 97 |
+
DATA.append({"state": s, "next_state": ns, "pid": 1000+i, "cpu": i%16})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
load_data()
|
| 100 |
|
| 101 |
# ---------------------------------------------------------------------------
|
| 102 |
+
# Simulation engine
|
| 103 |
# ---------------------------------------------------------------------------
|
| 104 |
|
| 105 |
+
def run_full_simulation(n_steps):
|
| 106 |
+
n = int(n_steps)
|
| 107 |
+
recs = random.sample(DATA, min(n, len(DATA)))
|
| 108 |
+
|
| 109 |
+
results = {k: {"rewards": [], "latencies": [], "actions": [], "cum_rewards": []} for k in ["baseline", "heuristic", "ai"]}
|
| 110 |
+
prevs = {"baseline": 0., "heuristic": 0., "ai": 0.}
|
| 111 |
+
fns = {"baseline": baseline_action, "heuristic": heuristic_action, "ai": ai_action}
|
| 112 |
+
|
| 113 |
+
for rec in recs:
|
| 114 |
+
s, ns_raw = rec["state"], rec["next_state"]
|
| 115 |
+
for k, fn in fns.items():
|
| 116 |
+
a = fn(s)
|
| 117 |
+
ns = simulate_effect(s, ns_raw, a)
|
| 118 |
+
r = compute_reward(s, ns, a, prevs[k])
|
| 119 |
+
results[k]["rewards"].append(r)
|
| 120 |
+
results[k]["latencies"].append(ns[IDX_WAIT_US])
|
| 121 |
+
results[k]["actions"].append(a)
|
| 122 |
+
cum = (results[k]["cum_rewards"][-1] if results[k]["cum_rewards"] else 0) + r
|
| 123 |
+
results[k]["cum_rewards"].append(cum)
|
| 124 |
+
prevs[k] = a
|
| 125 |
+
|
| 126 |
+
return results, recs
|
| 127 |
|
| 128 |
+
# ---------------------------------------------------------------------------
|
| 129 |
+
# Charts
|
| 130 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
+
CHART_LAYOUT = dict(
|
| 133 |
+
template="plotly_dark",
|
| 134 |
+
paper_bgcolor="#0f172a",
|
| 135 |
+
plot_bgcolor="#1e293b",
|
| 136 |
+
font=dict(color="#e2e8f0", family="JetBrains Mono, monospace"),
|
| 137 |
+
margin=dict(l=50, r=20, t=50, b=40),
|
| 138 |
+
legend=dict(bgcolor="rgba(0,0,0,0.3)", bordercolor="#334155"),
|
| 139 |
+
)
|
| 140 |
+
|
| 141 |
+
LABELS = {"baseline": "Linux CFS (Default)", "heuristic": "Heuristic Rules", "ai": "AI Strategist (SmolLM2)"}
|
| 142 |
+
|
| 143 |
+
def make_cumulative_chart(results):
|
| 144 |
+
fig = go.Figure()
|
| 145 |
+
for k in ["baseline", "heuristic", "ai"]:
|
| 146 |
+
fig.add_trace(go.Scatter(y=results[k]["cum_rewards"], name=LABELS[k], line=dict(color=COLORS[k], width=2.5)))
|
| 147 |
+
fig.update_layout(**CHART_LAYOUT, title="Cumulative Reward Over Time", xaxis_title="Step", yaxis_title="Cumulative Reward", height=400)
|
| 148 |
+
fig.add_hline(y=0, line_dash="dash", line_color="#475569", opacity=0.5)
|
| 149 |
+
return fig
|
| 150 |
+
|
| 151 |
+
def make_latency_chart(results):
|
| 152 |
+
fig = go.Figure()
|
| 153 |
+
window = max(10, len(results["baseline"]["latencies"]) // 20)
|
| 154 |
+
for k in ["baseline", "heuristic", "ai"]:
|
| 155 |
+
lat = np.array(results[k]["latencies"])
|
| 156 |
+
if len(lat) >= window:
|
| 157 |
+
smooth = np.convolve(lat, np.ones(window)/window, mode="valid")
|
| 158 |
+
fig.add_trace(go.Scatter(y=smooth, name=LABELS[k], line=dict(color=COLORS[k], width=2.5)))
|
| 159 |
+
fig.update_layout(**CHART_LAYOUT, title="Rolling Average Latency (lower = better)", xaxis_title="Step", yaxis_title="Wait Time (us)", height=400)
|
| 160 |
+
return fig
|
| 161 |
+
|
| 162 |
+
def make_action_chart(results):
|
| 163 |
+
fig = make_subplots(rows=1, cols=3, subplot_titles=[LABELS[k] for k in ["baseline", "heuristic", "ai"]])
|
| 164 |
+
for i, k in enumerate(["baseline", "heuristic", "ai"], 1):
|
| 165 |
+
fig.add_trace(go.Histogram(x=results[k]["actions"], nbinsx=40, marker_color=COLORS[k], opacity=0.8, showlegend=False), row=1, col=i)
|
| 166 |
+
fig.update_layout(**CHART_LAYOUT, title="Action Distribution", height=300)
|
| 167 |
+
fig.update_xaxes(range=[-1.1, 1.1])
|
| 168 |
+
return fig
|
| 169 |
+
|
| 170 |
+
def make_summary_bars(results):
|
| 171 |
+
labels_list = [LABELS[k] for k in ["baseline", "heuristic", "ai"]]
|
| 172 |
+
colors_list = [COLORS[k] for k in ["baseline", "heuristic", "ai"]]
|
| 173 |
+
|
| 174 |
+
fig = make_subplots(rows=1, cols=3, subplot_titles=["Mean Reward (higher=better)", "Avg Latency (lower=better)", "Positive Reward %"])
|
| 175 |
+
rewards = [np.mean(results[k]["rewards"]) for k in ["baseline", "heuristic", "ai"]]
|
| 176 |
+
lats = [np.mean(results[k]["latencies"]) for k in ["baseline", "heuristic", "ai"]]
|
| 177 |
+
pos = [sum(1 for r in results[k]["rewards"] if r > 0)/len(results[k]["rewards"])*100 for k in ["baseline", "heuristic", "ai"]]
|
| 178 |
+
|
| 179 |
+
fig.add_trace(go.Bar(x=labels_list, y=rewards, marker_color=colors_list, showlegend=False, text=[f"{v:.2f}" for v in rewards], textposition="outside"), row=1, col=1)
|
| 180 |
+
fig.add_trace(go.Bar(x=labels_list, y=lats, marker_color=colors_list, showlegend=False, text=[f"{v:.1f}" for v in lats], textposition="outside"), row=1, col=2)
|
| 181 |
+
fig.add_trace(go.Bar(x=labels_list, y=pos, marker_color=colors_list, showlegend=False, text=[f"{v:.0f}%" for v in pos], textposition="outside"), row=1, col=3)
|
| 182 |
+
|
| 183 |
+
fig.update_layout(**CHART_LAYOUT, height=350)
|
| 184 |
+
return fig
|
| 185 |
|
| 186 |
+
# ---------------------------------------------------------------------------
|
| 187 |
+
# Gradio handlers
|
| 188 |
+
# ---------------------------------------------------------------------------
|
| 189 |
|
| 190 |
+
def simulate(n_steps):
|
| 191 |
+
results, recs = run_full_simulation(n_steps)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
+
# Metrics
|
| 194 |
+
base_r, heur_r, ai_r = np.mean(results["baseline"]["rewards"]), np.mean(results["heuristic"]["rewards"]), np.mean(results["ai"]["rewards"])
|
| 195 |
+
base_l, ai_l = np.mean(results["baseline"]["latencies"]), np.mean(results["ai"]["latencies"])
|
| 196 |
+
lat_imp = ((base_l - ai_l) / base_l * 100) if base_l > 0 else 0
|
| 197 |
+
reward_imp = ((ai_r - base_r) / abs(base_r) * 100) if base_r != 0 else 0
|
| 198 |
|
| 199 |
+
summary_md = f"""
|
| 200 |
+
### Results ({int(n_steps)} steps on real kernel telemetry)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
+
| | Linux CFS | Heuristic | **AI Strategist** |
|
| 203 |
+
|---|---|---|---|
|
| 204 |
+
| Mean Reward | {base_r:.4f} | {heur_r:.4f} | **{ai_r:.4f}** |
|
| 205 |
+
| Avg Latency | {base_l:.1f}us | {np.mean(results['heuristic']['latencies']):.1f}us | **{ai_l:.1f}us** |
|
| 206 |
+
| Latency Reduction | β | {((base_l - np.mean(results['heuristic']['latencies'])) / base_l * 100):.1f}% | **{lat_imp:.1f}%** |
|
| 207 |
+
| Reward vs Baseline | β | {((heur_r - base_r) / abs(base_r) * 100):+.1f}% | **{reward_imp:+.1f}%** |
|
| 208 |
+
"""
|
| 209 |
|
| 210 |
+
return (
|
| 211 |
+
summary_md,
|
| 212 |
+
make_cumulative_chart(results),
|
| 213 |
+
make_latency_chart(results),
|
| 214 |
+
make_action_chart(results),
|
| 215 |
+
make_summary_bars(results),
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def explore_state(idx):
    """Inspect one recorded kernel transition and compare all three strategies.

    Returns a markdown report plus a small Plotly bar chart contrasting the
    action each strategy (Linux CFS baseline, heuristic, AI) would take.
    """
    record = DATA[int(idx) % len(DATA)]
    state, raw_next = record["state"], record["next_state"]

    # Action each strategy chooses for this state.
    act_base = baseline_action(state)
    act_heur = heuristic_action(state)
    act_ai = ai_action(state)

    # Roll the state forward under each action, then score the outcomes.
    next_base = simulate_effect(state, raw_next, act_base)
    next_heur = simulate_effect(state, raw_next, act_heur)
    next_ai = simulate_effect(state, raw_next, act_ai)
    rew_base = compute_reward(state, next_base, act_base)
    rew_heur = compute_reward(state, next_heur, act_heur)
    rew_ai = compute_reward(state, next_ai, act_ai)

    wait = state[IDX_WAIT_US]
    base_wait = next_base[IDX_WAIT_US]
    # Guard against a zero baseline wait to avoid division by zero.
    lat_imp = ((base_wait - next_ai[IDX_WAIT_US]) / base_wait * 100) if base_wait > 0 else 0

    def meaning(action_value):
        # Translate a continuous action in [-1, 1] into a human-readable label.
        if action_value < -0.3:
            return "BOOST"
        if action_value > 0.3:
            return "DEMOTE"
        if action_value < -0.05:
            return "slight boost"
        return "slight demote" if action_value > 0.05 else "HOLD"

    # Plain-language rationale keyed off the observed wait latency.
    if wait > 50:
        reason = f"Very high latency ({wait:.0f}us) β aggressive priority boost to reduce scheduling delay."
    elif wait > 15:
        reason = f"Elevated latency ({wait:.0f}us) β boosting priority to improve responsiveness."
    elif wait < 3:
        reason = f"Very low latency ({wait:.0f}us) β system healthy, minimal adjustment."
    else:
        reason = f"Normal latency ({wait:.0f}us) β near-neutral action to maintain stability."

    md = f"""
### Transition #{int(idx)}
**PID** {record['pid']} | **CPU** {record['cpu']} | **Wait** {wait:.0f}us | **CSW** {state[IDX_CTX_SWITCHES]:.0f}

`{format_state(state)}`

| Strategy | Action | Decision | Result Latency | Reward |
|---|---|---|---|---|
| Linux CFS | {act_base:+.4f} | {meaning(act_base)} | {next_base[IDX_WAIT_US]:.1f}us | {rew_base:+.4f} |
| Heuristic | {act_heur:+.4f} | {meaning(act_heur)} | {next_heur[IDX_WAIT_US]:.1f}us | {rew_heur:+.4f} |
| **AI Strategist** | **{act_ai:+.4f}** | **{meaning(act_ai)}** | **{next_ai[IDX_WAIT_US]:.1f}us** | **{rew_ai:+.4f}** |

**AI reduced latency by {lat_imp:.1f}%** vs Linux default.

> **AI Reasoning:** {reason}
"""

    # Mini chart: side-by-side action comparison across strategies.
    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            x=["Linux CFS", "Heuristic", "AI"],
            y=[act_base, act_heur, act_ai],
            marker_color=[COLORS["baseline"], COLORS["heuristic"], COLORS["ai"]],
            text=[f"{act_base:+.2f}", f"{act_heur:+.2f}", f"{act_ai:+.2f}"],
            textposition="outside",
        )
    )
    fig.update_layout(
        **CHART_LAYOUT,
        title="Action Comparison",
        yaxis_title="Action Value",
        height=280,
        yaxis_range=[-1.1, 0.5],
    )
    fig.add_hline(y=0, line_dash="dash", line_color="#475569")

    return md, fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
|
| 275 |
# ---------------------------------------------------------------------------
|
| 276 |
+
# App
|
| 277 |
# ---------------------------------------------------------------------------
|
| 278 |
|
| 279 |
+
CSS = """
|
| 280 |
+
.gradio-container { max-width: 1400px !important; }
|
| 281 |
+
.dark { background-color: #0f172a !important; }
|
| 282 |
+
h1 { color: #06b6d4 !important; font-family: 'JetBrains Mono', monospace !important; }
|
| 283 |
+
h2, h3 { color: #e2e8f0 !important; }
|
| 284 |
+
.metric-box { background: #1e293b; border: 1px solid #334155; border-radius: 8px; padding: 16px; text-align: center; }
|
| 285 |
+
.metric-value { font-size: 2em; font-weight: bold; color: #06b6d4; }
|
| 286 |
+
.metric-label { color: #94a3b8; font-size: 0.9em; }
|
| 287 |
+
"""
|
| 288 |
+
|
| 289 |
+
with gr.Blocks(title="KernelX β AI Kernel Scheduler", css=CSS, theme=gr.themes.Base(primary_hue="cyan", neutral_hue="slate")) as app:
|
| 290 |
|
| 291 |
+
# Header
|
| 292 |
gr.Markdown("""
|
| 293 |
+
# KernelX
|
| 294 |
+
### AI-Powered Linux Kernel Scheduler | eBPF + SmolLM2-360M | 44ms Inference
|
| 295 |
|
| 296 |
+
Real-time scheduling optimization using reinforcement learning on live kernel telemetry.
|
| 297 |
+
534K transitions collected via eBPF sentinel. Model trained with SFT + GRPO.
|
| 298 |
""")
|
| 299 |
|
| 300 |
+
# Tab 1: Live Simulation
|
| 301 |
+
with gr.Tab("Simulation", id="sim"):
|
| 302 |
+
gr.Markdown("#### Compare AI Strategist vs Linux Default vs Heuristic on real kernel data")
|
| 303 |
+
with gr.Row():
|
| 304 |
+
n_slider = gr.Slider(50, 2000, value=500, step=50, label="Simulation Steps", scale=3)
|
| 305 |
+
run_btn = gr.Button("Run Simulation", variant="primary", scale=1, size="lg")
|
| 306 |
+
|
| 307 |
+
summary = gr.Markdown()
|
| 308 |
+
|
| 309 |
+
with gr.Row():
|
| 310 |
+
cumulative_plot = gr.Plot(label="Cumulative Reward")
|
| 311 |
+
latency_plot = gr.Plot(label="Latency Comparison")
|
| 312 |
+
|
| 313 |
+
with gr.Row():
|
| 314 |
+
action_plot = gr.Plot(label="Action Distribution")
|
| 315 |
+
|
| 316 |
+
summary_bars = gr.Plot(label="Performance Summary")
|
| 317 |
+
|
| 318 |
+
run_btn.click(
|
| 319 |
+
fn=simulate, inputs=[n_slider],
|
| 320 |
+
outputs=[summary, cumulative_plot, latency_plot, action_plot, summary_bars]
|
| 321 |
+
)
|
| 322 |
+
|
| 323 |
+
# Tab 2: State Explorer
|
| 324 |
+
with gr.Tab("State Explorer", id="explore"):
|
| 325 |
+
gr.Markdown("#### Inspect individual kernel states and see how each strategy decides")
|
| 326 |
with gr.Row():
|
| 327 |
+
idx_slider = gr.Slider(0, min(len(DATA)-1, 4999), value=0, step=1, label="Transition Index", scale=3)
|
| 328 |
+
explore_btn = gr.Button("Analyze", variant="primary", scale=1)
|
| 329 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
with gr.Row():
|
| 331 |
+
state_md = gr.Markdown()
|
| 332 |
+
action_bar = gr.Plot(label="Action Comparison")
|
| 333 |
+
|
| 334 |
+
explore_btn.click(fn=explore_state, inputs=[idx_slider], outputs=[state_md, action_bar])
|
| 335 |
+
|
| 336 |
+
# Tab 3: RL Explanation
|
| 337 |
+
with gr.Tab("How RL Improves", id="rl"):
|
| 338 |
+
gr.Markdown("""
|
| 339 |
+
## Policy Iteration: How KernelX Gets Smarter
|
| 340 |
+
|
| 341 |
+
```
|
| 342 |
+
COLLECT TRAIN DEPLOY
|
| 343 |
+
ββββββββββββ ββββββββββββββββ ββββββββββββββββ
|
| 344 |
+
β Run live β β SFT warm- β β Hot-swap β
|
| 345 |
+
β kernel β ββββββββ> β start + β βββββββ> β GGUF model β βββ
|
| 346 |
+
β w/ policy β JSONL β GRPO RL β .gguf β in brain β β
|
| 347 |
+
ββββββββββββ ββββββββββββββββ ββββββββββββββββ β
|
| 348 |
+
^ β
|
| 349 |
+
ββββββββββββββββββ REPEAT with better policy βββββββββββββββββββ
|
| 350 |
+
```
|
| 351 |
+
|
| 352 |
+
### Iteration Progression
|
| 353 |
+
|
| 354 |
+
| Iteration | Policy | Behavior | Expected Improvement |
|
| 355 |
+
|:---------:|--------|----------|---------------------|
|
| 356 |
+
| **0** | Linux CFS Default | No AI intervention. Generic scheduler. | Baseline |
|
| 357 |
+
| **1** | SFT Warm-Start | Learns from heuristic labels. Matches rules. | Match heuristic |
|
| 358 |
+
| **2** | GRPO on Iter 1 | Sees ACTUAL outcomes of its actions. | +10-20% over heuristic |
|
| 359 |
+
| **3+** | GRPO on Iter 2+ | Recursive self-improvement. | Diminishing returns |
|
| 360 |
+
|
| 361 |
+
### Why AI Beats the Default Scheduler
|
| 362 |
+
|
| 363 |
+
The Linux **Completely Fair Scheduler (CFS)** is designed for *all possible workloads*.
|
| 364 |
+
It has no knowledge of YOUR specific system's patterns.
|
| 365 |
+
|
| 366 |
+
KernelX learns:
|
| 367 |
+
- Which PIDs are latency-sensitive (and should be boosted)
|
| 368 |
+
- When high context switches indicate CPU contention (and should be dampened)
|
| 369 |
+
- How vruntime correlates with scheduling fairness for YOUR workload
|
| 370 |
+
- Timing patterns that no hand-written heuristic captures
|
| 371 |
+
|
| 372 |
+
### Training Evidence
|
| 373 |
+
|
| 374 |
+
| Metric | Before | After | Change |
|
| 375 |
+
|--------|--------|-------|--------|
|
| 376 |
+
| Training Loss | 2.05 | 0.28 | -86% |
|
| 377 |
+
| Token Accuracy | 61% | 91% | +49% |
|
| 378 |
+
| Format Compliance | 0% | 100% | β |
|
| 379 |
+
| Inference Latency | β | 44ms | Sub-50ms target met |
|
| 380 |
+
| Model Size | 1.4GB | 258MB | Q4_K_M quantization |
|
| 381 |
+
|
| 382 |
+
### Reward Function
|
| 383 |
+
|
| 384 |
+
$$R_t = \\alpha \\cdot \\log(\\Delta_{exec} + 1) - \\beta \\cdot \\Delta_{wait} - \\gamma \\cdot |a_t - a_{t-1}|$$
|
| 385 |
+
|
| 386 |
+
| Component | Weight | What it rewards |
|
| 387 |
+
|-----------|--------|----------------|
|
| 388 |
+
| Throughput | alpha=1.0 | CPU progress (more exec_runtime = good) |
|
| 389 |
+
| Latency | beta=2.0 | Low wait time (penalizes increases) |
|
| 390 |
+
| Stability | gamma=0.5 | Smooth actions (penalizes jitter) |
|
| 391 |
+
""")
|
| 392 |
+
|
| 393 |
+
# Tab 4: Architecture
|
| 394 |
+
with gr.Tab("Architecture", id="arch"):
|
| 395 |
+
gr.Markdown("""
|
| 396 |
+
## KernelX System Architecture
|
| 397 |
+
|
| 398 |
+
```
|
| 399 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 400 |
+
β LINUX KERNEL SPACE β
|
| 401 |
+
β β
|
| 402 |
+
β sched_switch ββ> eBPF Sentinel ββ> 24D Feature Vector β
|
| 403 |
+
β β β β
|
| 404 |
+
β priority_actions map <ββ BPF Ring Buffer βββ β
|
| 405 |
+
β β β β
|
| 406 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 407 |
+
β β
|
| 408 |
+
β ββββββββββvβββββββββββ
|
| 409 |
+
β β RUST BRIDGE β
|
| 410 |
+
β β β
|
| 411 |
+
β β Ring Buffer ββ> SHM (/dev/shm/kernelx_state)
|
| 412 |
+
β β β β
|
| 413 |
+
β β βββ> trajectories.jsonl β
|
| 414 |
+
β β β
|
| 415 |
+
β β ZMQ Sub <ββ action weights β
|
| 416 |
+
β βββββοΏ½οΏ½οΏ½ββββββββββββββββ
|
| 417 |
+
β β
|
| 418 |
+
β ββββββββββvβββββββββββ
|
| 419 |
+
β β PYTHON BRAIN β
|
| 420 |
+
β β (OpenEnv) β
|
| 421 |
+
β β β
|
| 422 |
+
β β SHM ββ> 10D features ββ> SmolLM2-360M β
|
| 423 |
+
β β β β
|
| 424 |
+
β β Action [-1, 1] <βββββββββββββ β
|
| 425 |
+
β β β β
|
| 426 |
+
β β βββ> ZMQ Pub ββ> Bridge β
|
| 427 |
+
β ββββββββββββββββββββββ
|
| 428 |
+
β
|
| 429 |
+
βββββ Kernel applies scheduling nudge at next sched_switch
|
| 430 |
+
```
|
| 431 |
+
|
| 432 |
+
### Component Details
|
| 433 |
+
|
| 434 |
+
| Component | Language | Role | Latency |
|
| 435 |
+
|-----------|---------|------|---------|
|
| 436 |
+
| eBPF Sentinel | C | Kernel telemetry extraction | <1us |
|
| 437 |
+
| Rust Bridge | Rust | SHM sync + trajectory recording | <1ms |
|
| 438 |
+
| Python Brain | Python | AI inference + OpenEnv server | 44ms |
|
| 439 |
+
| SmolLM2-360M | GGUF | Scheduling decision model | 44ms |
|
| 440 |
+
| Ratatui TUI | Rust | Real-time monitoring dashboard | 100ms refresh |
|
| 441 |
+
|
| 442 |
+
### Data Flow
|
| 443 |
|
| 444 |
+
| Step | Data | Format | Size |
|
| 445 |
+
|------|------|--------|------|
|
| 446 |
+
| Kernel -> Bridge | 24D telemetry | BPF ring buffer | 208 bytes/event |
|
| 447 |
+
| Bridge -> Brain | Active state | Shared memory | 376 bytes |
|
| 448 |
+
| Bridge -> Disk | Transitions | JSONL | ~300 bytes/line |
|
| 449 |
+
| Brain -> Bridge | Action | ZMQ string | ~50 bytes |
|
| 450 |
+
| Brain -> Kernel | Priority weight | BPF map | 8 bytes |
|
| 451 |
+
""")
|
| 452 |
|
| 453 |
+
# Footer
|
| 454 |
gr.Markdown("""
|
| 455 |
---
|
| 456 |
+
[Model](https://huggingface.co/Rayugacodes/kernelx-strategist) |
|
| 457 |
+
[Data](https://huggingface.co/datasets/Rayugacodes/kernelx-training-data) |
|
| 458 |
+
[Colab](https://colab.research.google.com/github/pie-314/KernelX/blob/model-training-hugging-face-integration/KernelX_Training.ipynb) |
|
| 459 |
+
[GitHub](https://github.com/pie-314/KernelX) |
|
| 460 |
+
Built for Meta PyTorch OpenEnv Hackathon 2026
|
| 461 |
""")
|
| 462 |
|
| 463 |
app.launch(server_name="0.0.0.0", server_port=7860)
|