# Mentors4EDU's picture
# Upload 41 files
# 3f2dde4 verified
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import time
from math import exp
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from .hardware import collect_hardware_specs, hardware_table_rows
from .smoke import run_tinygrad_gate_demo
@dataclass(slots=True)
class BenchmarkResult:
csv_path: str
chart_paths: list[str]
def _chart_backend():
    """Pick the charting backend that is importable in this environment.

    Returns:
        A ``(backend_name, line_chart, bar_chart)`` tuple. When the OpenBB
        generic chart helpers import cleanly the name is ``"openbb"`` and the
        two callables are returned; otherwise the name is ``"plotly"`` and
        both callables are ``None``.
    """
    try:
        from openbb_charting.charts.generic_charts import bar_chart, line_chart  # type: ignore[import-not-found]
    except Exception:
        # Any failure while importing the optional dependency (missing
        # package or an error raised during its import) selects the fallback.
        return "plotly", None, None
    return "openbb", line_chart, bar_chart
def _save_figure(fig, path: Path) -> None:
    """Export a chart object to ``path`` as HTML.

    The parent directory of ``path`` is created if it does not exist. When
    the object has a ``show`` method it is called with ``external=True``
    (presumably so OpenBB-style figures hand back an exportable figure
    instead of rendering -- confirm against the OpenBB charting API), and
    the returned object is used for the export if it supports ``write_html``.

    Args:
        fig: Chart object; it, or the object returned by its ``show`` method,
            must provide ``write_html``.
        path: Destination HTML file.

    Raises:
        RuntimeError: If no exportable figure object is available.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    if hasattr(fig, "show"):
        try:
            shown = fig.show(external=True)
        except TypeError:
            # This show() does not accept ``external``; export ``fig`` as is.
            shown = None
        # Only adopt the return value when it can be exported. A show() that
        # returns None (or some non-figure value) must not discard the
        # original figure, which may itself support write_html.
        if hasattr(shown, "write_html"):
            fig = shown

    if not hasattr(fig, "write_html"):
        raise RuntimeError("chart object does not support HTML export")
    fig.write_html(str(path))
def _make_dashboard(df: pd.DataFrame, gate_df: pd.DataFrame, output_path: Path) -> None:
    """Build the benchmark dashboard figure and write it to ``output_path`` as HTML.

    The figure is a 5x3 subplot grid:

    * row 1 -- indicators for the last row of ``df`` (accuracy,
      predictability, RSS memory), each with a delta against the first row;
    * rows 2-3 -- per-epoch series (accuracy and predictability, loss, wall
      time with memory on a secondary axis, wall time, steps, throughput);
    * row 4 -- gate scales from ``gate_df``, a correlation heatmap of the
      numeric metrics, and an accuracy-vs-loss scatter coloured by memory;
    * row 5 -- a table of hardware specs spanning all three columns.

    Args:
        df: One row per benchmark run. Reads the columns ``epoch``, ``steps``,
            ``final_accuracy``, ``predictability_score``, ``final_loss``,
            ``wall_time_sec``, ``memory_rss_mb`` and ``samples_per_sec``.
            Must contain at least one row.
        gate_df: Frame with ``channel`` and ``gate_scale`` columns.
        output_path: Destination HTML file. Its parent directory is not
            created here.
    """
    specs_frame = pd.DataFrame(hardware_table_rows(collect_hardware_specs()))

    panel_specs = [
        [{"type": "indicator"} for _ in range(3)],
        [{"type": "xy"}, {"type": "xy"}, {"type": "xy", "secondary_y": True}],
        [{"type": "xy"} for _ in range(3)],
        [{"type": "xy"}, {"type": "heatmap"}, {"type": "xy"}],
        [{"type": "table", "colspan": 3}, None, None],
    ]
    panel_titles = (
        "Final Accuracy",
        "Predictability",
        "Peak Memory",
        "Accuracy vs Epoch",
        "Loss vs Epoch",
        "Memory and Processes",
        "Training Time",
        "Training Steps",
        "Throughput",
        "Learned Gates",
        "Metric Correlation",
        "Accuracy vs Loss",
        "Hardware Specs",
    )
    fig = make_subplots(
        rows=5,
        cols=3,
        specs=panel_specs,
        row_heights=[0.12, 0.22, 0.20, 0.22, 0.24],
        subplot_titles=panel_titles,
        vertical_spacing=0.08,
        horizontal_spacing=0.06,
    )

    # --- Row 1: headline indicators (last run, delta against first run). ---
    newest_run = df.iloc[-1]
    oldest_run = df.iloc[0]
    headline_cards = (
        ("final_accuracy", ".1%", "#22c55e", "Accuracy"),
        ("predictability_score", ".2f", "#38bdf8", "Predictability"),
        ("memory_rss_mb", ".1f", "#f97316", "RSS MB"),
    )
    for grid_col, (metric, number_format, hue, caption) in enumerate(headline_cards, start=1):
        card = go.Indicator(
            mode="number+delta",
            value=float(newest_run[metric]),
            number={"valueformat": number_format, "font": {"size": 24, "color": hue}},
            title={"text": caption, "font": {"size": 14, "color": "#e2e8f0"}},
            delta={"reference": float(oldest_run[metric]), "relative": False},
        )
        fig.add_trace(card, row=1, col=grid_col)

    def epoch_line(column: str, hue: str, label: str):
        """Line-with-markers trace of one ``df`` column against epoch."""
        return go.Scatter(
            x=df["epoch"],
            y=df[column],
            mode="lines+markers",
            line=dict(color=hue, width=3),
            name=label,
            showlegend=False,
        )

    def bar_trace(frame: pd.DataFrame, x_column: str, y_column: str, hue: str, label: str):
        """Single-colour bar trace of ``y_column`` against ``x_column``."""
        return go.Bar(
            x=frame[x_column],
            y=frame[y_column],
            marker_color=hue,
            name=label,
            showlegend=False,
        )

    # --- Row 2: accuracy/predictability, loss, wall time + memory. ---
    fig.add_trace(epoch_line("final_accuracy", "#22c55e", "Accuracy"), row=2, col=1)
    fig.add_trace(epoch_line("predictability_score", "#38bdf8", "Predictability"), row=2, col=1)
    fig.add_trace(epoch_line("final_loss", "#f97316", "Loss"), row=2, col=2)
    fig.add_trace(epoch_line("wall_time_sec", "#a855f7", "Wall Time (s)"), row=2, col=3, secondary_y=False)
    fig.add_trace(
        bar_trace(df, "epoch", "memory_rss_mb", "#f97316", "Memory MB"),
        row=2,
        col=3,
        secondary_y=True,
    )

    # --- Row 3: wall time, training steps, throughput. ---
    fig.add_trace(epoch_line("wall_time_sec", "#a855f7", "Wall Time (s)"), row=3, col=1)
    fig.add_trace(epoch_line("steps", "#14b8a6", "Training Steps"), row=3, col=2)
    fig.add_trace(bar_trace(df, "epoch", "samples_per_sec", "#38bdf8", "Samples/sec"), row=3, col=3)

    # --- Row 4: learned gates, metric correlation, accuracy vs loss. ---
    fig.add_trace(bar_trace(gate_df, "channel", "gate_scale", "#a855f7", "Gate Scale"), row=4, col=1)

    correlated_metrics = [
        "final_accuracy",
        "predictability_score",
        "final_loss",
        "wall_time_sec",
        "memory_rss_mb",
        "samples_per_sec",
    ]
    corr_df = df[correlated_metrics].corr()
    fig.add_trace(
        go.Heatmap(
            z=corr_df.values,
            x=corr_df.columns,
            y=corr_df.index,
            colorscale="RdBu",
            zmid=0,
            showscale=False,
        ),
        row=4,
        col=2,
    )
    fig.add_trace(
        go.Scatter(
            x=df["final_loss"],
            y=df["final_accuracy"],
            mode="markers+text",
            text=df["epoch"].astype(str),
            textposition="top center",
            marker=dict(size=14, color=df["memory_rss_mb"], colorscale="Viridis", showscale=True),
            name="Accuracy/Loss",
            showlegend=False,
        ),
        row=4,
        col=3,
    )

    # --- Row 5: hardware specification table. ---
    table_header = dict(
        values=["<b>Metric</b>", "<b>Value</b>"],
        fill_color="#0f172a",
        font=dict(color="#e2e8f0", size=14),
        align="left",
        height=28,
    )
    table_cells = dict(
        values=[specs_frame["Metric"], specs_frame["Value"]],
        fill_color="#111827",
        font=dict(color="#e2e8f0", size=12),
        align="left",
        height=24,
    )
    fig.add_trace(go.Table(header=table_header, cells=table_cells), row=5, col=1)

    # --- Global styling. ---
    fig.update_layout(
        template="plotly_dark",
        height=1950,
        width=2000,
        title_text="OpenPeer NTK Trainer Benchmark Dashboard",
        paper_bgcolor="#0f172a",
        plot_bgcolor="#0f172a",
        font=dict(color="#e2e8f0", size=12),
        showlegend=False,
        margin=dict(l=30, r=30, t=90, b=30),
        title_x=0.02,
    )
    fig.update_annotations(font=dict(size=13, color="#e2e8f0"), yshift=10)

    fig.update_yaxes(title_text="Seconds", row=2, col=3, secondary_y=False)
    fig.update_yaxes(title_text="Memory MB", row=2, col=3, secondary_y=True)

    # Every panel in rows 2 and 3 is plotted against epoch: label the axis
    # and force one tick per epoch.
    for grid_row in (2, 3):
        for grid_col in (1, 2, 3):
            fig.update_xaxes(
                title_text="Epoch",
                tickmode="linear",
                dtick=1,
                row=grid_row,
                col=grid_col,
            )

    fig.write_html(str(output_path), include_plotlyjs="cdn")
def _make_line_chart(df: pd.DataFrame, y: str, title: str, color: str, output_path: Path):
    """Write an HTML line chart of column ``y`` against ``steps``.

    Uses the OpenBB ``line_chart`` helper when ``_chart_backend`` reports it
    as available, otherwise falls back to ``plotly.express``.

    Args:
        df: Data containing a ``steps`` column and the ``y`` column.
        y: Name of the column plotted on the vertical axis.
        title: Chart title.
        color: Line colour.
        output_path: Destination HTML file.
    """
    backend_name, openbb_line, _unused_bar = _chart_backend()
    openbb_ready = backend_name == "openbb" and openbb_line is not None

    if not openbb_ready:
        # Imported lazily so it is only required on the fallback path.
        import plotly.express as px

        figure = px.line(
            df,
            x="steps",
            y=y,
            markers=True,
            title=title,
            template="plotly_dark",
            color_discrete_sequence=[color],
        )
        figure.update_layout(
            paper_bgcolor="#0f172a",
            plot_bgcolor="#0f172a",
            font=dict(color="#e2e8f0"),
        )
        figure.write_html(str(output_path))
        return

    dark_layout = {
        "template": "plotly_dark",
        "paper_bgcolor": "#0f172a",
        "plot_bgcolor": "#0f172a",
        "font": {"color": "#e2e8f0"},
    }
    y_axis_label = y.replace("_", " ").title()
    figure = openbb_line(
        data=df,
        x="steps",
        y=y,
        title=title,
        xtitle="Training steps",
        ytitle=y_axis_label,
        render=False,
        layout_kwargs=dark_layout,
        scatter_kwargs={"line": {"color": color, "width": 3}},
    )
    _save_figure(figure, output_path)
    return
def _make_bar_chart(df: pd.DataFrame, x: str, y: str, title: str, color: str, output_path: Path):
    """Write an HTML bar chart of column ``y`` against column ``x``.

    Uses the OpenBB ``bar_chart`` helper when ``_chart_backend`` reports it
    as available, otherwise falls back to ``plotly.express``.

    Args:
        df: Data containing the ``x`` and ``y`` columns.
        x: Name of the column used for the bar categories.
        y: Name of the column used for the bar heights.
        title: Chart title.
        color: Bar colour.
        output_path: Destination HTML file.
    """
    backend_name, _unused_line, openbb_bar = _chart_backend()
    openbb_ready = backend_name == "openbb" and openbb_bar is not None

    if not openbb_ready:
        # Imported lazily so it is only required on the fallback path.
        import plotly.express as px

        figure = px.bar(
            df,
            x=x,
            y=y,
            title=title,
            template="plotly_dark",
            color_discrete_sequence=[color],
        )
        figure.update_layout(
            paper_bgcolor="#0f172a",
            plot_bgcolor="#0f172a",
            font=dict(color="#e2e8f0"),
        )
        figure.write_html(str(output_path))
        return

    dark_layout = {
        "template": "plotly_dark",
        "paper_bgcolor": "#0f172a",
        "plot_bgcolor": "#0f172a",
        "font": {"color": "#e2e8f0"},
    }
    x_axis_label = x.replace("_", " ").title()
    y_axis_label = y.replace("_", " ").title()
    figure = openbb_bar(
        data=df,
        x=x,
        y=y,
        title=title,
        xtitle=x_axis_label,
        ytitle=y_axis_label,
        render=False,
        colors=[color],
        layout_kwargs=dark_layout,
    )
    _save_figure(figure, output_path)
    return
# Bounds for the automatic follow-up runs that are scheduled when the
# requested runs do not reach the target accuracy.
_MIN_EXTENDED_STEPS = 256
_MAX_EXTENDED_STEPS = 4096

# Column order of the benchmark CSV; matches the key order of _benchmark_row.
_BENCHMARK_COLUMNS = (
    "epoch",
    "steps",
    "wall_time_sec",
    "samples_per_sec",
    "initial_accuracy",
    "final_accuracy",
    "final_loss",
    "predictability_score",
    "memory_rss_mb",
    "child_processes",
    "thread_count",
    "reached_target",
    "trained_steps",
    "target_accuracy",
)


def _timed_gate_run(steps: int, batch_size: int, seed: int, target_accuracy: float):
    """Run one gate demo and return ``(result, elapsed_seconds)``."""
    started = time.perf_counter()
    result = run_tinygrad_gate_demo(
        steps=steps,
        batch_size=batch_size,
        seed=seed,
        target_accuracy=target_accuracy,
    )
    return result, time.perf_counter() - started


def _benchmark_row(epoch: int, requested_steps: int, batch_size: int, elapsed: float, result) -> dict[str, float]:
    """Flatten one demo result into a benchmark table row."""
    # Resource figures come from the last telemetry sample, if any exist.
    last_sample = result.telemetry[-1] if result.telemetry else None
    return {
        "epoch": int(epoch),
        "steps": int(result.trained_steps),
        "wall_time_sec": elapsed,
        # NOTE(review): throughput is based on the requested step count, not
        # result.trained_steps -- confirm these cannot differ (early stop).
        "samples_per_sec": (requested_steps * batch_size) / max(elapsed, 1e-9),
        "initial_accuracy": result.initial_accuracy,
        "final_accuracy": result.final_accuracy,
        "final_loss": result.final_loss,
        "predictability_score": exp(-result.final_loss) * 100.0,
        "memory_rss_mb": last_sample.memory_rss_mb if last_sample is not None else 0.0,
        "child_processes": float(last_sample.child_processes if last_sample is not None else 0),
        "thread_count": float(last_sample.thread_count if last_sample is not None else 0),
        "reached_target": int(1 if result.reached_target else 0),
        "trained_steps": int(result.trained_steps),
        "target_accuracy": result.target_accuracy,
    }


def _gate_frame(scales) -> pd.DataFrame:
    """Build a ``channel`` / ``gate_scale`` frame with labels ``c0``, ``c1``, ..."""
    return pd.DataFrame(
        {
            "channel": [f"c{i}" for i in range(len(scales))],
            "gate_scale": scales,
        }
    )


def run_benchmark_suite(
    step_counts: list[int],
    batch_size: int = 64,
    seed: int = 0,
    output_dir: str = "artifacts/benchmarks",
    target_accuracy: float = 0.99,
) -> BenchmarkResult:
    """Run the gate demo once per step count and write a CSV plus HTML charts.

    Each entry of ``step_counts`` is one timed run ("epoch"). The resulting
    table is sorted by trained steps and written to ``gate_benchmarks.csv``.
    If the run with the most steps is still below ``target_accuracy``, extra
    runs are added with doubling step counts (starting at twice that run's
    steps, at least 256, and not exceeding 4096) until the target is reached
    or the cap is hit. The dashboard, the accuracy / loss / throughput curves
    and the learned-gate bar chart are then written to ``output_dir``.

    Args:
        step_counts: Training step counts to benchmark, one run each. An
            empty list writes a header-only CSV and produces no charts.
        batch_size: Batch size passed to every run.
        seed: Seed passed to every run.
        output_dir: Directory for the CSV and charts; created if missing.
        target_accuracy: Accuracy passed to every run and used to decide
            whether extended runs are needed.

    Returns:
        BenchmarkResult with the CSV path and the chart paths that were written.
    """
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    csv_path = out_dir / "gate_benchmarks.csv"

    step_counts = list(step_counts)
    if not step_counts:
        # With no runs there is no "steps" column to sort by and nothing to
        # chart; emit an empty table instead of failing inside pandas.
        pd.DataFrame(columns=list(_BENCHMARK_COLUMNS)).to_csv(csv_path, index=False)
        return BenchmarkResult(csv_path=str(csv_path), chart_paths=[])

    rows: list[dict[str, float]] = []
    last_result = None
    for epoch, steps in enumerate(step_counts, start=1):
        result, elapsed = _timed_gate_run(steps, batch_size, seed, target_accuracy)
        rows.append(_benchmark_row(epoch, steps, batch_size, elapsed, result))
        last_result = result

    df = pd.DataFrame(rows).sort_values("steps")
    # Persist the requested runs before attempting any extended runs, so the
    # results survive a failure in the extension phase.
    df.to_csv(csv_path, index=False)

    last_accuracy = df["final_accuracy"].iloc[-1]
    if last_accuracy < target_accuracy:
        extended_step = int(max(df["steps"].iloc[-1] * 2, _MIN_EXTENDED_STEPS))
        # The frame is sorted by steps, so its last row need not carry the
        # highest epoch number; continue numbering from the maximum to keep
        # epoch values unique.
        next_epoch = int(df["epoch"].max()) + 1
        extra_rows: list[dict[str, float]] = []
        while last_accuracy < target_accuracy and extended_step <= _MAX_EXTENDED_STEPS:
            result, elapsed = _timed_gate_run(extended_step, batch_size, seed, target_accuracy)
            extra_rows.append(_benchmark_row(next_epoch, extended_step, batch_size, elapsed, result))
            # Keep the gate charts in sync with the most recent run.
            last_result = result
            last_accuracy = result.final_accuracy
            next_epoch += 1
            extended_step *= 2
        if extra_rows:
            df = pd.concat([df, pd.DataFrame(extra_rows)], ignore_index=True)
            df.to_csv(csv_path, index=False)

    chart_paths: list[str] = []

    # step_counts is non-empty here, so at least one run has set last_result.
    dashboard_path = out_dir / "benchmark_dashboard.html"
    _make_dashboard(df, _gate_frame(last_result.learned_gates), dashboard_path)
    chart_paths.append(str(dashboard_path))

    line_charts = (
        ("accuracy_curve.html", "final_accuracy", "Gate Controller Accuracy vs Training Steps", "#22c55e"),
        ("loss_curve.html", "final_loss", "Gate Controller Loss vs Training Steps", "#f97316"),
        ("throughput_curve.html", "samples_per_sec", "Gate Controller Throughput vs Training Steps", "#38bdf8"),
    )
    for file_name, column, title, color in line_charts:
        chart_path = out_dir / file_name
        _make_line_chart(df, column, title, color, chart_path)
        chart_paths.append(str(chart_path))

    gate_chart = out_dir / "learned_gates.html"
    _make_bar_chart(
        _gate_frame(last_result.learned_gate_sample),
        "channel",
        "gate_scale",
        "Learned Gate Scales",
        "#a855f7",
        gate_chart,
    )
    chart_paths.append(str(gate_chart))

    return BenchmarkResult(csv_path=str(csv_path), chart_paths=chart_paths)