"""Run `run_tinygrad_gate_demo` at several step counts and export the results.

The entry point is `run_benchmark_suite`, which writes a CSV of per-run
metrics plus a set of HTML charts (a combined dashboard and individual
curves) into an output directory.
"""

from __future__ import annotations

from dataclasses import dataclass
from math import exp
from pathlib import Path
import time

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from .hardware import collect_hardware_specs, hardware_table_rows
from .smoke import run_tinygrad_gate_demo

# Bounds for the automatic "keep doubling the step budget" extension phase.
_MIN_EXTENDED_STEPS = 256
_MAX_EXTENDED_STEPS = 4096


@dataclass(slots=True)
class BenchmarkResult:
    """Paths of the artifacts written by `run_benchmark_suite`."""

    # Path of the CSV file holding one row per benchmark run.
    csv_path: str
    # Paths of the generated HTML charts, dashboard first.
    chart_paths: list[str]


def _chart_backend():
    """Pick the charting backend.

    Returns:
        ``("openbb", line_chart, bar_chart)`` when the OpenBB charting helpers
        can be imported, otherwise ``("plotly", None, None)``.
    """
    try:
        from openbb_charting.charts.generic_charts import bar_chart, line_chart  # type: ignore[import-not-found]

        return "openbb", line_chart, bar_chart
    except Exception:
        # Deliberately broad: OpenBB is optional, and any failure while
        # importing it simply selects the plain plotly fallback.
        return "plotly", None, None


def _dark_layout_kwargs() -> dict:
    """Return a fresh copy of the dark-theme layout passed to OpenBB charts."""
    return {
        "template": "plotly_dark",
        "paper_bgcolor": "#0f172a",
        "plot_bgcolor": "#0f172a",
        "font": {"color": "#e2e8f0"},
    }


def _save_figure(fig, path: Path) -> None:
    """Write *fig* to *path* as HTML, creating parent directories as needed.

    If the object has a ``show`` method it is called with ``external=True``
    and a non-``None`` return value replaces *fig* before export.

    Raises:
        RuntimeError: if the resulting object has no ``write_html`` method.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    if hasattr(fig, "show"):
        try:
            shown = fig.show(external=True)
        except TypeError:
            # ``show`` does not accept ``external``; export the figure as-is.
            shown = None
        # A ``show`` that returns nothing must not discard the figure we
        # already hold, otherwise the export below fails spuriously.
        if shown is not None:
            fig = shown
    if hasattr(fig, "write_html"):
        fig.write_html(str(path))
        return
    raise RuntimeError("chart object does not support HTML export")


def _make_dashboard(df: pd.DataFrame, gate_df: pd.DataFrame, output_path: Path) -> None:
    """Render the multi-panel benchmark dashboard to *output_path* as HTML.

    Args:
        df: one row per benchmark run; the columns read here are ``epoch``,
            ``steps``, ``final_accuracy``, ``final_loss``,
            ``predictability_score``, ``wall_time_sec``, ``memory_rss_mb``
            and ``samples_per_sec``. The first row is the delta reference
            for the indicators and the last row supplies their values.
        gate_df: frame with ``channel`` and ``gate_scale`` columns.
        output_path: destination HTML file.

    Raises:
        ValueError: if *df* has no rows.
    """
    if df.empty:
        raise ValueError("cannot build a dashboard from an empty benchmark frame")

    hardware_specs = collect_hardware_specs()
    # The table trace below indexes the "Metric" and "Value" columns, so the
    # rows returned by hardware_table_rows must provide both keys.
    hardware_table = pd.DataFrame(hardware_table_rows(hardware_specs))

    fig = make_subplots(
        rows=5,
        cols=3,
        specs=[
            [{"type": "indicator"}, {"type": "indicator"}, {"type": "indicator"}],
            [{"type": "xy"}, {"type": "xy"}, {"type": "xy", "secondary_y": True}],
            [{"type": "xy"}, {"type": "xy"}, {"type": "xy"}],
            [{"type": "xy"}, {"type": "heatmap"}, {"type": "xy"}],
            [{"type": "table", "colspan": 3}, None, None],
        ],
        row_heights=[0.12, 0.22, 0.20, 0.22, 0.24],
        subplot_titles=(
            "Final Accuracy",
            "Predictability",
            "Peak Memory",
            "Accuracy vs Epoch",
            "Loss vs Epoch",
            "Memory and Processes",
            "Training Time",
            "Training Steps",
            "Throughput",
            "Learned Gates",
            "Metric Correlation",
            "Accuracy vs Loss",
            "Hardware Specs",
        ),
        vertical_spacing=0.08,
        horizontal_spacing=0.06,
    )

    # Row 1: headline indicators, latest run versus the first run.
    first, latest = df.iloc[0], df.iloc[-1]
    indicator_specs = [
        ("final_accuracy", ".1%", "#22c55e", "Accuracy"),
        ("predictability_score", ".2f", "#38bdf8", "Predictability"),
        ("memory_rss_mb", ".1f", "#f97316", "RSS MB"),
    ]
    for col, (column, fmt, color, title) in enumerate(indicator_specs, start=1):
        fig.add_trace(
            go.Indicator(
                mode="number+delta",
                value=float(latest[column]),
                number={"valueformat": fmt, "font": {"size": 24, "color": color}},
                title={"text": title, "font": {"size": 14, "color": "#e2e8f0"}},
                delta={"reference": float(first[column]), "relative": False},
            ),
            row=1,
            col=col,
        )

    def epoch_line(column: str, color: str, name: str) -> go.Scatter:
        """Build a per-epoch line trace for one metric column."""
        return go.Scatter(
            x=df["epoch"],
            y=df[column],
            mode="lines+markers",
            line=dict(color=color, width=3),
            name=name,
            showlegend=False,
        )

    def epoch_bar(column: str, color: str, name: str) -> go.Bar:
        """Build a per-epoch bar trace for one metric column."""
        return go.Bar(x=df["epoch"], y=df[column], marker_color=color, name=name, showlegend=False)

    # Row 2: accuracy / loss / wall time with memory on the secondary axis.
    # NOTE(review): accuracy and predictability_score share one y-axis here
    # although their scales may differ widely - confirm this is intended.
    fig.add_trace(epoch_line("final_accuracy", "#22c55e", "Accuracy"), row=2, col=1)
    fig.add_trace(epoch_line("predictability_score", "#38bdf8", "Predictability"), row=2, col=1)
    fig.add_trace(epoch_line("final_loss", "#f97316", "Loss"), row=2, col=2)
    # NOTE(review): this panel is titled "Memory and Processes" but plots wall
    # time and memory only - confirm whether a process count should be shown.
    fig.add_trace(epoch_line("wall_time_sec", "#a855f7", "Wall Time (s)"), row=2, col=3, secondary_y=False)
    fig.add_trace(epoch_bar("memory_rss_mb", "#f97316", "Memory MB"), row=2, col=3, secondary_y=True)

    # Row 3: time, step count and throughput per epoch.
    fig.add_trace(epoch_line("wall_time_sec", "#a855f7", "Wall Time (s)"), row=3, col=1)
    fig.add_trace(epoch_line("steps", "#14b8a6", "Training Steps"), row=3, col=2)
    fig.add_trace(epoch_bar("samples_per_sec", "#38bdf8", "Samples/sec"), row=3, col=3)

    # Row 4: learned gates, metric correlation, accuracy against loss.
    fig.add_trace(
        go.Bar(
            x=gate_df["channel"],
            y=gate_df["gate_scale"],
            marker_color="#a855f7",
            name="Gate Scale",
            showlegend=False,
        ),
        row=4,
        col=1,
    )
    corr_df = df[
        [
            "final_accuracy",
            "predictability_score",
            "final_loss",
            "wall_time_sec",
            "memory_rss_mb",
            "samples_per_sec",
        ]
    ].corr()
    fig.add_trace(
        go.Heatmap(
            z=corr_df.values,
            x=corr_df.columns,
            y=corr_df.index,
            colorscale="RdBu",
            zmid=0,
            showscale=False,
        ),
        row=4,
        col=2,
    )
    fig.add_trace(
        go.Scatter(
            x=df["final_loss"],
            y=df["final_accuracy"],
            mode="markers+text",
            text=df["epoch"].astype(str),
            textposition="top center",
            marker=dict(size=14, color=df["memory_rss_mb"], colorscale="Viridis", showscale=True),
            name="Accuracy/Loss",
            showlegend=False,
        ),
        row=4,
        col=3,
    )

    # Row 5: hardware specification table spanning all three columns.
    fig.add_trace(
        go.Table(
            header=dict(
                values=["Metric", "Value"],
                fill_color="#0f172a",
                font=dict(color="#e2e8f0", size=14),
                align="left",
                height=28,
            ),
            cells=dict(
                values=[hardware_table["Metric"], hardware_table["Value"]],
                fill_color="#111827",
                font=dict(color="#e2e8f0", size=12),
                align="left",
                height=24,
            ),
        ),
        row=5,
        col=1,
    )

    fig.update_layout(
        template="plotly_dark",
        height=1950,
        width=2000,
        title_text="OpenPeer NTK Trainer Benchmark Dashboard",
        paper_bgcolor="#0f172a",
        plot_bgcolor="#0f172a",
        font=dict(color="#e2e8f0", size=12),
        showlegend=False,
        margin=dict(l=30, r=30, t=90, b=30),
        title_x=0.02,
    )
    fig.update_annotations(font=dict(size=13, color="#e2e8f0"), yshift=10)
    fig.update_yaxes(title_text="Seconds", row=2, col=3, secondary_y=False)
    fig.update_yaxes(title_text="Memory MB", row=2, col=3, secondary_y=True)

    # Every panel in rows 2 and 3 is indexed by epoch: label the axis and
    # force one tick per epoch.
    for row in (2, 3):
        for col in (1, 2, 3):
            fig.update_xaxes(title_text="Epoch", tickmode="linear", dtick=1, row=row, col=col)

    fig.write_html(str(output_path), include_plotlyjs="cdn")


def _make_line_chart(df: pd.DataFrame, y: str, title: str, color: str, output_path: Path) -> None:
    """Write a line chart of column *y* against ``steps`` to *output_path*.

    Uses the OpenBB ``line_chart`` helper when available, otherwise
    ``plotly.express.line``.
    """
    backend, line_chart, _ = _chart_backend()
    if backend == "openbb" and line_chart is not None:
        fig = line_chart(
            data=df,
            x="steps",
            y=y,
            title=title,
            xtitle="Training steps",
            ytitle=y.replace("_", " ").title(),
            render=False,
            layout_kwargs=_dark_layout_kwargs(),
            scatter_kwargs={"line": {"color": color, "width": 3}},
        )
        _save_figure(fig, output_path)
        return

    import plotly.express as px

    fig = px.line(
        df,
        x="steps",
        y=y,
        markers=True,
        title=title,
        template="plotly_dark",
        color_discrete_sequence=[color],
    )
    fig.update_layout(paper_bgcolor="#0f172a", plot_bgcolor="#0f172a", font=dict(color="#e2e8f0"))
    fig.write_html(str(output_path))


def _make_bar_chart(df: pd.DataFrame, x: str, y: str, title: str, color: str, output_path: Path) -> None:
    """Write a bar chart of column *y* against column *x* to *output_path*.

    Uses the OpenBB ``bar_chart`` helper when available, otherwise
    ``plotly.express.bar``.
    """
    backend, _, bar_chart = _chart_backend()
    if backend == "openbb" and bar_chart is not None:
        fig = bar_chart(
            data=df,
            x=x,
            y=y,
            title=title,
            xtitle=x.replace("_", " ").title(),
            ytitle=y.replace("_", " ").title(),
            render=False,
            colors=[color],
            layout_kwargs=_dark_layout_kwargs(),
        )
        _save_figure(fig, output_path)
        return

    import plotly.express as px

    fig = px.bar(df, x=x, y=y, title=title, template="plotly_dark", color_discrete_sequence=[color])
    fig.update_layout(paper_bgcolor="#0f172a", plot_bgcolor="#0f172a", font=dict(color="#e2e8f0"))
    fig.write_html(str(output_path))


def _run_and_measure(
    *,
    epoch: int,
    steps: int,
    batch_size: int,
    seed: int,
    target_accuracy: float,
) -> tuple[object, dict[str, float]]:
    """Run one gate demo and return ``(result, metrics_row)``.

    The row holds the wall-clock time of the call, the derived throughput and
    predictability score, and the last telemetry sample (zeros when the
    result carries no telemetry).
    """
    started = time.perf_counter()
    result = run_tinygrad_gate_demo(
        steps=steps,
        batch_size=batch_size,
        seed=seed,
        target_accuracy=target_accuracy,
    )
    elapsed = time.perf_counter() - started

    last_sample = result.telemetry[-1] if result.telemetry else None
    row = {
        "epoch": int(epoch),
        "steps": int(result.trained_steps),
        "wall_time_sec": elapsed,
        # Throughput is based on the requested step budget; the floor on
        # elapsed avoids a division by zero.
        "samples_per_sec": (steps * batch_size) / max(elapsed, 1e-9),
        "initial_accuracy": result.initial_accuracy,
        "final_accuracy": result.final_accuracy,
        "final_loss": result.final_loss,
        "predictability_score": exp(-result.final_loss) * 100.0,
        "memory_rss_mb": last_sample.memory_rss_mb if last_sample is not None else 0.0,
        "child_processes": float(last_sample.child_processes if last_sample is not None else 0),
        "thread_count": float(last_sample.thread_count if last_sample is not None else 0),
        "reached_target": int(1 if result.reached_target else 0),
        "trained_steps": int(result.trained_steps),
        "target_accuracy": result.target_accuracy,
    }
    return result, row


def _gate_frame(gate_values) -> pd.DataFrame:
    """Label gate values ``c0, c1, ...`` and return them as a two-column frame."""
    return pd.DataFrame(
        {
            "channel": [f"c{i}" for i in range(len(gate_values))],
            "gate_scale": gate_values,
        }
    )


def run_benchmark_suite(
    step_counts: list[int],
    batch_size: int = 64,
    seed: int = 0,
    output_dir: str = "artifacts/benchmarks",
    target_accuracy: float = 0.99,
) -> BenchmarkResult:
    """Benchmark the gate demo over *step_counts* and write CSV/HTML artifacts.

    Each entry of *step_counts* is run once. If the run with the most trained
    steps is still below *target_accuracy*, further runs are added with a
    doubling step budget (starting at twice that step count, at least 256)
    until the target is reached or the budget exceeds 4096.

    Args:
        step_counts: step budgets to benchmark; must not be empty.
        batch_size: batch size forwarded to every run.
        seed: seed forwarded to every run.
        output_dir: directory for ``gate_benchmarks.csv`` and the HTML charts;
            created if missing.
        target_accuracy: accuracy forwarded to every run and used as the
            stopping criterion for the extension phase.

    Returns:
        A `BenchmarkResult` with the CSV path and the chart paths.

    Raises:
        ValueError: if *step_counts* is empty.
    """
    step_counts = list(step_counts)
    if not step_counts:
        # Fail before touching the filesystem instead of surfacing an opaque
        # KeyError from sorting an empty frame.
        raise ValueError("step_counts must contain at least one step count")

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    rows: list[dict[str, float]] = []
    last_result = None
    for epoch, steps in enumerate(step_counts, start=1):
        last_result, row = _run_and_measure(
            epoch=epoch,
            steps=steps,
            batch_size=batch_size,
            seed=seed,
            target_accuracy=target_accuracy,
        )
        rows.append(row)

    df = pd.DataFrame(rows).sort_values("steps")
    csv_path = out_dir / "gate_benchmarks.csv"
    df.to_csv(csv_path, index=False)

    if df["final_accuracy"].iloc[-1] < target_accuracy:
        extended_step = int(max(df["steps"].iloc[-1] * 2, _MIN_EXTENDED_STEPS))
        # After sorting by steps the last row need not carry the highest
        # epoch, so continue numbering from the maximum to avoid duplicates.
        next_epoch = int(df["epoch"].max()) + 1
        while df["final_accuracy"].iloc[-1] < target_accuracy and extended_step <= _MAX_EXTENDED_STEPS:
            # Track the newest run so the gate charts match the last row.
            last_result, row = _run_and_measure(
                epoch=next_epoch,
                steps=extended_step,
                batch_size=batch_size,
                seed=seed,
                target_accuracy=target_accuracy,
            )
            df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
            next_epoch += 1
            extended_step *= 2
            # Persist after every extra run so partial progress survives.
            df.to_csv(csv_path, index=False)

    chart_paths: list[str] = []

    dashboard_path = out_dir / "benchmark_dashboard.html"
    _make_dashboard(df, _gate_frame(last_result.learned_gates), dashboard_path)
    chart_paths.append(str(dashboard_path))

    line_charts = [
        ("final_accuracy", "Gate Controller Accuracy vs Training Steps", "#22c55e", "accuracy_curve.html"),
        ("final_loss", "Gate Controller Loss vs Training Steps", "#f97316", "loss_curve.html"),
        ("samples_per_sec", "Gate Controller Throughput vs Training Steps", "#38bdf8", "throughput_curve.html"),
    ]
    for column, title, color, filename in line_charts:
        chart_path = out_dir / filename
        _make_line_chart(df, column, title, color, chart_path)
        chart_paths.append(str(chart_path))

    gate_chart = out_dir / "learned_gates.html"
    _make_bar_chart(
        _gate_frame(last_result.learned_gate_sample),
        "channel",
        "gate_scale",
        "Learned Gate Scales",
        "#a855f7",
        gate_chart,
    )
    chart_paths.append(str(gate_chart))

    return BenchmarkResult(csv_path=str(csv_path), chart_paths=chart_paths)