# File size: 15,264 Bytes
#
# 3f2dde4

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
import time
from math import exp

import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from .hardware import collect_hardware_specs, hardware_table_rows
from .smoke import run_tinygrad_gate_demo


@dataclass(slots=True)
class BenchmarkResult:
    csv_path: str
    chart_paths: list[str]


def _chart_backend():
    try:
        from openbb_charting.charts.generic_charts import bar_chart, line_chart  # type: ignore[import-not-found]

        return "openbb", line_chart, bar_chart
    except Exception:
        return "plotly", None, None


def _save_figure(fig, path: Path) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    if hasattr(fig, "show"):
        try:
            fig = fig.show(external=True)
        except TypeError:
            pass
    if hasattr(fig, "write_html"):
        fig.write_html(str(path))
        return
    raise RuntimeError("chart object does not support HTML export")


def _make_dashboard(df: pd.DataFrame, gate_df: pd.DataFrame, output_path: Path) -> None:
    """Render the multi-panel benchmark dashboard and write it as one HTML file.

    The figure is a 5x3 grid: headline indicators (row 1), per-epoch curves
    (rows 2-3), gate scales / metric correlation / accuracy-vs-loss (row 4)
    and a full-width hardware table (row 5).

    Args:
        df: Benchmark rows; must be non-empty and contain the metric columns
            read below (``epoch``, ``steps``, ``final_accuracy``, ...).
        gate_df: Frame with ``channel`` and ``gate_scale`` columns.
        output_path: Destination HTML file (plotly.js is referenced via CDN).
    """
    # The hardware rows are expected to provide "Metric" and "Value" columns.
    hardware_table = pd.DataFrame(hardware_table_rows(collect_hardware_specs()))

    subplot_specs = [
        [{"type": "indicator"}, {"type": "indicator"}, {"type": "indicator"}],
        [{"type": "xy"}, {"type": "xy"}, {"type": "xy", "secondary_y": True}],
        [{"type": "xy"}, {"type": "xy"}, {"type": "xy"}],
        [{"type": "xy"}, {"type": "heatmap"}, {"type": "xy"}],
        [{"type": "table", "colspan": 3}, None, None],
    ]
    panel_titles = (
        "Final Accuracy",
        "Predictability",
        "Peak Memory",
        "Accuracy vs Epoch",
        "Loss vs Epoch",
        "Memory and Processes",
        "Training Time",
        "Training Steps",
        "Throughput",
        "Learned Gates",
        "Metric Correlation",
        "Accuracy vs Loss",
        "Hardware Specs",
    )
    fig = make_subplots(
        rows=5,
        cols=3,
        specs=subplot_specs,
        row_heights=[0.12, 0.22, 0.20, 0.22, 0.24],
        subplot_titles=panel_titles,
        vertical_spacing=0.08,
        horizontal_spacing=0.06,
    )

    # Row 1: headline numbers for the last row, with a delta against the first row.
    first_run = df.iloc[0]
    latest_run = df.iloc[-1]
    headline_metrics = (
        ("final_accuracy", ".1%", "#22c55e", "Accuracy"),
        ("predictability_score", ".2f", "#38bdf8", "Predictability"),
        ("memory_rss_mb", ".1f", "#f97316", "RSS MB"),
    )
    for grid_col, (metric, number_format, colour, label) in enumerate(headline_metrics, start=1):
        indicator = go.Indicator(
            mode="number+delta",
            value=float(latest_run[metric]),
            number={"valueformat": number_format, "font": {"size": 24, "color": colour}},
            title={"text": label, "font": {"size": 14, "color": "#e2e8f0"}},
            delta={"reference": float(first_run[metric]), "relative": False},
        )
        fig.add_trace(indicator, row=1, col=grid_col)

    epochs = df["epoch"]

    def epoch_line(metric: str, colour: str, label: str):
        """Build a lines+markers trace of ``metric`` against the epoch column."""
        return go.Scatter(
            x=epochs,
            y=df[metric],
            mode="lines+markers",
            line={"color": colour, "width": 3},
            name=label,
            showlegend=False,
        )

    # Row 2: accuracy/predictability, loss, and wall time with memory on a second axis.
    fig.add_trace(epoch_line("final_accuracy", "#22c55e", "Accuracy"), row=2, col=1)
    fig.add_trace(epoch_line("predictability_score", "#38bdf8", "Predictability"), row=2, col=1)
    fig.add_trace(epoch_line("final_loss", "#f97316", "Loss"), row=2, col=2)
    fig.add_trace(epoch_line("wall_time_sec", "#a855f7", "Wall Time (s)"), row=2, col=3, secondary_y=False)
    fig.add_trace(
        go.Bar(x=epochs, y=df["memory_rss_mb"], marker_color="#f97316", name="Memory MB", showlegend=False),
        row=2,
        col=3,
        secondary_y=True,
    )

    # Row 3: training time, trained steps and throughput.
    fig.add_trace(epoch_line("wall_time_sec", "#a855f7", "Wall Time (s)"), row=3, col=1)
    fig.add_trace(epoch_line("steps", "#14b8a6", "Training Steps"), row=3, col=2)
    fig.add_trace(
        go.Bar(x=epochs, y=df["samples_per_sec"], marker_color="#38bdf8", name="Samples/sec", showlegend=False),
        row=3,
        col=3,
    )

    # Row 4: learned gates, metric correlation heatmap, accuracy-vs-loss scatter.
    fig.add_trace(
        go.Bar(
            x=gate_df["channel"],
            y=gate_df["gate_scale"],
            marker_color="#a855f7",
            name="Gate Scale",
            showlegend=False,
        ),
        row=4,
        col=1,
    )

    correlated_metrics = [
        "final_accuracy",
        "predictability_score",
        "final_loss",
        "wall_time_sec",
        "memory_rss_mb",
        "samples_per_sec",
    ]
    corr_df = df[correlated_metrics].corr()
    fig.add_trace(
        go.Heatmap(
            z=corr_df.values,
            x=corr_df.columns,
            y=corr_df.index,
            colorscale="RdBu",
            zmid=0,
            showscale=False,
        ),
        row=4,
        col=2,
    )

    fig.add_trace(
        go.Scatter(
            x=df["final_loss"],
            y=df["final_accuracy"],
            mode="markers+text",
            text=epochs.astype(str),
            textposition="top center",
            marker={"size": 14, "color": df["memory_rss_mb"], "colorscale": "Viridis", "showscale": True},
            name="Accuracy/Loss",
            showlegend=False,
        ),
        row=4,
        col=3,
    )

    # Row 5: hardware table spanning the full width.
    table_header = {
        "values": ["<b>Metric</b>", "<b>Value</b>"],
        "fill_color": "#0f172a",
        "font": {"color": "#e2e8f0", "size": 14},
        "align": "left",
        "height": 28,
    }
    table_cells = {
        "values": [hardware_table["Metric"], hardware_table["Value"]],
        "fill_color": "#111827",
        "font": {"color": "#e2e8f0", "size": 12},
        "align": "left",
        "height": 24,
    }
    fig.add_trace(go.Table(header=table_header, cells=table_cells), row=5, col=1)

    fig.update_layout(
        template="plotly_dark",
        height=1950,
        width=2000,
        title_text="OpenPeer NTK Trainer Benchmark Dashboard",
        paper_bgcolor="#0f172a",
        plot_bgcolor="#0f172a",
        font={"color": "#e2e8f0", "size": 12},
        showlegend=False,
        margin={"l": 30, "r": 30, "t": 90, "b": 30},
        title_x=0.02,
    )
    fig.update_annotations(font={"size": 13, "color": "#e2e8f0"}, yshift=10)
    fig.update_yaxes(title_text="Seconds", row=2, col=3, secondary_y=False)
    fig.update_yaxes(title_text="Memory MB", row=2, col=3, secondary_y=True)

    # Every panel in rows 2-3 is indexed by epoch: label it and tick once per epoch.
    for grid_row in (2, 3):
        for grid_col in (1, 2, 3):
            fig.update_xaxes(title_text="Epoch", tickmode="linear", dtick=1, row=grid_row, col=grid_col)

    fig.write_html(str(output_path), include_plotlyjs="cdn")


def _make_line_chart(df: pd.DataFrame, y: str, title: str, color: str, output_path: Path):
    """Write an HTML line chart of column ``y`` against the ``steps`` column.

    Uses the OpenBB ``line_chart`` helper when ``_chart_backend`` reports it
    as available, and ``plotly.express`` otherwise.

    Args:
        df: Data containing a ``steps`` column and the ``y`` column.
        y: Name of the column plotted on the y axis.
        title: Chart title.
        color: Line colour.
        output_path: Destination HTML file.
    """
    backend, line_chart, _ = _chart_backend()
    openbb_available = backend == "openbb" and line_chart is not None

    if not openbb_available:
        # Imported lazily so the express module is only loaded on the fallback path.
        import plotly.express as px

        plain_fig = px.line(
            df,
            x="steps",
            y=y,
            markers=True,
            title=title,
            template="plotly_dark",
            color_discrete_sequence=[color],
        )
        plain_fig.update_layout(paper_bgcolor="#0f172a", plot_bgcolor="#0f172a", font={"color": "#e2e8f0"})
        plain_fig.write_html(str(output_path))
        return

    dark_layout = {
        "template": "plotly_dark",
        "paper_bgcolor": "#0f172a",
        "plot_bgcolor": "#0f172a",
        "font": {"color": "#e2e8f0"},
    }
    y_axis_label = y.replace("_", " ").title()
    openbb_fig = line_chart(
        data=df,
        x="steps",
        y=y,
        title=title,
        xtitle="Training steps",
        ytitle=y_axis_label,
        render=False,
        layout_kwargs=dark_layout,
        scatter_kwargs={"line": {"color": color, "width": 3}},
    )
    _save_figure(openbb_fig, output_path)


def _make_bar_chart(df: pd.DataFrame, x: str, y: str, title: str, color: str, output_path: Path):
    """Write an HTML bar chart of column ``y`` grouped by column ``x``.

    Uses the OpenBB ``bar_chart`` helper when ``_chart_backend`` reports it as
    available, and ``plotly.express`` otherwise.

    Args:
        df: Data containing the ``x`` and ``y`` columns.
        x: Name of the category column.
        y: Name of the value column.
        title: Chart title.
        color: Bar colour.
        output_path: Destination HTML file.
    """
    backend, _, bar_chart = _chart_backend()
    openbb_available = backend == "openbb" and bar_chart is not None

    if not openbb_available:
        # Imported lazily so the express module is only loaded on the fallback path.
        import plotly.express as px

        plain_fig = px.bar(
            df,
            x=x,
            y=y,
            title=title,
            template="plotly_dark",
            color_discrete_sequence=[color],
        )
        plain_fig.update_layout(paper_bgcolor="#0f172a", plot_bgcolor="#0f172a", font={"color": "#e2e8f0"})
        plain_fig.write_html(str(output_path))
        return

    dark_layout = {
        "template": "plotly_dark",
        "paper_bgcolor": "#0f172a",
        "plot_bgcolor": "#0f172a",
        "font": {"color": "#e2e8f0"},
    }
    x_axis_label = x.replace("_", " ").title()
    y_axis_label = y.replace("_", " ").title()
    openbb_fig = bar_chart(
        data=df,
        x=x,
        y=y,
        title=title,
        xtitle=x_axis_label,
        ytitle=y_axis_label,
        render=False,
        colors=[color],
        layout_kwargs=dark_layout,
    )
    _save_figure(openbb_fig, output_path)


# Column order of the benchmark CSV; also used to write a header-only file
# when no runs were requested.
_BENCHMARK_COLUMNS = (
    "epoch",
    "steps",
    "wall_time_sec",
    "samples_per_sec",
    "initial_accuracy",
    "final_accuracy",
    "final_loss",
    "predictability_score",
    "memory_rss_mb",
    "child_processes",
    "thread_count",
    "reached_target",
    "trained_steps",
    "target_accuracy",
)

# Bounds of the automatic "keep training until the target is met" extension.
_MIN_EXTENSION_STEPS = 256
_MAX_EXTENSION_STEPS = 4096


def _run_benchmark_epoch(
    epoch: int,
    steps: int,
    batch_size: int,
    seed: int,
    target_accuracy: float,
) -> tuple[dict[str, float], object]:
    """Run the gate demo once and summarize it as a single benchmark row.

    Returns:
        ``(row, result)`` where ``row`` maps every ``_BENCHMARK_COLUMNS`` name
        to its value and ``result`` is the object returned by
        ``run_tinygrad_gate_demo``.
    """
    started = time.perf_counter()
    result = run_tinygrad_gate_demo(
        steps=steps,
        batch_size=batch_size,
        seed=seed,
        target_accuracy=target_accuracy,
    )
    elapsed = time.perf_counter() - started

    # Only the final telemetry sample is reported; zeros stand in when the
    # run recorded no telemetry at all.
    final_sample = result.telemetry[-1] if result.telemetry else None
    memory_rss_mb = final_sample.memory_rss_mb if final_sample is not None else 0.0
    child_processes = final_sample.child_processes if final_sample is not None else 0
    thread_count = final_sample.thread_count if final_sample is not None else 0

    row = {
        "epoch": int(epoch),
        "steps": int(result.trained_steps),
        "wall_time_sec": elapsed,
        # NOTE(review): throughput uses the *requested* step count; if a run
        # can stop early (trained_steps < steps) this overstates it - confirm.
        "samples_per_sec": (steps * batch_size) / max(elapsed, 1e-9),
        "initial_accuracy": result.initial_accuracy,
        "final_accuracy": result.final_accuracy,
        "final_loss": result.final_loss,
        "predictability_score": exp(-result.final_loss) * 100.0,
        "memory_rss_mb": memory_rss_mb,
        "child_processes": float(child_processes),
        "thread_count": float(thread_count),
        "reached_target": int(1 if result.reached_target else 0),
        "trained_steps": int(result.trained_steps),
        "target_accuracy": result.target_accuracy,
    }
    return row, result


def _gate_frame(gate_values) -> pd.DataFrame:
    """Build the ``channel`` / ``gate_scale`` frame used by the gate charts."""
    return pd.DataFrame(
        {
            "channel": [f"c{i}" for i in range(len(gate_values))],
            "gate_scale": gate_values,
        }
    )


def run_benchmark_suite(
    step_counts: list[int],
    batch_size: int = 64,
    seed: int = 0,
    output_dir: str = "artifacts/benchmarks",
    target_accuracy: float = 0.99,
) -> BenchmarkResult:
    """Benchmark the gate demo at several step counts and write CSV + HTML charts.

    One training run is executed per entry of ``step_counts``. If the run with
    the most trained steps still falls short of ``target_accuracy``, extra runs
    are added with a doubling step count, starting at
    ``max(2 * largest trained steps, 256)`` and never exceeding 4096 steps.

    Args:
        step_counts: Requested training steps for each run. When empty, only a
            header-only CSV is written and no charts are produced.
        batch_size: Batch size forwarded to every run.
        seed: Seed forwarded to every run.
        output_dir: Directory receiving the CSV and the HTML charts; created
            if missing.
        target_accuracy: Accuracy forwarded to every run and used to decide
            whether extension runs are needed.

    Returns:
        The CSV path and the list of chart paths that were written.
    """
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    csv_path = out_dir / "gate_benchmarks.csv"
    columns = list(_BENCHMARK_COLUMNS)

    rows: list[dict[str, float]] = []
    last_result = None
    for epoch, steps in enumerate(step_counts, start=1):
        row, last_result = _run_benchmark_epoch(epoch, steps, batch_size, seed, target_accuracy)
        rows.append(row)

    if not rows:
        # Nothing was run: there is no "steps" column to sort by and no row
        # to chart, so emit an empty (header-only) CSV instead of crashing.
        pd.DataFrame(columns=columns).to_csv(csv_path, index=False)
        return BenchmarkResult(csv_path=str(csv_path), chart_paths=[])

    df = pd.DataFrame(rows, columns=columns).sort_values("steps")
    # Written before the optional extension so the requested runs are on disk
    # even if an extension run fails.
    df.to_csv(csv_path, index=False)

    if df["final_accuracy"].iloc[-1] < target_accuracy:
        extended_step = int(max(df["steps"].iloc[-1] * 2, _MIN_EXTENSION_STEPS))
        while df["final_accuracy"].iloc[-1] < target_accuracy and extended_step <= _MAX_EXTENSION_STEPS:
            # ``df`` is sorted by steps, so its last row need not carry the
            # highest epoch; use the maximum to keep epoch numbers unique.
            next_epoch = int(df["epoch"].max()) + 1
            # Track the newest run so the gate charts match the final metrics.
            row, last_result = _run_benchmark_epoch(next_epoch, extended_step, batch_size, seed, target_accuracy)
            df = pd.concat([df, pd.DataFrame([row], columns=columns)], ignore_index=True)
            extended_step *= 2
        df.to_csv(csv_path, index=False)

    chart_paths: list[str] = []

    dashboard_path = out_dir / "benchmark_dashboard.html"
    _make_dashboard(df, _gate_frame(last_result.learned_gates), dashboard_path)
    chart_paths.append(str(dashboard_path))

    line_charts = (
        ("accuracy_curve.html", "final_accuracy", "Gate Controller Accuracy vs Training Steps", "#22c55e"),
        ("loss_curve.html", "final_loss", "Gate Controller Loss vs Training Steps", "#f97316"),
        ("throughput_curve.html", "samples_per_sec", "Gate Controller Throughput vs Training Steps", "#38bdf8"),
    )
    for filename, column, title, color in line_charts:
        chart_path = out_dir / filename
        _make_line_chart(df, column, title, color, chart_path)
        chart_paths.append(str(chart_path))

    gate_chart = out_dir / "learned_gates.html"
    _make_bar_chart(
        _gate_frame(last_result.learned_gate_sample),
        "channel",
        "gate_scale",
        "Learned Gate Scales",
        "#a855f7",
        gate_chart,
    )
    chart_paths.append(str(gate_chart))

    return BenchmarkResult(csv_path=str(csv_path), chart_paths=chart_paths)