Spaces:

Aswini-Kumar
/

data-centric-env

Sleeping

File size: 10,735 Bytes

"""
Plot Rewards — Data-Centric AI RL Environment
=============================================
Reads JSONL training logs and produces judge-ready plots with labeled axes.

Log format (one JSON object per line in logs/training.jsonl):
  {
    "ts": 1714000000.0,       # Unix timestamp
    "episode": 42,            # Episode number
    "task": "task_1_easy",    # Task name
    "level": 1,               # Curriculum level (0=tutorial ... 3=hard)
    "reward": 0.34,           # Episode reward
    "accuracy_gain": 0.08,    # Accuracy delta vs baseline
    "steps_used": 18,         # Steps consumed
    "success": true           # Reached target accuracy?
  }

Output (saved to plots/):
  reward_curve.png     — Episode reward with rolling mean
  success_rate.png     — Success rate per curriculum level
  accuracy_gain.png    — Accuracy gain per episode with rolling mean
  curriculum.png       — Curriculum level over episodes

Usage:
  python plot_rewards.py                          # default log path
  python plot_rewards.py --log logs/training.jsonl --out plots/
"""

import argparse
import json
import sys
from pathlib import Path

import matplotlib
matplotlib.use("Agg")  # non-interactive backend — safe for headless/Colab
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd

# ── Style ────────────────────────────────────────────────────────────────────

# Colour used for each curriculum level, keyed by level number.
LEVEL_COLORS = {
    0: "#4C72B0",
    1: "#DD8452",
    2: "#55A868",
    3: "#C44E52",
}
# Display name of each curriculum level, keyed by level number.
LEVEL_NAMES = {
    0: "tutorial",
    1: "easy",
    2: "medium",
    3: "hard",
}
# Figure size and resolution shared by every plot function in this module.
FIGSIZE = (10, 4)
DPI = 150

# Matplotlib defaults applied once, when the module is imported.
plt.rcParams.update(
    {
        "font.size": 11,
        "axes.titlesize": 13,
        "axes.titleweight": "bold",
        "axes.labelsize": 11,
        "grid.alpha": 0.3,
    }
)


# ── Load log ─────────────────────────────────────────────────────────────────

def load_log(log_path: str) -> pd.DataFrame:
    """Load a JSONL training log into a DataFrame sorted by episode.

    Every non-blank line is parsed as one JSON object. Lines that are not
    valid JSON, or that decode to something other than an object, are skipped;
    the number of skipped lines is printed rather than dropped silently.

    Legacy column names are mapped onto the current schema
    (``mean_total_reward`` -> ``reward``, ``mean_env_reward`` ->
    ``accuracy_gain``, ``stage`` -> ``task``). A legacy column is only renamed
    when the current name is absent, so a log carrying both never ends up with
    two columns sharing one label.

    Missing columns are filled with defaults: ``episode`` is the row position,
    ``level`` is 0, ``reward`` and ``accuracy_gain`` are 0.0, and ``success``
    is ``accuracy_gain > 0.05``. Individual rows without a ``level`` value are
    also treated as level 0.

    Args:
        log_path: Path to the JSONL log file.

    Returns:
        The records sorted by ``episode`` with a fresh 0..n-1 index, or an
        empty DataFrame when the file does not exist or holds no usable
        records.
    """
    path = Path(log_path)
    if not path.exists():
        print(f"[plot_rewards] Log not found: {log_path}")
        return pd.DataFrame()

    records = []
    skipped = 0
    with open(path, encoding="utf-8") as f:
        for raw_line in f:
            text = raw_line.strip()
            if not text:
                continue
            try:
                record = json.loads(text)
            except json.JSONDecodeError:
                skipped += 1
                continue
            # A bare number, string or list is valid JSON but not an episode.
            if isinstance(record, dict):
                records.append(record)
            else:
                skipped += 1

    if skipped:
        print(f"[plot_rewards] Skipped {skipped} malformed line(s) in {log_path}")
    if not records:
        print(f"[plot_rewards] Log is empty: {log_path}")
        return pd.DataFrame()

    df = pd.DataFrame(records)

    # Normalise column names — handle both old and new log formats. Renaming
    # onto a name that already exists would create duplicate column labels,
    # after which df["reward"] is a DataFrame instead of a Series.
    legacy_names = {
        "mean_total_reward": "reward",
        "mean_env_reward": "accuracy_gain",
        "stage": "task",
    }
    renames = {
        old: new
        for old, new in legacy_names.items()
        if old in df.columns and new not in df.columns
    }
    df = df.rename(columns=renames)

    if "episode" not in df.columns:
        df["episode"] = range(len(df))
    if "level" not in df.columns:
        df["level"] = 0
    if "accuracy_gain" not in df.columns:
        df["accuracy_gain"] = 0.0
    if "reward" not in df.columns:
        df["reward"] = 0.0
    if "success" not in df.columns:
        df["success"] = df["accuracy_gain"] > 0.05

    # Rows lacking a level (mixed old/new records) would otherwise be NaN and
    # break the int(level) conversions done by the plot functions.
    df["level"] = df["level"].fillna(0)

    # Stable sort: records sharing an episode number keep their log order.
    df = df.sort_values("episode", kind="mergesort").reset_index(drop=True)
    n = len(df)
    print(f"[plot_rewards] Loaded {n} episodes from {log_path}")
    return df


def _adaptive_window(df: pd.DataFrame, requested: int) -> int:
    """Use min(requested, len/3) so plots are never flat lines with few data points."""
    return max(3, min(requested, len(df) // 3))


# ── Plots ─────────────────────────────────────────────────────────────────────

def plot_reward_curve(df: pd.DataFrame, out_dir: Path, window: int = 20):
    """Plot 1: Episode reward over training with rolling mean.

    Draws the raw per-episode reward, overlays a rolling mean when the frame
    has at least ``window`` rows, and marks each episode at which the
    curriculum level changes to a level above 0 with a dashed vertical line
    labelled with the level name. The figure is saved as
    ``out_dir / "reward_curve.png"``.

    Args:
        df: Episodes with ``episode``, ``reward`` and ``level`` columns.
        out_dir: Directory that receives the PNG; it is not created here.
        window: Rolling-mean window size, in episodes.
    """
    fig, ax = plt.subplots(figsize=FIGSIZE)

    ax.plot(df["episode"], df["reward"], alpha=0.25, color="steelblue",
            linewidth=0.8, label="Raw reward")

    if len(df) >= window:
        smooth = df["reward"].rolling(window, min_periods=1).mean()
        ax.plot(df["episode"], smooth, color="steelblue", linewidth=2.2,
                label=f"Rolling mean (window={window})")

    # Mark curriculum level transitions. diff() is NaN on the first row, so
    # the first episode also counts as a transition when it is above level 0.
    level_changes = df[df["level"].diff() != 0]
    # Blended transform: x in data coordinates, y as a fraction of the axes
    # height. Placing labels at a multiple of the upper y-limit instead puts
    # them outside the axes whenever that limit is negative.
    label_transform = ax.get_xaxis_transform()
    for _, row in level_changes.iterrows():
        if row["level"] > 0:
            level = int(row["level"])
            color = LEVEL_COLORS.get(level, "gray")
            ax.axvline(row["episode"], color=color,
                       linewidth=1.0, linestyle="--", alpha=0.7)
            ax.text(row["episode"] + 0.5, 0.95,
                    LEVEL_NAMES.get(level, ""), transform=label_transform,
                    fontsize=8, color=color, rotation=90, va="top")

    ax.set_xlabel("Episode")
    ax.set_ylabel("Episode reward")
    ax.set_title("Training Reward over Episodes")
    ax.legend(loc="lower right")
    ax.grid(True)
    fig.tight_layout()

    out_path = out_dir / "reward_curve.png"
    fig.savefig(out_path, dpi=DPI)
    plt.close(fig)
    print(f"[plot_rewards] Saved: {out_path}")


def plot_success_rate(df: pd.DataFrame, out_dir: Path, window: int = 20,
                      threshold: float = 0.60):
    """Plot 2: Success rate per curriculum level.

    For every level present in ``df`` the rolling mean of ``success`` is
    computed over that level's episodes only and drawn as one line. A dashed
    horizontal line marks ``threshold``. The figure is saved as
    ``out_dir / "success_rate.png"``.

    Args:
        df: Episodes with ``episode``, ``level`` and ``success`` columns.
        out_dir: Directory that receives the PNG; it is not created here.
        window: Rolling-mean window size, in episodes of the same level.
        threshold: Success rate at which the reference line is drawn, as a
            fraction in [0, 1]. Defaults to 0.60.
    """
    fig, ax = plt.subplots(figsize=FIGSIZE)

    # Rows without a level cannot be assigned to a curve, and int(NaN) raises.
    levels = sorted(df["level"].dropna().unique())
    for level in levels:
        subset = df[df["level"] == level].sort_values("episode")
        # Cast to float so object-dtype success columns can be averaged too;
        # for a plain boolean column the rolling mean is unchanged.
        rate = subset["success"].astype(float).rolling(window, min_periods=1).mean()
        color = LEVEL_COLORS.get(int(level), "gray")
        label = f"Level {int(level)}: {LEVEL_NAMES.get(int(level), 'unknown')}"
        ax.plot(subset["episode"], rate, color=color, linewidth=2, label=label)

    ax.axhline(threshold, color="red", linewidth=1.0, linestyle="--", alpha=0.6,
               label=f"Advancement threshold ({threshold:.0%})")
    ax.set_xlabel("Episode")
    ax.set_ylabel(f"Success rate (rolling mean, window={window})")
    ax.set_title("Success Rate per Curriculum Level")
    ax.set_ylim(0, 1.05)
    ax.legend(loc="lower right")
    ax.grid(True)
    fig.tight_layout()

    out_path = out_dir / "success_rate.png"
    fig.savefig(out_path, dpi=DPI)
    plt.close(fig)
    print(f"[plot_rewards] Saved: {out_path}")


def plot_accuracy_gain(df: pd.DataFrame, out_dir: Path, window: int = 20):
    """Plot 3: Accuracy gain over training.

    Draws the raw per-episode accuracy gain, adds its rolling mean when the
    frame has at least ``window`` rows, and marks the zero line. The figure
    is saved as ``out_dir / "accuracy_gain.png"``.
    """
    episodes = df["episode"]
    gains = df["accuracy_gain"]

    fig, ax = plt.subplots(figsize=FIGSIZE)

    # Faint raw series underneath, bold smoothed series on top.
    ax.plot(episodes, gains, label="Raw accuracy gain", color="green",
            linewidth=0.8, alpha=0.25)
    has_full_window = len(df) >= window
    if has_full_window:
        rolling_mean = gains.rolling(window, min_periods=1).mean()
        ax.plot(episodes, rolling_mean, label=f"Rolling mean (window={window})",
                color="green", linewidth=2.2)

    # Zero reference: anything below it is worse than the baseline.
    ax.axhline(0, color="black", linewidth=0.8, linestyle="-", alpha=0.4)

    ax.set_title("Accuracy Gain per Episode")
    ax.set_xlabel("Episode")
    ax.set_ylabel("Accuracy gain vs baseline")
    ax.grid(True)
    ax.legend(loc="lower right")
    fig.tight_layout()

    target = out_dir / "accuracy_gain.png"
    fig.savefig(target, dpi=DPI)
    plt.close(fig)
    print(f"[plot_rewards] Saved: {target}")


def plot_curriculum(df: pd.DataFrame, out_dir: Path):
    """Plot 4: Curriculum level progression over time.

    Scatters each episode at its curriculum level, coloured per level, and
    overlays a 10-episode rolling mean of the level. The legend lists one
    colour patch per known level plus the rolling-mean line. The figure is
    saved as ``out_dir / "curriculum.png"``.

    Args:
        df: Episodes with ``episode`` and ``level`` columns.
        out_dir: Directory that receives the PNG; it is not created here.
    """
    fig, ax = plt.subplots(figsize=FIGSIZE)

    colors = [LEVEL_COLORS.get(int(level), "gray") for level in df["level"]]
    ax.scatter(df["episode"], df["level"], c=colors, s=4, alpha=0.5, zorder=2)

    # Smooth line. Keep the handle: the legend below is built from explicit
    # handles, so a labelled line that is not passed in would never be listed.
    (mean_line,) = ax.plot(df["episode"],
                           df["level"].rolling(10, min_periods=1).mean(),
                           color="black", linewidth=1.5, alpha=0.6,
                           label="Rolling mean level")

    ax.set_xlabel("Episode")
    ax.set_ylabel("Curriculum level")
    ax.set_title("Curriculum Progression")
    # Ticks come from LEVEL_NAMES so they cannot drift from the legend text.
    ax.set_yticks(list(LEVEL_NAMES))
    ax.set_yticklabels([f"{level}: {name}" for level, name in LEVEL_NAMES.items()])
    ax.grid(True, axis="x")

    handles = [
        mpatches.Patch(color=color,
                       label=f"{level}: {LEVEL_NAMES.get(level, 'unknown')}")
        for level, color in LEVEL_COLORS.items()
    ]
    handles.append(mean_line)
    ax.legend(handles=handles, loc="lower right", fontsize=9)
    fig.tight_layout()

    out_path = out_dir / "curriculum.png"
    fig.savefig(out_path, dpi=DPI)
    plt.close(fig)
    print(f"[plot_rewards] Saved: {out_path}")


# ── Entry point ───────────────────────────────────────────────────────────────

def plot_all(log_path: str = "logs/training.jsonl", out_dir: str = "plots/",
             window: int = 20):
    """Load a training log, write all four plots and print a summary.

    Returns without creating anything when the log yields no data. Otherwise
    the output directory is created if needed, the rolling window is adapted
    to the amount of data, each plot function is called in turn, and summary
    statistics (reward mean/min/max, success rate, highest level reached,
    window used) are printed.

    Args:
        log_path: Path to the JSONL training log.
        out_dir: Directory for the PNG files; created when missing.
        window: Requested rolling-mean window size; the size actually used
            is reported in the summary.
    """
    df = load_log(log_path)
    if df.empty:
        print("[plot_rewards] No data to plot.")
        return

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Adaptive window — avoid flat lines with small datasets
    w = _adaptive_window(df, window)

    plot_reward_curve(df, out, w)
    plot_success_rate(df, out, w)
    plot_accuracy_gain(df, out, w)
    plot_curriculum(df, out)

    # ── Print summary stats ───────────────────────────────────────────────────
    n = len(df)
    avg_r = df["reward"].mean()
    max_r = df["reward"].max()
    min_r = df["reward"].min()
    succ = df["success"].mean()
    max_lvl = int(df["level"].max())
    # Level names come from the module-level LEVEL_NAMES table, so the summary
    # uses the same names as the plots.
    max_lvl_name = LEVEL_NAMES.get(max_lvl, "?")

    print(f"\n{'='*50}")
    print(f"  TRAINING SUMMARY ({n} episodes)")
    print(f"{'='*50}")
    print(f"  Avg reward   : {avg_r:+.3f}")
    print(f"  Min / Max    : {min_r:+.3f} / {max_r:+.3f}")
    print(f"  Success rate : {succ:.1%}")
    print(f"  Max level    : {max_lvl} ({max_lvl_name})")
    print(f"  Window used  : {w} episodes")
    print(f"{'='*50}")
    print(f"\n  Plots saved to: {out}/")


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with the --log, --out and --window options."""
    cli = argparse.ArgumentParser(description="Plot training reward curves")
    cli.add_argument("--log", default="logs/training.jsonl",
                     help="Path to JSONL training log")
    cli.add_argument("--out", default="plots/",
                     help="Output directory for plots")
    cli.add_argument("--window", type=int, default=20,
                     help="Rolling mean window size")
    return cli


if __name__ == "__main__":
    # Script entry point: parse the options and render every plot.
    options = _build_arg_parser().parse_args()
    plot_all(options.log, options.out, options.window)