Spaces:

Aswini-Kumar
/

data-centric-env

Sleeping

App Files Files Community

data-centric-env / plot_rewards.py

Aswini-Kumar

Clean final training notebook: no demo, no Drive, all steps including plots + eval + download

f096486 about 1 month ago

raw

history blame contribute delete

10.7 kB

	"""
	Plot Rewards — Data-Centric AI RL Environment
	=============================================
	Reads JSONL training logs and produces judge-ready plots with labeled axes.

	Log format (one JSON object per line in logs/training.jsonl):
	{
	"ts": 1714000000.0, # Unix timestamp
	"episode": 42, # Episode number
	"task": "task_1_easy", # Task name
	"level": 1, # Curriculum level (0=tutorial ... 3=hard)
	"reward": 0.34, # Episode reward
	"accuracy_gain": 0.08, # Accuracy delta vs baseline
	"steps_used": 18, # Steps consumed
	"success": true # Reached target accuracy?
	}

	Output (saved to plots/):
	reward_curve.png — Episode reward with rolling mean
	success_rate.png — Success rate per curriculum level
	accuracy_gain.png — Accuracy gain per episode with rolling mean
	curriculum.png — Curriculum level over episodes

	Usage:
	python plot_rewards.py # default log path
	python plot_rewards.py --log logs/training.jsonl --out plots/
	"""

	import argparse
	import json
	import sys
	from pathlib import Path

	import matplotlib
	matplotlib.use("Agg") # non-interactive backend — safe for headless/Colab
	import matplotlib.pyplot as plt
	import matplotlib.patches as mpatches
	import numpy as np
	import pandas as pd

	# ── Style ────────────────────────────────────────────────────────────────────

	# Line/marker colour for each curriculum level index.
	LEVEL_COLORS = {
	    0: "#4C72B0",
	    1: "#DD8452",
	    2: "#55A868",
	    3: "#C44E52",
	}
	# Human-readable name for each curriculum level index.
	LEVEL_NAMES = {
	    0: "tutorial",
	    1: "easy",
	    2: "medium",
	    3: "hard",
	}

	# Figure size (inches) and resolution shared by every saved plot.
	FIGSIZE = (10, 4)
	DPI = 150

	# Global matplotlib defaults, applied once when the module is imported.
	plt.rcParams.update(
	    {
	        "font.size": 11,
	        "axes.titlesize": 13,
	        "axes.titleweight": "bold",
	        "axes.labelsize": 11,
	        "grid.alpha": 0.3,
	    }
	)


	# ── Load log ─────────────────────────────────────────────────────────────────

	def load_log(log_path: str) -> pd.DataFrame:
	    """Load a JSONL training log into a DataFrame sorted by episode.

	    Each non-blank line is parsed as one JSON object. Lines that are not
	    valid JSON, or that decode to something other than an object, are
	    skipped and the number skipped is reported. Old-format field names
	    (``mean_total_reward``, ``mean_env_reward``, ``stage``) are folded into
	    their new names (``reward``, ``accuracy_gain``, ``task``); when a log
	    mixes both formats the two columns are merged instead of duplicated.

	    Missing columns get defaults: ``episode`` is the row index, ``level`` is
	    0, ``reward`` and ``accuracy_gain`` are 0.0, and ``success`` is
	    ``accuracy_gain > 0.05``. Rows with a missing ``level`` or ``success``
	    value receive the same defaults.

	    Args:
	        log_path: Path to the JSONL log file.

	    Returns:
	        The normalised log, or an empty DataFrame when the path is not a
	        file or holds no usable records.
	    """
	    path = Path(log_path)
	    # is_file() also rejects directories, which exists() would let through.
	    if not path.is_file():
	        print(f"[plot_rewards] Log not found: {log_path}")
	        return pd.DataFrame()

	    records = []
	    skipped = 0
	    with open(path, encoding="utf-8") as f:
	        for line in f:
	            line = line.strip()
	            if not line:
	                continue
	            try:
	                record = json.loads(line)
	            except json.JSONDecodeError:
	                skipped += 1
	                continue
	            # Only JSON objects map onto DataFrame rows.
	            if isinstance(record, dict):
	                records.append(record)
	            else:
	                skipped += 1

	    if skipped:
	        print(f"[plot_rewards] Skipped {skipped} malformed line(s) in {log_path}")

	    if not records:
	        print(f"[plot_rewards] Log is empty: {log_path}")
	        return pd.DataFrame()

	    df = pd.DataFrame(records)

	    # Normalise column names — handle both old and new log formats.
	    col_map = {
	        "mean_total_reward": "reward",
	        "mean_env_reward": "accuracy_gain",
	        "stage": "task",
	    }
	    for old, new in col_map.items():
	        if old not in df.columns:
	            continue
	        if new in df.columns:
	            # Mixed-format log: a plain rename would create two columns with
	            # the same label, so fill the gaps in the new column instead.
	            df[new] = df[new].where(df[new].notna(), df[old])
	            df = df.drop(columns=old)
	        else:
	            df = df.rename(columns={old: new})

	    if "episode" not in df.columns:
	        df["episode"] = range(len(df))
	    if "level" not in df.columns:
	        df["level"] = 0
	    else:
	        df["level"] = df["level"].fillna(0)
	    if "accuracy_gain" not in df.columns:
	        df["accuracy_gain"] = 0.0
	    if "reward" not in df.columns:
	        df["reward"] = 0.0

	    derived_success = df["accuracy_gain"] > 0.05
	    if "success" not in df.columns:
	        df["success"] = derived_success
	    elif df["success"].isna().any():
	        filled = df["success"].where(df["success"].notna(), derived_success)
	        df["success"] = filled.astype(bool)

	    # A stable sort keeps the original log order for tied episode numbers.
	    df = df.sort_values("episode", kind="stable").reset_index(drop=True)
	    print(f"[plot_rewards] Loaded {len(df)} episodes from {log_path}")
	    return df


	def _adaptive_window(df: pd.DataFrame, requested: int) -> int:
	"""Use min(requested, len/3) so plots are never flat lines with few data points."""
	return max(3, min(requested, len(df) // 3))


	# ── Plots ─────────────────────────────────────────────────────────────────────

	def plot_reward_curve(df: pd.DataFrame, out_dir: Path, window: int = 20):
	    """Plot 1: Episode reward over training with rolling mean.

	    Draws the raw per-episode reward, overlays a rolling mean when the frame
	    has at least ``window`` rows, and marks each row where the curriculum
	    level changes to a level above 0 with a dashed vertical line and a
	    rotated level-name label.

	    Args:
	        df: Log frame with ``episode``, ``reward`` and ``level`` columns.
	        out_dir: Directory that receives ``reward_curve.png`` (not created
	            here).
	        window: Rolling-mean window size, in rows.
	    """
	    fig, ax = plt.subplots(figsize=FIGSIZE)

	    ax.plot(df["episode"], df["reward"], alpha=0.25, color="steelblue",
	            linewidth=0.8, label="Raw reward")

	    if len(df) >= window:
	        smooth = df["reward"].rolling(window, min_periods=1).mean()
	        ax.plot(df["episode"], smooth, color="steelblue", linewidth=2.2,
	                label=f"Rolling mean (window={window})")

	    # Blended transform: x in data units, y as a fraction of the axes height.
	    # Scaling the upper y-limit by 0.95 in data units would push the labels
	    # outside the axes whenever that limit is zero or negative.
	    label_transform = ax.get_xaxis_transform()

	    # Mark curriculum level transitions. diff() is NaN on the first row, so
	    # the first row counts as a transition too.
	    level_changes = df[df["level"].diff() != 0]
	    for episode, level in zip(level_changes["episode"], level_changes["level"]):
	        if not level > 0:
	            continue
	        level = int(level)
	        color = LEVEL_COLORS.get(level, "gray")
	        ax.axvline(episode, color=color, linewidth=1.0, linestyle="--",
	                   alpha=0.7)
	        ax.text(episode + 0.5, 0.95, LEVEL_NAMES.get(level, ""),
	                transform=label_transform, fontsize=8, color=color,
	                rotation=90, va="top")

	    ax.set_xlabel("Episode")
	    ax.set_ylabel("Episode reward")
	    ax.set_title("Training Reward over Episodes")
	    ax.legend(loc="lower right")
	    ax.grid(True)
	    fig.tight_layout()

	    out_path = out_dir / "reward_curve.png"
	    fig.savefig(out_path, dpi=DPI)
	    plt.close(fig)
	    print(f"[plot_rewards] Saved: {out_path}")


	def plot_success_rate(df: pd.DataFrame, out_dir: Path, window: int = 20):
	    """Plot 2: rolling success rate, one line per curriculum level.

	    For every distinct ``level`` value the matching rows are ordered by
	    episode and the rolling mean of ``success`` is drawn against
	    ``episode``. A dashed red line marks the 60% level. The figure is saved
	    as ``success_rate.png`` inside ``out_dir``.
	    """
	    fig, ax = plt.subplots(figsize=FIGSIZE)

	    for lvl in sorted(df["level"].unique()):
	        per_level = (
	            df[df["level"] == lvl]
	            .copy()
	            .sort_values("episode")
	            .reset_index(drop=True)
	        )
	        rolling_rate = per_level["success"].rolling(window, min_periods=1).mean()
	        lvl_key = int(lvl)
	        ax.plot(
	            per_level["episode"],
	            rolling_rate,
	            color=LEVEL_COLORS.get(lvl_key, "gray"),
	            linewidth=2,
	            label=f"Level {lvl_key}: {LEVEL_NAMES.get(lvl_key, 'unknown')}",
	        )

	    # Reference line for the advancement threshold.
	    ax.axhline(
	        0.60,
	        color="red",
	        linewidth=1.0,
	        linestyle="--",
	        alpha=0.6,
	        label="Advancement threshold (60%)",
	    )

	    ax.set_title("Success Rate per Curriculum Level")
	    ax.set_xlabel("Episode")
	    ax.set_ylabel(f"Success rate (rolling mean, window={window})")
	    ax.set_ylim(0, 1.05)
	    ax.legend(loc="lower right")
	    ax.grid(True)
	    fig.tight_layout()

	    target = out_dir / "success_rate.png"
	    fig.savefig(target, dpi=DPI)
	    plt.close(fig)
	    print(f"[plot_rewards] Saved: {target}")


	def plot_accuracy_gain(df: pd.DataFrame, out_dir: Path, window: int = 20):
	    """Plot 3: per-episode accuracy gain with an optional rolling mean.

	    The raw ``accuracy_gain`` series is always drawn; the rolling mean is
	    added only when the frame has at least ``window`` rows. A faint
	    horizontal line marks zero gain. The figure is saved as
	    ``accuracy_gain.png`` inside ``out_dir``.
	    """
	    fig, ax = plt.subplots(figsize=FIGSIZE)

	    episodes = df["episode"]
	    gains = df["accuracy_gain"]

	    ax.plot(
	        episodes,
	        gains,
	        alpha=0.25,
	        color="green",
	        linewidth=0.8,
	        label="Raw accuracy gain",
	    )

	    if window <= len(df):
	        ax.plot(
	            episodes,
	            gains.rolling(window, min_periods=1).mean(),
	            color="green",
	            linewidth=2.2,
	            label=f"Rolling mean (window={window})",
	        )

	    # Zero-gain baseline.
	    ax.axhline(0, color="black", linewidth=0.8, linestyle="-", alpha=0.4)

	    ax.set_title("Accuracy Gain per Episode")
	    ax.set_xlabel("Episode")
	    ax.set_ylabel("Accuracy gain vs baseline")
	    ax.legend(loc="lower right")
	    ax.grid(True)
	    fig.tight_layout()

	    target = out_dir / "accuracy_gain.png"
	    fig.savefig(target, dpi=DPI)
	    plt.close(fig)
	    print(f"[plot_rewards] Saved: {target}")


	def plot_curriculum(df: pd.DataFrame, out_dir: Path):
	    """Plot 4: Curriculum level progression over time.

	    Scatters one point per episode at its curriculum level, coloured by
	    level, and overlays a 10-row rolling mean of the level. The legend lists
	    every level colour plus the rolling-mean line. The figure is saved as
	    ``curriculum.png`` inside ``out_dir``.

	    Args:
	        df: Log frame with ``episode`` and ``level`` columns.
	        out_dir: Directory that receives ``curriculum.png`` (not created
	            here).
	    """
	    fig, ax = plt.subplots(figsize=FIGSIZE)

	    point_colors = [LEVEL_COLORS.get(int(level), "gray") for level in df["level"]]
	    ax.scatter(df["episode"], df["level"], c=point_colors, s=4, alpha=0.5,
	               zorder=2)

	    # Smooth line. Keep the handle: passing handles= to legend() replaces the
	    # automatic entries, so this labelled line must be added explicitly.
	    (mean_line,) = ax.plot(
	        df["episode"],
	        df["level"].rolling(10, min_periods=1).mean(),
	        color="black",
	        linewidth=1.5,
	        alpha=0.6,
	        label="Rolling mean level",
	    )

	    ax.set_xlabel("Episode")
	    ax.set_ylabel("Curriculum level")
	    ax.set_title("Curriculum Progression")
	    # Ticks and labels come from LEVEL_NAMES so they cannot drift from it.
	    ax.set_yticks(list(LEVEL_NAMES))
	    ax.set_yticklabels([f"{level}: {name}" for level, name in LEVEL_NAMES.items()])
	    ax.grid(True, axis="x")

	    patches = [
	        mpatches.Patch(color=color,
	                       label=f"{level}: {LEVEL_NAMES.get(level, 'unknown')}")
	        for level, color in LEVEL_COLORS.items()
	    ]
	    ax.legend(handles=[*patches, mean_line], loc="lower right", fontsize=9)
	    fig.tight_layout()

	    out_path = out_dir / "curriculum.png"
	    fig.savefig(out_path, dpi=DPI)
	    plt.close(fig)
	    print(f"[plot_rewards] Saved: {out_path}")


	# ── Entry point ───────────────────────────────────────────────────────────────

	def plot_all(log_path: str = "logs/training.jsonl", out_dir: str = "plots/",
	             window: int = 20):
	    """Load the training log, write all four plots, and print a summary.

	    Does nothing beyond printing a notice when the log yields no rows.
	    Otherwise creates ``out_dir`` if needed, shrinks ``window`` to fit the
	    amount of data, renders every plot, and prints reward, success-rate and
	    level statistics.

	    Args:
	        log_path: Path to the JSONL training log.
	        out_dir: Directory for the PNG files; created when missing.
	        window: Requested rolling-mean window; the value actually used is
	            chosen by ``_adaptive_window``.
	    """
	    df = load_log(log_path)
	    if df.empty:
	        print("[plot_rewards] No data to plot.")
	        return

	    out = Path(out_dir)
	    out.mkdir(parents=True, exist_ok=True)

	    # Adaptive window — avoid flat lines with small datasets
	    w = _adaptive_window(df, window)

	    plot_reward_curve(df, out, w)
	    plot_success_rate(df, out, w)
	    plot_accuracy_gain(df, out, w)
	    plot_curriculum(df, out)

	    # ── Print summary stats ───────────────────────────────────────────────────
	    n = len(df)
	    avg_r = df["reward"].mean()
	    max_r = df["reward"].max()
	    min_r = df["reward"].min()
	    succ = df["success"].mean()
	    max_lvl = int(df["level"].max())

	    print(f"\n{'='*50}")
	    print(f" TRAINING SUMMARY ({n} episodes)")
	    print(f"{'='*50}")
	    print(f" Avg reward : {avg_r:+.3f}")
	    print(f" Min / Max : {min_r:+.3f} / {max_r:+.3f}")
	    print(f" Success rate : {succ:.1%}")
	    # Use the shared LEVEL_NAMES table rather than a private copy so the
	    # summary stays in step with the plot labels.
	    print(f" Max level : {max_lvl} ({LEVEL_NAMES.get(max_lvl, '?')})")
	    print(f" Window used : {w} episodes")
	    print(f"{'='*50}")
	    print(f"\n Plots saved to: {out}/")


	if __name__ == "__main__":
	    # Command-line entry point: parse the three options and render the plots.
	    cli = argparse.ArgumentParser(description="Plot training reward curves")
	    cli.add_argument(
	        "--log",
	        default="logs/training.jsonl",
	        help="Path to JSONL training log",
	    )
	    cli.add_argument(
	        "--out",
	        default="plots/",
	        help="Output directory for plots",
	    )
	    cli.add_argument(
	        "--window",
	        type=int,
	        default=20,
	        help="Rolling mean window size",
	    )
	    opts = cli.parse_args()
	    plot_all(log_path=opts.log, out_dir=opts.out, window=opts.window)