from __future__ import annotations

import argparse
import json
import sys
from collections.abc import Sequence
from pathlib import Path


def _parse_args(argv: Sequence[str] | None) -> argparse.Namespace:
    """Parse the command line; ``argv=None`` means ``sys.argv[1:]``."""
    ap = argparse.ArgumentParser(description="Run a tiny baseline and save a reward-curve PNG.")
    ap.add_argument("--episodes", type=int, default=200)
    ap.add_argument("--out-dir", type=Path, default=Path("plots"))
    args = ap.parse_args(argv)
    # A negative count would silently act like zero (``range(-n)`` is empty)
    # and produce an empty curve, so report it as a usage error instead.
    if args.episodes < 0:
        ap.error("--episodes must be non-negative")
    return args


def _collect_rewards(env, action_cls, episodes: int) -> list[float]:
    """Play *episodes* one-step episodes on *env* and return each reward.

    Every episode resets the environment, submits one freshly built
    *action_cls* verdict and records the second element of the 3-tuple
    returned by ``env.step`` converted to ``float``.
    """
    rewards: list[float] = []
    for _ in range(episodes):
        env.reset()
        # Naive always-vulnerable verdict baseline (intentionally dumb).
        action = action_cls(
            action_type="verdict",
            is_vulnerable=True,
            vuln_type="CWE-89",
            exploit_sketch="sql select where concat injection",
        )
        _obs, reward, _done = env.step(action)
        rewards.append(float(reward))
    return rewards


def _save_reward_curve(rewards: Sequence[float], out_path: Path) -> None:
    """Render *rewards* as a line plot and write it to *out_path*."""
    # A standalone Figure avoids pyplot's global figure registry (nothing to
    # close afterwards) and does not need a GUI backend to write an image.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(8, 4))
    ax = fig.subplots()
    ax.plot(rewards, linewidth=1)
    ax.set_title("CommitGuard baseline reward curve (naive always-vulnerable)")
    ax.set_xlabel("Episode")
    ax.set_ylabel("Reward")
    fig.tight_layout()
    fig.savefig(out_path, dpi=180)


def main(argv: Sequence[str] | None = None) -> None:
    """Run the naive always-vulnerable baseline and save its results.

    Writes ``baseline_rewards.json`` (the per-episode rewards as a JSON list)
    and ``baseline_reward_curve.png`` into ``--out-dir``, creating the
    directory if needed.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) reads them from ``sys.argv``.

    Raises:
        SystemExit: On invalid command-line arguments, including a negative
            ``--episodes`` value.
    """
    args = _parse_args(argv)

    # Allow running from a fresh clone without `pip install -e .`.
    repo_root = Path(__file__).resolve().parent.parent
    sys.path.insert(0, str(repo_root))

    # Local, in-process baseline (no server needed). These imports are
    # deferred because they only resolve once repo_root is on sys.path.
    from commitguard_env.environment import CommitGuardEnvironment
    from commitguard_env.models import CommitGuardAction

    data_path = repo_root / "data" / "devign_filtered.jsonl"
    env = CommitGuardEnvironment(data_path=data_path)

    rewards = _collect_rewards(env, CommitGuardAction, args.episodes)

    args.out_dir.mkdir(parents=True, exist_ok=True)
    (args.out_dir / "baseline_rewards.json").write_text(json.dumps(rewards), encoding="utf-8")
    _save_reward_curve(rewards, args.out_dir / "baseline_reward_curve.png")


if __name__ == "__main__":
    main()