Spaces:

Nitishkumar-ai
/

commitguard-env

Runtime error

App Files Files Community

Nitishkumar-ai committed about 7 hours ago

Commit

33692a0

1 Parent(s): 8c862c5

Add scripts for hero case finding, hero details retrieval, and training log plotting

Browse files

Files changed (3) hide show

scripts/find_hero_case.py +29 -0
scripts/get_hero_details.py +14 -0
scripts/plot_training_logs.py +58 -0

scripts/find_hero_case.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import json
+from pathlib import Path
+def find_hero_case():
+    data_path = Path("data/devign_filtered.jsonl")
+    if not data_path.exists():
+        print("Data not found.")
+        return
+    with open(data_path, "r") as f:
+        samples = [json.loads(line) for line in f]
+    # Filter for interesting CWEs (SQLi, Command Injection, etc.)
+    hero_candidates = [
+        s for s in samples
+        if s["is_vulnerable"] and s["cwe"] in ["CWE-89", "CWE-78", "CWE-22", "CWE-119"]
+    ]
+    print(f"Found {len(hero_candidates)} hero candidates.")
+    for s in hero_candidates[:5]:
+        print("-" * 40)
+        print(f"Sample ID: {s['sample_id']}")
+        print(f"CWE: {s['cwe']}")
+        print(f"Diff Context:\n{s['diff'][:300]}...")
+        print("-" * 40)
+# Run the hero-case search only when this file is executed as a script.
+if __name__ == "__main__":
+    find_hero_case()

scripts/get_hero_details.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import json
+from pathlib import Path
+def get_hero_details(sample_id):
+    data_path = Path("data/devign_filtered.jsonl")
+    with open(data_path, "r") as f:
+        for line in f:
+            s = json.loads(line)
+            if s["sample_id"] == sample_id:
+                print(json.dumps(s, indent=2))
+                return
+if __name__ == "__main__":
+    get_hero_details("d9a3b33d2c9f996537b7f1d0246dee2d0120cefb")

scripts/plot_training_logs.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import json
+import argparse
+import matplotlib.pyplot as plt
+import os
+from pathlib import Path
+def plot_training(log_history, output_path):
+    # Extract rewards and steps
+    # GRPOTrainer logs 'reward' in the history
+    steps = []
+    rewards = []
+    for entry in log_history:
+        if "reward" in entry and "step" in entry:
+            steps.append(entry["step"])
+            rewards.append(entry["reward"])
+    if not steps:
+        print("No reward data found in logs.")
+        return
+    plt.figure(figsize=(10, 5))
+    plt.plot(steps, rewards, label='Mean Reward (per step)', color='#2ecc71', alpha=0.4)
+    # Simple moving average for trend
+    if len(rewards) > 5:
+        window = 5
+        sma = [sum(rewards[i:i+window])/window for i in range(len(rewards)-window+1)]
+        plt.plot(steps[window-1:], sma, label=f'{window}-step Moving Avg', color='#e74c3c', linewidth=2)
+    plt.title("CommitGuard — GRPO Training Reward Curve", fontsize=14)
+    plt.xlabel("Training Step", fontsize=12)
+    plt.ylabel("Mean Reward", fontsize=12)
+    plt.legend()
+    plt.grid(True, linestyle='--', alpha=0.6)
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=180)
+    print(f"Training plot saved to {output_path}")
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--log-file", type=str, default="outputs/commitguard-llama-3b-grpo/final/trainer_state.json")
+    parser.add_argument("--output", type=str, default="plots/training_reward_curve.png")
+    args = parser.parse_args()
+    log_path = Path(args.log_file)
+    if not log_path.exists():
+        print(f"Log file {log_path} not found yet. Training might still be in progress.")
+        return
+    with open(log_path, "r") as f:
+        data = json.load(f)
+    plot_training(data.get("log_history", []), args.output)
+# Run the CLI entry point only when this file is executed as a script.
+if __name__ == "__main__":
+    main()