File size: 1,253 Bytes
8097081
 
 
1435892
8097081
 
 
1435892
 
8097081
 
 
1435892
8097081
 
 
1435892
8097081
 
 
1435892
8097081
 
 
 
 
 
 
 
 
 
 
 
1435892
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# src/pytorch_debug_env/graders.py
from __future__ import annotations

from .reward import clamp_score, final_diagnosis_score


def grade_easy(action: dict, gt: dict) -> float:
    """Easy grader: the base diagnosis score with no tier-specific adjustment.

    Args:
        action: The submitted diagnosis, forwarded to
            ``final_diagnosis_score`` unchanged.
        gt: The ground-truth record, forwarded alongside ``action``.

    Returns:
        The value of ``final_diagnosis_score(action, gt)`` after it has been
        passed through ``clamp_score``.
    """
    base_score = final_diagnosis_score(action, gt)
    return clamp_score(base_score)


def grade_medium(action: dict, gt: dict) -> float:
    """Medium grader: add small credit for related-file hypotheses.

    Starts from ``final_diagnosis_score(action, gt)`` and adds 0.05 (capped
    at 1.0) when the submitted ``affected_file`` appears in the ground
    truth's ``related_files``.

    Args:
        action: The submitted diagnosis; only ``affected_file`` is read here.
        gt: The ground-truth record; only ``related_files`` is read here.
            A missing key or a ``None``/empty value means "no related files".

    Returns:
        The adjusted score passed through ``clamp_score`` and rounded to
        four decimal places.
    """
    score = final_diagnosis_score(action, gt)

    # ``dict.get(key, [])`` only falls back when the key is absent; an explicit
    # ``None`` value would reach the ``in`` test and raise TypeError, so
    # normalise any falsy value to an empty collection first.
    related_files = gt.get("related_files") or ()
    if action.get("affected_file") in related_files:
        score = min(1.0, score + 0.05)

    return round(clamp_score(score), 4)


def grade_hard(action: dict, gt: dict) -> float:
    """Hard grader: allow category credit, penalize red herrings.

    Starts from ``final_diagnosis_score(action, gt)`` and applies two
    adjustments in order:

    * Category credit: when the base score is below 0.2 and
      ``BUG_CATEGORIES`` maps the submitted ``bug_type`` to the ground
      truth's ``category``, the score is raised to at least 0.18.
    * Red-herring penalty: when the ground truth names a
      ``red_herring_file`` and the submitted ``affected_file`` equals it,
      0.1 is subtracted, never going below 0.0.

    Args:
        action: The submitted diagnosis; ``bug_type`` and ``affected_file``
            are read here.
        gt: The ground-truth record; ``category`` and ``red_herring_file``
            are read here.

    Returns:
        The adjusted score capped at 1.0, passed through ``clamp_score`` and
        rounded to four decimal places.
    """
    score = final_diagnosis_score(action, gt)

    # Partial credit if the model gets the right category on subtle bugs.
    bug_type = action.get("bug_type")
    category = gt.get("category")
    if score < 0.2 and bug_type and category:
        # Kept as a function-level import, as in the original
        # (presumably to avoid an import cycle -- TODO confirm).
        from .bug_library import BUG_CATEGORIES

        if BUG_CATEGORIES.get(bug_type) == category:
            score = max(score, 0.18)

    # Only penalize when the ground truth actually defines a red herring.
    # Without this guard, a submission lacking ``affected_file`` graded
    # against a ground truth lacking ``red_herring_file`` compares
    # None == None and is docked 0.1 for a red herring that does not exist.
    red_herring = gt.get("red_herring_file")
    if red_herring and action.get("affected_file") == red_herring:
        score = max(0.0, score - 0.1)

    return round(clamp_score(min(score, 1.0)), 4)