"""Bug-fix task grader."""
from __future__ import annotations
try:
from ..models import TaskGrade
from ..tasks.catalog import ReviewTask
except ImportError:
from models import TaskGrade
from tasks.catalog import ReviewTask
from .shared import (
base_grade,
compile_code,
component_score,
execute_cases,
quality_metrics,
shaped_score,
similarity_score,
summarize_results,
)
def grade_bug_fix_task(
    task: ReviewTask,
    code: str,
    *,
    include_hidden: bool,
    timeout_s: float = 2.0,
) -> TaskGrade:
    """Grade a bug-fix task against public or full test suites.

    Args:
        task: Review task providing the target function name, reference
            code, and the public/hidden test cases.
        code: Candidate source code to grade.
        include_hidden: When True, grade against public + hidden cases
            ("full" visibility); otherwise only the public cases.
        timeout_s: Wall-clock limit passed to ``execute_cases``.

    Returns:
        A ``TaskGrade`` whose score is shaped from a progress value that
        combines compilation, test pass rate, and code-quality metrics.
    """
    compiled, compile_error = compile_code(code)
    quality = quality_metrics(code, task.function_name)
    details = {
        "compile_error": compile_error,
        "quality_notes": quality["quality_notes"],
        "style_score": quality["style_score"],
        "visibility": "full" if include_hidden else "public",
    }

    if not compiled:
        # No runnable code: award a small amount of partial credit scaled
        # by textual similarity to the reference solution.
        progress = 0.02 + 0.12 * similarity_score(code, task.reference_code)
        details["test_results"] = []
        details["test_summary"] = "Code does not compile."
        tests_total = len(task.public_cases) + (
            len(task.hidden_cases) if include_hidden else 0
        )
        return base_grade(
            score=shaped_score(progress),
            syntax_score=component_score(0.01),
            tests_passed=0,
            tests_total=tests_total,
            quality_score=component_score(0.01),
            runtime_score=component_score(0.01),
            timed_out=False,
            details=details,
        )

    cases = task.public_cases + (task.hidden_cases if include_hidden else [])
    result = execute_cases(code, task.function_name, cases, timeout_s=timeout_s)

    if result.get("timed_out"):
        # Compiled but exceeded the time budget: quality carries slightly
        # more weight than a plain runtime error.
        return _execution_failure_grade(
            details,
            result["error"],
            progress=0.12 + 0.18 * quality["score"],
            tests_total=len(cases),
            quality_score=quality["score"],
            timed_out=True,
        )
    if "error" in result:
        # Compiled but crashed during execution.
        return _execution_failure_grade(
            details,
            result["error"],
            progress=0.1 + 0.2 * quality["score"],
            tests_total=len(cases),
            quality_score=quality["score"],
            timed_out=False,
        )

    data = result["data"]
    # max(..., 1) guards against a division by zero on an empty suite.
    pass_rate = data["passed"] / max(data["total"], 1)
    details["test_results"] = data["results"]
    details["test_summary"] = summarize_results("Test results", data["results"])
    # Pass rate dominates the progress score; quality adds a smaller share.
    progress = min(1.0, 0.05 + 0.8 * pass_rate + 0.15 * quality["score"])
    return base_grade(
        score=shaped_score(progress),
        syntax_score=component_score(0.95),
        tests_passed=data["passed"],
        tests_total=data["total"],
        quality_score=quality["score"],
        runtime_score=component_score(0.01),
        timed_out=False,
        details=details,
    )


def _execution_failure_grade(
    details: dict,
    error: str,
    *,
    progress: float,
    tests_total: int,
    quality_score,
    timed_out: bool,
) -> TaskGrade:
    """Build the grade for code that compiled but timed out or errored.

    Mutates ``details`` in place (empty results plus the error summary),
    mirroring the original inline branches exactly.
    """
    details["test_results"] = []
    details["test_summary"] = error
    return base_grade(
        score=shaped_score(progress),
        syntax_score=component_score(0.95),
        tests_passed=0,
        tests_total=tests_total,
        quality_score=quality_score,
        runtime_score=component_score(0.01),
        timed_out=timed_out,
        details=details,
    )
|