Spaces:
Sleeping
Sleeping
| """Submission task grader for binary_search_off_by_one.""" | |
| from __future__ import annotations | |
| from collections.abc import Mapping, Sequence | |
| from typing import Any, Optional | |
| from core.sandbox import run_code_with_tests | |
| from tasks.tasks import TASK_BS_OFF_BY_ONE | |
# Score bounds used by this grader: `grade` returns MIN_SCORE for any failed
# or unusable submission and MAX_SCORE when every test passes.
MIN_SCORE = 0.01
MAX_SCORE = 0.98


def _clamp(score: float) -> float:
    """Clamp *score* into ``[MIN_SCORE, MAX_SCORE]``, rounded to 4 decimals.

    NaN is mapped to ``MIN_SCORE``. Comparisons against NaN are always false,
    so ``min``/``max`` would otherwise pass it through unchanged and the
    result would fall outside the score range.
    """
    # Function-scope import: the module does not import ``math`` at top level.
    import math

    if math.isnan(score):
        return MIN_SCORE
    return round(min(max(score, MIN_SCORE), MAX_SCORE), 4)
def _as_mapping(value: Any) -> Optional[Mapping[str, Any]]:
    """Return a mapping view of *value*, or ``None`` if none can be obtained.

    A ``Mapping`` is returned as-is. Otherwise the zero-argument exporters
    ``model_dump()`` and then ``dict()`` are tried, in that order, when the
    object has them. An exporter that raises ends the search with ``None``;
    one that returns a non-mapping falls through to the next exporter.
    """
    if isinstance(value, Mapping):
        return value

    for exporter_name in ("model_dump", "dict"):
        if not hasattr(value, exporter_name):
            continue
        try:
            exported = getattr(value, exporter_name)()
        except Exception:
            # A failing exporter aborts the whole conversion; later
            # exporters are deliberately not attempted.
            return None
        if isinstance(exported, Mapping):
            return exported

    return None
def _extract_final_observation(payload: Any) -> Any:
    """Dig the final observation out of a loosely structured *payload*.

    Resolution rules, applied recursively:

    * ``None`` yields ``None``.
    * A mapping-like payload (per ``_as_mapping``) is searched for the keys
      ``final_observation``, ``observation``, ``state`` and
      ``last_observation``, in that order; the first one that resolves to a
      non-``None`` value wins. Failing that, a ``trajectory`` key is
      followed if present; otherwise the payload itself is returned.
    * A non-string sequence yields ``None`` when empty. Otherwise only its
      last item is considered: for a sequence of length >= 2 its element at
      index 1 is resolved; a mapping-like item is resolved recursively; any
      other item is returned unchanged.
    * Anything else is returned unchanged.
    """

    def is_listlike(obj: Any) -> bool:
        # Strings and byte buffers are Sequences but are never containers here.
        return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray))

    if payload is None:
        return None

    as_map = _as_mapping(payload)
    if as_map is not None:
        for wrapper_key in ("final_observation", "observation", "state", "last_observation"):
            inner = as_map.get(wrapper_key) if wrapper_key in as_map else None
            if inner is None:
                continue
            resolved = _extract_final_observation(inner)
            if resolved is not None:
                return resolved
        if "trajectory" in as_map:
            return _extract_final_observation(as_map.get("trajectory"))
        # No wrapper key resolved: the payload is treated as the observation.
        return payload

    if not is_listlike(payload):
        return payload
    if not payload:
        return None

    tail = payload[-1]
    if is_listlike(tail) and len(tail) >= 2:
        # NOTE(review): presumably steps are (action, observation, ...)
        # tuples, hence index 1 — confirm against the caller.
        return _extract_final_observation(tail[1])
    if isinstance(tail, Mapping) or hasattr(tail, "model_dump") or hasattr(tail, "dict"):
        return _extract_final_observation(tail)
    return tail
def _field_to_source(value: Any) -> Optional[str]:
    """Render one candidate field value as source text.

    Accepted shapes:

    * a non-blank string, returned unchanged;
    * a non-string sequence of lines, joined with newlines;
    * a mapping of line numbers to lines, joined in ascending line-number
      order (entries whose key does not convert to ``int`` are skipped).

    Returns ``None`` for any other value, and for values that render to
    blank text, so that callers can move on to the next candidate field.
    """
    if isinstance(value, str):
        return value if value.strip() else None
    if isinstance(value, (bytes, bytearray)):
        # Byte buffers are Sequences but are not treated as lists of lines.
        return None

    if isinstance(value, Mapping):
        numbered: list[tuple[int, str]] = []
        for key, line in value.items():
            try:
                line_no = int(key)
            except (TypeError, ValueError, OverflowError):
                continue
            numbered.append((line_no, str(line)))
        numbered.sort(key=lambda item: item[0])
        text = "\n".join(line for _, line in numbered)
    elif isinstance(value, Sequence):
        text = "\n".join(str(line) for line in value)
    else:
        return None

    return text if text.strip() else None


def _observation_to_source(observation: Any) -> Optional[str]:
    """Return the source code carried by *observation*, or ``None``.

    The observation is first viewed as a mapping (via ``_as_mapping``) and
    the keys ``source``, ``code_lines``, ``code`` and ``code_dict`` are
    tried in that order. If none yields usable text, the attributes
    ``source``, ``code``, ``code_lines`` and ``code_dict`` are tried on the
    object itself.

    Both lookups accept the same value shapes (see ``_field_to_source``), so
    a plain dict such as ``{"code": "<text>"}`` is handled the same way as
    an object exposing a string ``code`` attribute.
    """
    if observation is None:
        return None

    mapping = _as_mapping(observation)
    if mapping is not None:
        for key in ("source", "code_lines", "code", "code_dict"):
            text = _field_to_source(mapping.get(key))
            if text is not None:
                return text

    # Attribute fallback, for objects whose mapping view lacks these fields
    # or that expose no mapping view at all.
    for attr in ("source", "code", "code_lines", "code_dict"):
        text = _field_to_source(getattr(observation, attr, None))
        if text is not None:
            return text

    return None
def grade(payload: Any = None, *args: Any, **kwargs: Any) -> float:
    """Score a submission: ``MAX_SCORE`` if every test passes, else ``MIN_SCORE``.

    The submission is taken from *payload* when it is not ``None``, otherwise
    from the first extra positional argument, otherwise from the keyword
    arguments as a dict. The final observation is extracted from it, turned
    into source text, and run against ``TASK_BS_OFF_BY_ONE["tests"]``.

    ``MIN_SCORE`` is returned when no non-blank source can be recovered, when
    the test runner raises, when it reports a syntax error, when it yields no
    results, or when any result's ``passed`` is falsy.
    """
    if payload is not None:
        submission = payload
    elif args:
        submission = args[0]
    else:
        submission = kwargs

    source = _observation_to_source(_extract_final_observation(submission))
    if not source or not source.strip():
        return MIN_SCORE

    try:
        # Only the second and third elements of the runner's result are used.
        _, results, syntax_err = run_code_with_tests(
            source=source,
            test_callables=TASK_BS_OFF_BY_ONE["tests"],
        )
    except Exception:
        # Grading boundary: a runner failure scores as a failed submission.
        return MIN_SCORE

    if syntax_err or not results:
        return MIN_SCORE

    every_test_passed = all(outcome.passed for outcome in results)
    return MAX_SCORE if every_test_passed else MIN_SCORE


__all__ = ["grade"]