# Spaces: vedkdev / FlakyTestSleuthOpenEnvRL
# Status: Sleeping
# File size: 2,763 Bytes
# Revision: 761f203

from __future__ import annotations

import csv
import random
from pathlib import Path
from typing import Any


class TaskLoader:
    """Load tasks from a CSV file and hand out randomly sampled ones.

    Each CSV row may list several task types in its ``task_types`` column,
    separated by ``;``. The row is expanded into one task entry per listed
    type; every entry is a copy of the row with an extra ``task_type`` key.
    """

    def __init__(self, csv_path: str):
        """Read ``csv_path`` and build the task list.

        Args:
            csv_path: Path to a UTF-8 CSV file with a header row.

        Raises:
            FileNotFoundError: If ``csv_path`` does not exist.
            ValueError: If no row yields at least one non-empty task type.
        """
        path = Path(csv_path)
        if not path.exists():
            raise FileNotFoundError(f"Task CSV not found: {csv_path}")

        self.tasks: list[dict[str, Any]] = []
        with path.open("r", encoding="utf-8", newline="") as handle:
            for row in csv.DictReader(handle):
                # DictReader fills missing trailing fields of a short row with
                # None; coalesce to "" so str(None) never becomes a bogus
                # "None" task type.
                raw_types = str(row.get("task_types") or "")
                for raw_type in raw_types.split(";"):
                    task_type = raw_type.strip()
                    if not task_type:
                        continue
                    entry = dict(row)
                    entry["task_type"] = task_type
                    self.tasks.append(entry)

        if not self.tasks:
            raise ValueError(f"No tasks loaded from {csv_path}")

        # When set to a non-empty string, sample() only draws from this type.
        self._forced_type: str | None = None

    def sample(self) -> dict[str, Any]:
        """Return a copy of a randomly chosen task with a description added.

        The returned dict is a shallow copy of the stored entry, so adding
        ``task_description`` does not modify ``self.tasks``.

        Raises:
            ValueError: If no task matches the currently forced task type.
        """
        pool = self.tasks
        if self._forced_type:
            pool = [task for task in self.tasks if task["task_type"] == self._forced_type]
        if not pool:
            raise ValueError(f"No tasks available for task type: {self._forced_type}")

        task = random.choice(pool).copy()
        task["task_description"] = self._make_description(task)
        return task

    def force_task_type(self, task_type: str | None) -> None:
        """Restrict sampling to ``task_type``; ``None`` (or "") lifts the restriction."""
        self._forced_type = task_type

    def _make_description(self, task: dict[str, Any]) -> str:
        """Build the instruction text for ``task`` based on its ``task_type``.

        Unrecognized task types get a generic one-line instruction.
        """
        task_type = task["task_type"]
        if task_type == "classify":
            return (
                "Investigate the given test and determine whether it is FLAKY or STABLE. "
                "Use read_file and search_code to gather evidence. "
                "When confident, call classify_flakiness with argument 'flaky' or 'stable'."
            )
        if task_type == "root_cause":
            return (
                "This test is confirmed flaky. Identify its root cause category. "
                "Valid categories: OD, OD-Brit, OD-Vic, NIO, NOD, TD, TZD, ID, NDOI. "
                "Use read_file and search_code to find evidence. "
                "Call classify_root_cause with the category code when confident."
            )
        if task_type == "fix_proposal":
            # A blank or missing (None) category would otherwise render as
            # "root cause: ." or "root cause: None."; fall back to "unknown".
            category = task.get("category") or "unknown"
            return (
                f"This test is confirmed flaky with root cause: {category}. "
                "Propose a concrete fix as a unified diff. "
                "Use read_file and search_code to understand the code. "
                "Call propose_fix with a valid unified diff string."
            )
        return "Investigate the flaky test."