File size: 6,454 Bytes
ebf4715
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
from __future__ import annotations

from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class TaskSpec:
    """Frozen record describing one configuration-repair task.

    ``frozen=True`` blocks attribute rebinding only; the ``target``,
    ``required_paths`` and ``logic_checks`` containers are still mutable in
    place, so treat them as read-only.

    Instances are hashable: see ``__hash__`` for which fields take part.
    """

    # Unique identifier of the task.
    task_id: str
    # Short human-readable title.
    name: str
    # Longer summary of what has to be fixed.
    description: str
    # Difficulty label (free-form string, e.g. "easy").
    difficulty: str
    # Step budget for the task -- presumably enforced by the caller; confirm.
    max_steps: int
    # Text of the faulty configuration that the task starts from.
    broken: str
    # Parsed form of the repaired configuration.
    target: dict[str, Any]
    # Dotted path into ``target`` -> float, presumably a scoring weight
    # (NOTE(review): the consumer is not in this file; confirm the semantics).
    required_paths: dict[str, float]
    # Human-readable statements of the semantic rules a repair must satisfy.
    logic_checks: list[str]

    def __hash__(self) -> int:
        """Return a hash built from the scalar fields only.

        The ``__hash__`` that ``@dataclass(frozen=True)`` would generate
        hashes every field and therefore raises ``TypeError``, because
        ``target``, ``required_paths`` and ``logic_checks`` are unhashable
        containers. Hashing a subset of the fields compared by ``__eq__``
        keeps the rule that equal specs have equal hashes.
        """
        return hash(
            (
                self.task_id,
                self.name,
                self.description,
                self.difficulty,
                self.max_steps,
                self.broken,
            )
        )


# Registry of the available repair tasks, looked up by task id.
# Each key is taken from the spec's own ``task_id`` so the two cannot drift
# apart; insertion order (easy -> medium -> hard) follows the tuple below.
TASK_REGISTRY: dict[str, TaskSpec] = {
    spec.task_id: spec
    for spec in (
        # --- easy: docker-compose file ---------------------------------
        TaskSpec(
            task_id="easy_docker",
            name="Docker Compose Repair",
            description=(
                "Fix docker-compose config: invalid port entry, "
                "environment format, image tags, "
                "and full DB port mapping"
            ),
            difficulty="easy",
            max_steps=15,
            # Faulty compose file the task starts from.
            broken='''version: "3.8"

services:

  web:

    image: nginx

    ports:

      - "80:80"

      - abcdef

    environment:

      - DEBUG=true

      - API_KEY

  db:

    image: postgres:15

    ports:

      - "5432"

volumes:

  db_data:

''',
            # Repaired document in parsed form.
            target={
                "version": "3.8",
                "services": {
                    "web": {
                        "image": "nginx:latest",
                        "ports": ["80:80"],
                        "environment": {"DEBUG": "true", "API_KEY": "placeholder"},
                    },
                    "db": {"image": "postgres:15", "ports": ["5432:5432"]},
                },
                "volumes": {"db_data": None},
            },
            required_paths={
                "services.web.image": 1.0,
                "services.web.ports": 1.3,
                "services.web.environment.DEBUG": 1.0,
                "services.web.environment.API_KEY": 1.0,
                "services.db.ports": 1.1,
                "volumes.db_data": 0.6,
            },
            logic_checks=[
                "web port must be host:container",
                "db port must be full mapping",
                "environment should be key-value map",
            ],
        ),
        # --- medium: Kubernetes Deployment manifest ----------------------
        TaskSpec(
            task_id="medium_k8s",
            name="Kubernetes Deployment Repair",
            description=(
                "Fix deployment manifest types and required fields: "
                "replicas type, namespace, memory units, "
                "cpu request format, and containerPort"
            ),
            difficulty="medium",
            max_steps=18,
            # Faulty manifest the task starts from.
            broken='''apiVersion: apps/v1

kind: Deployment

metadata:

  name: web-app

spec:

  replicas: "3"

  selector:

    matchLabels:

      app: web

  template:

    metadata:

      labels:

        app: web

    spec:

      containers:

      - name: nginx

        image: nginx

        resources:

          limits:

            memory: 512

            cpu: "1"

          requests:

            memory: 1Gi

            cpu: 500m

''',
            # Repaired document in parsed form.
            target={
                "apiVersion": "apps/v1",
                "kind": "Deployment",
                "metadata": {"name": "web-app", "namespace": "default"},
                "spec": {
                    "replicas": 3,
                    "selector": {"matchLabels": {"app": "web"}},
                    "template": {
                        "metadata": {"labels": {"app": "web"}},
                        "spec": {
                            "containers": [
                                {
                                    "name": "nginx",
                                    "image": "nginx:latest",
                                    "resources": {
                                        "limits": {
                                            "memory": "512Mi",
                                            "cpu": "1",
                                        },
                                        "requests": {
                                            "memory": "256Mi",
                                            "cpu": "500m",
                                        },
                                    },
                                    "ports": [{"containerPort": 80}],
                                },
                            ],
                        },
                    },
                },
            },
            required_paths={
                "metadata.namespace": 1.0,
                "spec.replicas": 1.0,
                "spec.template.spec.containers.0.image": 0.8,
                "spec.template.spec.containers.0.resources.limits.memory": 1.1,
                "spec.template.spec.containers.0.resources.requests.memory": 1.1,
                "spec.template.spec.containers.0.resources.requests.cpu": 1.0,
                "spec.template.spec.containers.0.ports.0.containerPort": 1.0,
            },
            logic_checks=[
                "replicas should be integer",
                "memory values should be strings with unit",
                "cpu request should be millicores string",
            ],
        ),
        # --- hard: ML training configuration -----------------------------
        TaskSpec(
            task_id="hard_ml_config",
            name="ML Training Config Stabilization",
            description=(
                "Fix interdependent training and hardware constraints: "
                "warmup < max, GPU consistency, optimizer choice, "
                "and logging frequency"
            ),
            difficulty="hard",
            max_steps=22,
            # Faulty training config the task starts from.
            broken='''training:

  batch_size: 32

  gradient_accumulation_steps: 4

  max_steps: 100

  warmup_steps: 200

  learning_rate: 0.001

  mixed_precision: fp16

  fp16: true

  optimizer:

    type: adam

    weight_decay: 0.01

hardware:

  gpu_count: 0

  use_cuda: true

data:

  train_batch_size: 64

  eval_batch_size: 32

logging:

  log_interval: 1000

''',
            # Repaired document in parsed form.
            target={
                "training": {
                    "batch_size": 16,
                    "gradient_accumulation_steps": 2,
                    "max_steps": 1000,
                    "warmup_steps": 100,
                    "learning_rate": 0.001,
                    "mixed_precision": "fp16",
                    "optimizer": {"type": "adamw", "weight_decay": 0.01},
                },
                "hardware": {"gpu_count": 1, "use_cuda": True},
                "data": {"train_batch_size": 32, "eval_batch_size": 32},
                "logging": {"log_interval": 10},
            },
            required_paths={
                "training.max_steps": 1.1,
                "training.warmup_steps": 1.3,
                "training.optimizer.type": 1.2,
                "hardware.gpu_count": 1.2,
                "hardware.use_cuda": 0.8,
                "data.train_batch_size": 1.1,
                "logging.log_interval": 1.0,
            },
            logic_checks=[
                "warmup_steps must be less than max_steps",
                "if use_cuda is true, gpu_count must be >= 1",
                "train_batch_size should be 2 * batch_size",
                "log_interval should be <= 100",
            ],
        ),
    )
}