File size: 2,390 Bytes
210535c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429a3ac
210535c
 
 
 
 
 
429a3ac
210535c
 
 
 
 
 
429a3ac
210535c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429a3ac
 
210535c
 
 
 
 
 
429a3ac
210535c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Identity and discovery metadata for this environment manifest.
name: sql-query-optimizer
# Quoted so the version is always read as a string.
version: '1.0.0'
# Folded scalar: the wrapped lines below are joined with single spaces.
description: >
  An OpenEnv environment where AI agents learn to review, rewrite,
  and optimise SQL queries for correctness and performance. Covers
  three real-world failure patterns: implicit cross-joins, N+1
  subqueries, and multi-dimensional query anti-patterns.
author: metaXscaler
# Tags: a flat list of plain strings.
tags:
  - openenv
  - sql
  - code-review
  - data-engineering
  - database
# Task catalogue: three entries (easy, medium, hard), each with an integer
# id, a kebab-case name, a grader kind, and a folded-prose description.
tasks:
  - id: 1
    name: fix-broken-join
    difficulty: easy
    grader: deterministic
    description: >
      The agent must replace an implicit cross-join (comma syntax)
      with an explicit INNER JOIN ... ON clause.
  - id: 2
    name: eliminate-n-plus-one
    difficulty: medium
    grader: deterministic
    description: >
      The agent must remove a correlated scalar subquery in the
      SELECT list and replace it with a single LEFT JOIN.
  - id: 3
    name: full-optimization
    difficulty: hard
    grader: deterministic
    description: >
      The agent must fix four independent issues: remove redundant
      DISTINCT, replace SELECT *, eliminate a non-sargable CAST
      predicate, and add an index hint comment.
# Observation schema: an object whose fields map a field name to a
# descriptive type name.
observation:
  type: object
  fields:
    task_id: integer
    task_name: string
    task_description: string
    query: string
    schema_context: string
    # Quoted because the value contains "|"; the text is a type
    # description (string or null), not a YAML null.
    hint: 'string | null'
    step_number: integer
    max_steps: integer
    done: boolean
# Action schema: an object with three fields, each mapped to a
# descriptive type name.
action:
  type: object
  fields:
    rewritten_query: string
    explanation: string
    # NOTE(review): presumably lets the agent signal the end of an episode;
    # confirm against the environment implementation.
    is_done: boolean
# Reward schema. Quoted values are human-readable type/range descriptions.
reward:
  type: object
  fields:
    # NOTE(review): score, grader_score and cumulative_score are written
    # with round brackets while the breakdown components use square
    # brackets. Confirm whether the open-vs-closed interval distinction
    # is intentional.
    score: 'float (0.0, 1.0)'
    grader_score: 'float (0.0, 1.0)'
    breakdown:
      correctness: 'float [0.0, 1.0]'
      performance: 'float [0.0, 1.0]'
      style: 'float [0.0, 1.0]'
      # Described as non-positive.
      step_penalty: 'float ≤ 0.0'
    feedback: string
    cumulative_score: 'float (0.0, 1.0)'
# HTTP endpoints listed for this environment. Each entry gives the URL
# path, the HTTP method, and a one-line description.
endpoints:
  - path: /reset
    method: POST
    description: Start a fresh episode for a given task_id
  - path: /step
    method: POST
    description: Submit an Action and advance the episode
  - path: /state
    method: GET
    description: Return the current internal state snapshot
  - path: /tasks
    method: GET
    description: List all tasks and action schema
  - path: /grader
    method: GET
    description: Return grader score for the last completed episode
  - path: /baseline
    method: POST
    description: Trigger baseline inference on all 3 tasks