Spaces:
Sleeping
Sleeping
File size: 9,120 Bytes
[
{
"task_id": "medium_001",
"difficulty": "medium",
"query": "SELECT * FROM events ORDER BY created_at DESC;",
"schema": {
"events": {
"id": "BIGINT PRIMARY KEY",
"event_name": "VARCHAR(255)",
"payload": "JSON",
"created_at": "TIMESTAMP INDEX",
"actor_id": "BIGINT",
"metadata": "JSON"
}
},
"context": "Show the most recent events on an admin dashboard.",
"ground_truth_issues": [
{
"id": "medium_001_select_star",
"category": "performance",
"description": "SELECT * pulls a wide payload when the dashboard only needs a few columns.",
"severity": 0.3,
"fix": "SELECT id, event_name, created_at FROM events ORDER BY created_at DESC LIMIT 50;",
"keywords": [
"select *", "wide table", "projection", "performance", "star",
"all columns", "unnecessary columns", "column selection",
"over-fetching", "wildcard"
]
},
{
"id": "medium_001_missing_limit",
"category": "performance",
"description": "The dashboard query is missing a LIMIT and can scan far more rows than necessary.",
"severity": 0.3,
"fix": "SELECT id, event_name, created_at FROM events ORDER BY created_at DESC LIMIT 50;",
"keywords": [
"limit", "unbounded query", "dashboard", "performance", "no limit",
"missing limit", "unlimited rows", "pagination", "all rows",
"full scan", "row count"
]
}
],
"max_steps": 5
},
{
"task_id": "medium_002",
"difficulty": "medium",
"query": "SELECT c.id, c.name, (SELECT COUNT(*) FROM orders o WHERE o.customer_id = c.id) AS order_count FROM customers c;",
"schema": {
"customers": {
"id": "INT PRIMARY KEY",
"name": "VARCHAR(255)"
},
"orders": {
"id": "INT PRIMARY KEY",
"customer_id": "INT INDEX",
"total": "DECIMAL(10,2)"
}
},
"context": "Show each customer with the number of orders they have placed.",
"ground_truth_issues": [
{
"id": "medium_002_correlated_subquery",
"category": "performance",
"description": "The correlated subquery re-counts orders per row and should be rewritten as a join with GROUP BY.",
"severity": 0.6,
"fix": "SELECT c.id, c.name, COUNT(o.id) AS order_count FROM customers c LEFT JOIN orders o ON o.customer_id = c.id GROUP BY c.id, c.name;",
"keywords": [
"correlated subquery", "group by", "join", "count", "performance",
"subquery per row", "n+1", "rewrite", "left join", "aggregate",
"scalar subquery", "dependent subquery"
]
}
],
"max_steps": 6
},
{
"task_id": "medium_003",
"difficulty": "medium",
"query": "SELECT DISTINCT email FROM users WHERE email IS NOT NULL;",
"schema": {
"users": {
"id": "INT PRIMARY KEY",
"email": "VARCHAR(255) UNIQUE",
"last_login_at": "TIMESTAMP NULL"
}
},
"context": "Export non-null user emails for a CRM sync.",
"ground_truth_issues": [
{
"id": "medium_003_redundant_distinct",
"category": "performance",
"description": "DISTINCT is redundant because users.email is already unique.",
"severity": 0.45,
"fix": "SELECT email FROM users WHERE email IS NOT NULL;",
"keywords": [
"distinct", "unique", "redundant", "email", "performance",
"unnecessary distinct", "unique constraint", "already unique",
"duplicate elimination", "deduplication", "wasted sort"
]
}
],
"max_steps": 5
},
{
"task_id": "medium_004",
"difficulty": "medium",
"query": "SELECT o.id, o.total, u.name FROM orders o JOIN users u ON u.id = o.user_id WHERE DATE(o.created_at) = '2026-04-10';",
"schema": {
"orders": {
"id": "INT PRIMARY KEY",
"user_id": "INT INDEX",
"created_at": "TIMESTAMP INDEX",
"total": "DECIMAL(10,2)"
},
"users": {
"id": "INT PRIMARY KEY",
"name": "VARCHAR(255)"
}
},
"context": "List orders placed on a specific date with the user name attached.",
"ground_truth_issues": [
{
"id": "medium_004_function_on_indexed_column",
"category": "performance",
"description": "Wrapping created_at with DATE() prevents efficient use of the created_at index.",
"severity": 0.6,
"fix": "SELECT o.id, o.total, u.name FROM orders o JOIN users u ON u.id = o.user_id WHERE o.created_at >= '2026-04-10' AND o.created_at < '2026-04-11';",
"keywords": [
"date()", "function on column", "index", "range predicate", "performance",
"sargable", "non-sargable", "prevents index", "full scan",
"index usage", "function wrapping"
]
}
],
"max_steps": 6
},
{
"task_id": "medium_005",
"difficulty": "medium",
"query": "SELECT id, name FROM products WHERE LOWER(name) LIKE '%pro%';",
"schema": {
"products": {
"id": "INT PRIMARY KEY",
"name": "VARCHAR(255) INDEX",
"category_id": "INT",
"price": "DECIMAL(10,2)"
}
},
"context": "Search products whose names contain the text pro.",
"ground_truth_issues": [
{
"id": "medium_005_lower_blocks_index",
"category": "performance",
"description": "Applying LOWER(name) on every row prevents the index on name from being used efficiently.",
"severity": 0.35,
"fix": "SELECT id, name FROM products WHERE name ILIKE 'pro%';",
"keywords": [
"lower", "function on column", "index", "performance", "sargable",
"non-sargable", "case insensitive", "full scan", "table scan",
"function wrapping column"
]
},
{
"id": "medium_005_leading_wildcard",
"category": "performance",
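"description": "The leading wildcard in LIKE '%pro%' forces a full scan because a B-tree index can only serve patterns anchored at the start of the string; a prefix pattern such as 'pro%' is index-friendly, but it matches only names that begin with the text rather than names that contain it.",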
"severity": 0.35,
"fix": "SELECT id, name FROM products WHERE name ILIKE 'pro%';",
"keywords": [
"leading wildcard", "%pro%", "full scan", "prefix lookup", "performance",
"like wildcard", "pattern matching", "index unusable", "table scan",
"wildcard prefix"
]
}
],
"max_steps": 6
},
{
"task_id": "medium_006",
"difficulty": "medium",
"query": "SELECT * FROM events WHERE DATE(created_at) = '2024-01-15';",
"schema": {
"events": {
"id": "INT PRIMARY KEY",
"name": "VARCHAR(255)",
"created_at": "TIMESTAMP",
"INDEX": "idx_created_at ON events(created_at)"
}
},
"context": "Find all events that happened on a specific date.",
"ground_truth_issues": [
{
"id": "medium_006_function_on_index",
"category": "performance",
"description": "Using the DATE() function on an indexed column prevents index usage. Use a range comparison instead.",
"severity": 0.7,
"fix": "SELECT * FROM events WHERE created_at >= '2024-01-15 00:00:00' AND created_at < '2024-01-16 00:00:00';",
"keywords": [
"function on column", "date function", "index", "sargable",
"non-sargable", "prevents index", "range comparison", "full scan",
"table scan", "index usage", "function wrapping column"
]
},
{
"id": "medium_006_star",
"category": "performance",
"description": "SELECT * returns all columns when only specific fields may be needed.",
"severity": 0.2,
"fix": "SELECT id, name, created_at FROM events WHERE created_at >= '2024-01-15' AND created_at < '2024-01-16';",
"keywords": [
"select *", "star", "all columns", "projection", "unnecessary columns",
"wildcard", "over-fetching", "column selection"
]
}
],
"max_steps": 6
},
{
"task_id": "medium_007",
"difficulty": "medium",
"query": "SELECT * FROM products ORDER BY RAND() LIMIT 10;",
"schema": {
"products": {
"id": "INT PRIMARY KEY",
"name": "VARCHAR(255)",
"price": "DECIMAL(10,2)",
"category": "VARCHAR(64)"
}
},
"context": "Show 10 random products on the homepage.",
"ground_truth_issues": [
{
"id": "medium_007_order_rand",
"category": "performance",
"description": "ORDER BY RAND() generates a random value for every row in the table, causing a full table scan and sort. This is extremely slow on large tables.",
"severity": 0.8,
"fix": "SELECT * FROM products WHERE id >= (SELECT FLOOR(RAND() * (SELECT MAX(id) FROM products))) LIMIT 10;",
"keywords": [
"order by rand", "random", "full table scan", "sort", "performance",
"slow", "every row", "random ordering", "rand function",
"expensive sort", "large table"
]
}
],
"max_steps": 5
}
]
|