File size: 1,609 Bytes
942050b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""Evaluation harness: ablation matrix, metrics, HTML report.

Stage 6 of the v2 pipeline (per docs/03_eval_methodology.md).

Public surface:
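- `BirdExample`, `load_bird_mini_dev`, `dev_split`, `extract_gold_tables` —
  dataset access.
- `ResultComparison`, `compare_results`, `execution_accuracy` —
  order-insensitive EA, BIRD parity.
- `schema_recall_at_k` — secondary RAG metric.
- `Configuration`, `EvalRecord`, `EvalRun`, `EvalSummary`, `run_config_a`,
  `run_config_c`, `run_config_d`, `run_config_e`, `run_config_f`,
  `run_config_g` — runner.
- `write_json_report`, `write_html_report` — artefact writers, plus
  `load_run_from_json` from the same report module.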
"""

from __future__ import annotations

from nl_sql.eval.dataset import (
    BirdExample,
    dev_split,
    extract_gold_tables,
    load_bird_mini_dev,
)
from nl_sql.eval.metrics.execution_accuracy import (
    ResultComparison,
    compare_results,
    execution_accuracy,
)
from nl_sql.eval.metrics.schema_recall import schema_recall_at_k
from nl_sql.eval.report import (
    load_run_from_json,
    write_html_report,
    write_json_report,
)
from nl_sql.eval.runner import (
    Configuration,
    EvalRecord,
    EvalRun,
    EvalSummary,
    run_config_a,
    run_config_c,
    run_config_d,
    run_config_e,
    run_config_f,
    run_config_g,
)

# Explicit public API of the package: exactly the names re-exported by the
# imports above, one entry per imported name. Entries are kept in ASCII sort
# order (capitalised class names first, then functions), so add new exports
# in their sorted position and keep this list in sync with the imports.
__all__ = [
    "BirdExample",
    "Configuration",
    "EvalRecord",
    "EvalRun",
    "EvalSummary",
    "ResultComparison",
    "compare_results",
    "dev_split",
    "execution_accuracy",
    "extract_gold_tables",
    "load_bird_mini_dev",
    "load_run_from_json",
    "run_config_a",
    "run_config_c",
    "run_config_d",
    "run_config_e",
    "run_config_f",
    "run_config_g",
    "schema_recall_at_k",
    "write_html_report",
    "write_json_report",
]