Spaces:

liovina
/

nl-sql

Sleeping

App Files Files Community

nl-sql / src /nl_sql /agent /state.py

liovina

Deploy NL_SQL HEAD to HF Space

942050b verified 22 days ago

raw

history blame contribute delete

3.28 kB

	"""Mutable state shared across all pipeline nodes.

	LangGraph's `StateGraph` merges per-node return dicts back into this state.
	Keep field semantics tight — every reader/writer is a node, and adding a
	field with unclear ownership multiplies coupling fast.

	Optional fields default to ``None`` (not missing) so node code can `state.get`
	and reason about presence vs. absence cheaply.
	"""

	from __future__ import annotations

	from dataclasses import dataclass
	from typing import Any, TypedDict

	from nl_sql.db.connection import Dialect
	from nl_sql.execution.errors import ExecutionErrorKind
	from nl_sql.execution.runner import ExecutionOutcome
	from nl_sql.render.formats import OutputFormat
	from nl_sql.schema_index.retriever import ContextBundle


	@dataclass(frozen=True, slots=True)
	class GenerateSQLOutput:
	"""Structured output of the `generate_sql` node.

	Per docs/02_architecture_v2.md §3, the LLM returns ``sql + rationale +
	tables_used + confidence``. ``raw_text`` keeps the original response
	for tracing/debugging — handy when JSON parsing degraded or the model
	hallucinated keys.
	"""

	sql: str
	rationale: str = ""
	tables_used: tuple[str, ...] = ()
	confidence: float = 0.0
	raw_text: str = ""


	class PipelineState(TypedDict, total=False):
	"""Per-question state. ``total=False`` so partial dicts merge cleanly."""

	# --- input ----------------------------------------------------------
	question: str
	db_id: str
	dialect: Dialect

	# --- after context_builder -----------------------------------------
	context: ContextBundle \| None

	# --- after plan_query (optional, only when enable_planner=True) ----
	plan: str
	"""Structured plan (raw JSON text) produced by `plan_query` before
	SQL generation. Empty when the planner stage is disabled."""

	# --- after generate_sql --------------------------------------------
	generated: GenerateSQLOutput \| None

	# --- after validate -------------------------------------------------
	# ExecutionOutcome carries both validation report and (later) execution
	# result, so a single field covers both phases.
	outcome: ExecutionOutcome \| None

	# --- repair bookkeeping --------------------------------------------
	repair_attempted: bool
	last_error: str # error context fed into the repair prompt
	verify_retry_on_empty: bool
	"""When True, the empty-result branch in `_route_after_execute` flows
	to `repair_once` (subject to the repair_attempted guard) instead of
	short-circuiting to deterministic_format. Empty rows are often a
	silent miss (wrong filter value, case mismatch, NULL handling), so a
	second LLM pass with the empty-result signal can recover them. Set
	by `run_config_g`; off everywhere else."""
	critique_failed: bool

	# --- after deterministic_format ------------------------------------
	output_format: OutputFormat \| None

	# --- after explain_trace -------------------------------------------
	caption: str

	# --- terminal status ------------------------------------------------
	error_kind: ExecutionErrorKind \| None
	error_message: str

	# --- diagnostic / observability -------------------------------------
	trace: list[dict[str, Any]]