| """Render a `Source` into the canonical text block consumed by the planner.""" | |
| from __future__ import annotations | |
| from .models import Source | |
def _render_column_stats(col) -> str:
    """Return the ``, key=value, ...`` stats suffix for one column line.

    Only populated stats are emitted: a stat that is ``None`` (or an empty
    ``top_values``) is skipped, and a column without stats yields ``""``.

    For columns flagged as PII, ``min``, ``max`` and ``top_values`` are
    omitted as well. Those three are verbatim cell values, so printing them
    would leak exactly what the ``PII (suppressed)`` samples placeholder is
    meant to hide. Mean, median and distinct count are still shown.
    """
    stats = col.stats
    if not stats:
        return ""

    show_raw_values = not col.pii_flag
    parts: list[str] = []
    if show_raw_values and stats.min is not None:
        parts.append(f"min={stats.min}")
    if show_raw_values and stats.max is not None:
        parts.append(f"max={stats.max}")
    if stats.mean is not None:
        parts.append(f"mean={stats.mean:.4g}")
    if stats.median is not None:
        parts.append(f"median={stats.median:.4g}")
    if stats.distinct_count is not None:
        parts.append(f"distinct={stats.distinct_count}")
    if show_raw_values and stats.top_values:
        parts.append(f"top={stats.top_values}")

    return (", " + ", ".join(parts)) if parts else ""


def render_source(source: Source) -> str:
    """Render a Source as the canonical text block consumed by the planner.

    Stable identifiers (source_id / table_id / column_id) are rendered
    alongside names. The planner must copy these verbatim into the IR;
    the IRValidator does a literal ID lookup, so anything else fails.

    Each column line shows the data type, the sample values (or
    `PII (suppressed)` when the column is flagged as PII) and whichever
    stats are populated. This function does not filter stats by data type:
    min/max appear whenever they are not None. For PII columns the stats
    that echo raw values (min, max, top values) are left out.

    Foreign keys are resolved to column/table names; an ID that cannot be
    resolved within this source is printed as-is.

    Args:
        source: The source to render, including its tables, columns and
            foreign keys.

    Returns:
        The rendered block as a single newline-joined string.
    """
    lines: list[str] = [
        f"Source: {source.name} ({source.source_type})",
        f"Source ID: {source.source_id}",
        "",
        "Tables:",
    ]

    # Source-wide lookups so foreign keys can point at any table in the source.
    tables_by_id = {t.table_id: t for t in source.tables}
    col_names_by_id = {
        t.table_id: {c.column_id: c.name for c in t.columns} for t in source.tables
    }

    for table in source.tables:
        rc = table.row_count
        rc_str = f" ({rc:,} rows)" if rc is not None else ""
        lines.append("")
        lines.append(f" Table: {table.name}{rc_str} — id={table.table_id}")
        lines.append(" Columns:")

        for col in table.columns:
            samples = "PII (suppressed)" if col.pii_flag else (col.sample_values or [])
            stats_str = _render_column_stats(col)
            lines.append(
                f" - {col.name} [{col.data_type}]: samples={samples}{stats_str} — id={col.column_id}"
            )

        if table.foreign_keys:
            lines.append(" Foreign keys:")
            # The FK's source column always lives in the current table.
            cols_in_this_table = {c.column_id: c.name for c in table.columns}
            for fk in table.foreign_keys:
                # Every lookup falls back to the raw ID when it is unknown.
                src_col_name = cols_in_this_table.get(fk.column_id, fk.column_id)
                tgt_table = tables_by_id.get(fk.target_table_id)
                tgt_table_name = tgt_table.name if tgt_table else fk.target_table_id
                tgt_col_name = col_names_by_id.get(fk.target_table_id, {}).get(
                    fk.target_column_id, fk.target_column_id
                )
                lines.append(f" - {src_col_name} -> {tgt_table_name}.{tgt_col_name}")

    return "\n".join(lines)