"""Render a `Source` into the canonical text block consumed by the planner.""" from __future__ import annotations from .models import Source def render_source(source: Source) -> str: """Render a Source as the canonical text block consumed by the planner. Stable identifiers (source_id / table_id / column_id) are rendered alongside names. The planner must copy these verbatim into the IR; the IRValidator does a literal ID lookup, so anything else fails. Columns show data type, sample values (or `PII (suppressed)`), and populated stats only (min/max suppressed for string/bool, where they're useless). Top values are listed when available for low-cardinality cols. Foreign keys are resolved to names. """ lines: list[str] = [ f"Source: {source.name} ({source.source_type})", f"Source ID: {source.source_id}", "", "Tables:", ] tables_by_id = {t.table_id: t for t in source.tables} col_names_by_id = { t.table_id: {c.column_id: c.name for c in t.columns} for t in source.tables } for table in source.tables: rc = table.row_count rc_str = f" ({rc:,} rows)" if rc is not None else "" lines.append("") lines.append(f" Table: {table.name}{rc_str} — id={table.table_id}") lines.append(" Columns:") for col in table.columns: samples = "PII (suppressed)" if col.pii_flag else (col.sample_values or []) stats_parts: list[str] = [] if col.stats: if col.stats.min is not None: stats_parts.append(f"min={col.stats.min}") if col.stats.max is not None: stats_parts.append(f"max={col.stats.max}") if col.stats.mean is not None: stats_parts.append(f"mean={col.stats.mean:.4g}") if col.stats.median is not None: stats_parts.append(f"median={col.stats.median:.4g}") if col.stats.distinct_count is not None: stats_parts.append(f"distinct={col.stats.distinct_count}") if col.stats.top_values: stats_parts.append(f"top={col.stats.top_values}") stats_str = (", " + ", ".join(stats_parts)) if stats_parts else "" lines.append( f" - {col.name} [{col.data_type}]: samples={samples}{stats_str} — id={col.column_id}" ) if table.foreign_keys: lines.append(" Foreign keys:") cols_in_this_table = {c.column_id: c.name for c in table.columns} for fk in table.foreign_keys: src_col_name = cols_in_this_table.get(fk.column_id, fk.column_id) tgt_table = tables_by_id.get(fk.target_table_id) tgt_table_name = tgt_table.name if tgt_table else fk.target_table_id tgt_col_name = col_names_by_id.get(fk.target_table_id, {}).get( fk.target_column_id, fk.target_column_id ) lines.append(f" - {src_col_name} -> {tgt_table_name}.{tgt_col_name}") return "\n".join(lines)