-- Assumptions: -- 1. sql is correct -- 2. only table name has alias -- 3. only one intersect/union/except module Spider { -- val: number(float)/string(str)/sql(dict) val = Number(object f) | String(string s) | ValSql(sql s) | ColUnit(col_unit c) | Terminal -- col_unit: (agg_id, col_id, isDistinct(bool)) col_unit = ( agg_type agg_id, -- TODO fix column col_id, singleton is_distinct ) -- val_unit: (unit_op, col_unit1, col_unit2) -- val_unit = ( -- unit_type unit_op, -- col_unit col_unit1, -- col_unit col_unit2 -- ) val_unit = Column(col_unit col_unit1) | Minus(col_unit col_unit1, col_unit col_unit2) | Plus(col_unit col_unit1, col_unit col_unit2) | Times(col_unit col_unit1, col_unit col_unit2) | Divide(col_unit col_unit1, col_unit col_unit2) -- table_unit: (table_type, col_unit/sql) table_unit = TableUnitSql(sql s) | Table(table table_id) -- condition: [cond_unit1, 'and'/'or', cond_unit2, ...] -- cond_unit: (not_op, op_id, val_unit, val1, val2) cond = And(cond left, cond right) | Or(cond left, cond right) | Not(cond c) | Between(val_unit val_unit, val val1, val val2) | Eq(val_unit val_unit, val val1) | Gt(val_unit val_unit, val val1) | Lt(val_unit val_unit, val val1) | Ge(val_unit val_unit, val val1) | Le(val_unit val_unit, val val1) | Ne(val_unit val_unit, val val1) | In(val_unit val_unit, val val1) | Like(val_unit val_unit, val val1) -- These don't ever appear in the dataset -- | Is(val_unit val_unit, val val1) -- | Exists(val_unit val_unit, val val1) -- | CondUnit(singleton not_op, cond_op op_id, val_unit val_unit, val val1, val val2) -- sql { -- 'select': (isDistinct(bool), [(agg_id, val_unit), (agg_id, val_unit), ...]) -- 'from': {'table_units': [table_unit1, table_unit2, ...], 'conds': condition} -- 'where': condition -- 'groupBy': [col_unit1, col_unit2, ...] -- 'orderBy': ('asc'/'desc', [val_unit1, val_unit2, ...]) -- 'having': condition -- 'limit': None/limit value -- 'intersect': None/sql -- 'except': None/sql -- 'union': None/sql -- } sql = ( select select, from from, cond? where, col_unit* group_by, order_by? order_by, cond? having, int? limit, sql? intersect, sql? except, sql? union, ) -- 'select': (isDistinct(bool), [(agg_id, val_unit), (agg_id, val_unit), ...]) select = (singleton is_distinct, agg* aggs) agg = (agg_type agg_id, val_unit val_unit) -- 'from': {'table_units': [table_unit1, table_unit2, ...], 'conds': condition} from = (table_unit* table_units, cond? conds) -- 'orderBy': ('asc'/'desc', [val_unit1, val_unit2, ...]) order_by = (order order, val_unit* val_units) -- CLAUSE_KEYWORDS = ('select', 'from', 'where', 'group', 'order', 'limit', 'intersect', 'union', 'except') -- JOIN_KEYWORDS = ('join', 'on', 'as') -- WHERE_OPS = ('not', 'between', '=', '>', '<', '>=', '<=', '!=', 'in', 'like', 'is', 'exists') -- cond_type = Between | Eq | Gt | Lt | Ge | Le | Ne | In | Like | Is | Exists -- UNIT_OPS = ('none', '-', '+', "*", '/') --unit_type = NoneUnitOp | Minus | Plus | Times | Divide -- AGG_OPS = ('none', 'max', 'min', 'count', 'sum', 'avg') agg_type = NoneAggOp | Max | Min | Count | Sum | Avg -- TABLE_TYPE = { -- 'sql': "sql", -- 'table_unit': "table_unit", -- } -- COND_OPS = ('and', 'or') -- SQL_OPS = ('intersect', 'union', 'except') -- ORDER_OPS = ('desc', 'asc') order = Asc | Desc }