Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| main.py - CLI entry point for the Tabular AutoML framework. | |
| Usage: | |
| python main.py --data dataset.csv --target churn --task classification | |
| python main.py --data housing.csv --target price --task regression --time_budget 120 | |
| """ | |
| import argparse | |
| import sys | |
| import os | |
| import pandas as pd | |
| # Allow running from root directory | |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | |
| from automl import AutoML | |
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options for the AutoML CLI.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the arguments from ``sys.argv``,
            which is the behavior existing callers rely on. Passing an
            explicit list lets the parser be driven programmatically.

    Returns:
        The parsed namespace with attributes ``data``, ``target``, ``task``,
        ``time_budget``, ``n_trials``, ``output_dir``,
        ``no_feature_engineering`` and ``seed``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            a value is invalid (for example a ``--task`` outside the allowed
            choices).
    """
    parser = argparse.ArgumentParser(
        description="Tabular AutoML Framework",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Required inputs.
    parser.add_argument("--data", required=True, help="Path to CSV dataset")
    parser.add_argument("--target", required=True, help="Target column name")
    parser.add_argument(
        "--task",
        required=True,
        choices=["classification", "regression"],
        help="Task type",
    )

    # Optional tuning knobs.
    parser.add_argument(
        "--time_budget",
        type=float,
        default=None,
        help="Training time budget in seconds (optional)",
    )
    parser.add_argument(
        "--n_trials",
        type=int,
        default=15,
        help="Number of Optuna HPO trials per model",
    )
    parser.add_argument(
        "--output_dir",
        default="./automl_output",
        help="Directory to save outputs",
    )
    parser.add_argument(
        "--no_feature_engineering",
        action="store_true",
        help="Disable feature engineering",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Seed value passed to the AutoML run",
    )

    return parser.parse_args(argv)
def main():
    """Run the AutoML command-line workflow.

    Parses the CLI arguments, loads the CSV given by ``--data`` with pandas,
    checks that the ``--target`` column is present, then builds an ``AutoML``
    instance from the parsed options and calls its ``fit`` and ``report``
    methods.

    Raises:
        SystemExit: With status 1 when the CSV cannot be loaded or the target
            column is missing. The diagnostic is written to stderr so it does
            not get mixed into the program's normal stdout output.
    """
    args = parse_args()

    # Load data
    print(f"\nLoading dataset: {args.data}")
    try:
        df = pd.read_csv(args.data)
    except Exception as e:
        # Top-level CLI boundary: any load failure (missing file, parse error,
        # bad encoding, ...) is reported to the user instead of a traceback.
        print(f"Error loading CSV: {e}", file=sys.stderr)
        sys.exit(1)

    if args.target not in df.columns:
        print(f"Error: target column '{args.target}' not found.", file=sys.stderr)
        print(f"Available columns: {list(df.columns)}", file=sys.stderr)
        sys.exit(1)

    # Run AutoML
    automl = AutoML(
        task_type=args.task,
        time_budget=args.time_budget,
        n_optuna_trials=args.n_trials,
        output_dir=args.output_dir,
        seed=args.seed,
        # The CLI flag is a negative switch; AutoML takes the positive form.
        use_feature_engineering=not args.no_feature_engineering,
    )
    automl.fit(df, target_col=args.target)
    automl.report()
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()