Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from xgboost import XGBClassifier | |
| from sklearn.metrics import accuracy_score | |
| from data_preparation.prepare_dataset import get_numpy_splits | |
| import os | |
# Load the face_orientation data and pull out the train/validation arrays.
print("Loading dataset for evaluation...")
dataset_options = dict(
    model_name="face_orientation",
    split_ratios=(0.7, 0.15, 0.15),
    seed=42,
    scale=False,
)
# get_numpy_splits is unpacked into four values; only the first (the dict of
# split arrays) is used by this script.
splits, _, _, _ = get_numpy_splits(**dataset_options)
X_train = splits["X_train"]
y_train = splits["y_train"]
X_val = splits["X_val"]
y_val = splits["y_val"]
# CSV of sweep results; each row supplies the hyperparameters for one model.
csv_path = 'models/xgboost/sweep_results_all_40.csv'
df = pd.read_csv(csv_path)

total_trials = len(df)
print(f"Re-evaluating {total_trials} configurations for accuracy. This will take a few minutes...")

# Columns read from each row, grouped by the type they are cast to.
int_columns = ("n_estimators", "max_depth")
float_columns = (
    "learning_rate",
    "subsample",
    "colsample_bytree",
    "reg_alpha",
    "reg_lambda",
)
# Settings that are identical for every configuration.
fixed_settings = {
    "random_state": 42,
    "use_label_encoder": False,
    "verbosity": 0,
    "eval_metric": "logloss",
}

# One validation accuracy per CSV row, in row order.
accuracies = []
for idx, row in df.iterrows():
    hyperparams = {name: int(row[name]) for name in int_columns}
    hyperparams.update((name, float(row[name])) for name in float_columns)
    hyperparams.update(fixed_settings)

    # Fit on the training split with this row's hyperparameters.
    classifier = XGBClassifier(**hyperparams)
    classifier.fit(X_train, y_train)

    # Score the fitted model on the validation split.
    val_accuracy = accuracy_score(y_val, classifier.predict(X_val))
    accuracies.append(round(val_accuracy, 4))

    # Report progress every fifth trial.
    completed = idx + 1
    if completed % 5 == 0:
        print(f"Processed {completed}/{total_trials} trials...")
# Add the accuracy column and save back to the same CSV.
# Because csv_path is overwritten in place, a CSV written by a previous run of
# this script already has a 'val_accuracy' column, and DataFrame.insert raises
# ValueError on a duplicate column name. Drop the stale column first so a
# re-run replaces the old values instead of crashing after all the retraining.
if 'val_accuracy' in df.columns:
    df = df.drop(columns='val_accuracy')
df.insert(2, 'val_accuracy', accuracies)
df.to_csv(csv_path, index=False)
print(f"\nDone! Updated {csv_path} with 'val_accuracy'.")

# Show the five rows with the highest validation accuracy as a quick check.
top5_acc = df.nlargest(5, 'val_accuracy')[['task_id', 'val_accuracy', 'val_f1', 'val_loss']]
print("\nTop 5 Trials by Accuracy:")
print(top5_acc.to_string(index=False))