# github_sync / test /test_model.py
# Bachstelze
# save plot figure
# 9182b44
import pickle
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
import re # For using regular expressions
import matplotlib.pyplot as plt
import datetime
# CSV files read by load_and_evaluate_model(). The paths are relative, so they
# are resolved against the current working directory at run time.
# NOTE(review): the "_80"/"_20" suffixes presumably denote an 80/20
# train/test split -- confirm against the dataset preparation step.
train_path = "../Datasets_all/A2_dataset_80.csv"
test_path = "../Datasets_all/A2_dataset_20.csv"
def save_prediction_plot(y_test, y_test_pred_baseline, baseline_test_r2):
    """Save an "actual vs. predicted" scatter plot as a timestamped PNG.

    The image is written to the current working directory as
    ``baseline_actual_vs_predicted_<YYYYmmdd_HHMMSS>.png`` and the path is
    printed to stdout.

    Args:
        y_test: Actual target values; must provide ``.min()`` and ``.max()``
            (used to draw the diagonal reference line).
        y_test_pred_baseline: Predicted values, plotted against ``y_test``.
        baseline_test_r2: R² score shown in the plot title (4 decimals).
    """
    fig, axes = plt.subplots(figsize=(5, 5))
    try:
        # Actual vs Predicted
        axes.scatter(y_test, y_test_pred_baseline, alpha=0.5)
        # Dashed red diagonal: where points would lie for perfect predictions.
        lower, upper = y_test.min(), y_test.max()
        axes.plot([lower, upper], [lower, upper], 'r--', lw=2)
        axes.set_xlabel('Actual AimoScore')
        axes.set_ylabel('Predicted AimoScore')
        axes.set_title(f'Baseline: Actual vs Predicted (R²={baseline_test_r2:.4f})')
        axes.grid(True, alpha=0.3)

        # Timestamped file name (e.g. 20260130_143210) so successive runs do
        # not overwrite each other's figures.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = f"baseline_actual_vs_predicted_{timestamp}.png"
        # Save through the figure handle instead of pyplot's implicit
        # "current figure" so the right figure is always written.
        fig.savefig(fig_path, dpi=300, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
    print(f"Figure saved to {fig_path}")
def extract_missing_feature(error_message):
    """Return the feature names reported as "unseen at fit time".

    Parses the section of a feature-name mismatch error message that starts
    with ``Feature names unseen at fit time:`` and is followed by one or more
    ``- name`` bullet entries (one per line, or on the header line itself).
    All consecutive bullets are collected, not just the first one. A bullet
    consisting only of ``...`` (a truncation marker, not a real column) is
    skipped. A bullet holding several names separated by ``", "`` is split
    into individual names.

    Args:
        error_message: Text of the exception raised during prediction.

    Returns:
        List of feature names in the order they appear; an empty list when
        the message has no such section.
    """
    header = re.search(r"Feature names unseen at fit time:", error_message)
    if header is None:
        return []

    bullet = re.compile(r"\s*-\s*(.+)")
    features = []
    position = header.end()
    while True:
        # match() anchors at `position`, so parsing stops at the first line
        # that is not a bullet (e.g. the "seen at fit time, yet now missing"
        # section that may follow).
        entry = bullet.match(error_message, position)
        if entry is None:
            break
        position = entry.end()
        text = entry.group(1).strip()
        if text == "...":
            # Truncation marker for a long list, not a feature name.
            continue
        features.extend(text.split(', '))
    return features
def load_and_evaluate_model(model_path):
    """Load a pickled LinearRegression model and evaluate it on the test set.

    Steps:
      1. Unpickle the model and assert that it is a ``LinearRegression``.
      2. Read the CSVs at the module-level ``train_path`` and ``test_path``.
         The feature columns are every training column except the target
         ``AimoScore`` and ``EstimatedScore``.
      3. Predict on the test set. When prediction raises a ValueError that
         names features unseen at fit time, drop those columns and retry.
      4. Print MAE and R², assert ``MAE < 0.15`` and ``R² > 0.5``.
      5. Write the test data plus a ``Predicted_AimoScore`` column to
         ``predicted_test.csv`` and save the actual-vs-predicted plot.

    Args:
        model_path: Path of the pickled model file.

    Raises:
        AssertionError: If the model is not a LinearRegression or a metric
            threshold is not met (nothing is saved in that case).
        ValueError: If prediction fails and no further feature column can be
            removed to resolve it.
    """
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only load model files that come from a trusted source.
    with open(model_path, "rb") as f:
        model = pickle.load(f)

    # Check the model type
    assert isinstance(model, LinearRegression)

    # Load data
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    # Define target and features dynamically
    target_col = "AimoScore"
    unwanted_cols = ["EstimatedScore"]
    features_cols = [
        col for col in train_df.columns
        if col not in unwanted_cols and col != target_col
    ]

    X_test = test_df[features_cols]
    y_test = test_df[target_col]

    # Retry prediction, dropping columns the model reports as unseen, until
    # it succeeds. Only predict() sits in the try block so that errors from
    # the metric functions are not mistaken for feature-name mismatches.
    while True:
        try:
            y_pred = model.predict(X_test)
            break
        except ValueError as e:
            print(f"Error during prediction: {e}")
            # Extract missing features from the error message
            missing_features = extract_missing_feature(str(e))
            unseen = set(missing_features)
            remaining_cols = [col for col in features_cols if col not in unseen]
            if len(remaining_cols) == len(features_cols):
                # Nothing would change, so retrying would fail identically
                # (and loop forever). Surface the original error instead of
                # continuing with placeholder predictions.
                print("No more features can be removed, stopping execution.")
                raise
            print(f"Removing missing features from test set: {missing_features}")
            features_cols = remaining_cols
            X_test = X_test[features_cols]

    # Evaluate
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Absolute Error on test set: {mae:.4f}")
    print(f"R^2 score on test set: {r2:.4f}")

    # Assert the threshold values
    assert mae < 0.15, "Mean Absolute Error is too high"
    assert r2 > 0.5, "R^2 score is too low"

    # Save predictions to CSV
    test_df["Predicted_AimoScore"] = y_pred
    test_df.to_csv("predicted_test.csv", index=False)

    save_prediction_plot(y_test, y_pred, r2)
if __name__ == "__main__":
    # Script entry point: evaluate the pickled model expected in the current
    # working directory.
    model_path = "linear_regression_model.pkl"
    load_and_evaluate_model(model_path)