Bachstelze committed on
Commit
76e50c6
·
1 Parent(s): 2f8e50e

add dynamic feature resolving

Browse files
Files changed (1) hide show
  1. test/test_model.py +54 -25
test/test_model.py CHANGED
@@ -1,57 +1,86 @@
1
  import pickle
2
-
3
  import pandas as pd
4
  from sklearn.linear_model import LinearRegression
5
  from sklearn.metrics import mean_absolute_error, r2_score
 
6
 
7
  train_path = "../Datasets_all/A2_dataset_80.csv"
8
  test_path = "../Datasets_all/A2_dataset_20.csv"
9
 
10
- # validating the linear regression model based on
11
- # https://medium.com/@_SSP/validating-machine-learning-regression-models-a-comprehensive-guide-b94fd94e339c
12
-
 
 
 
13
 
14
  def load_and_evaluate_model(model_path):
15
  # Load the pickled model
16
  with open(model_path, "rb") as f:
17
  model = pickle.load(f)
18
 
19
- # check the model type
20
  assert isinstance(model, LinearRegression)
21
 
22
  # Load data
23
  train_df = pd.read_csv(train_path)
24
  test_df = pd.read_csv(test_path)
25
 
26
- # Define target and features
27
  target_col = "AimoScore"
28
  unwanted_cols = ["EstimatedScore"]
 
29
  features_cols = [
30
- col
31
- for col in train_df.columns
32
  if col not in unwanted_cols and col != target_col
33
  ]
34
 
 
35
  X_test = test_df[features_cols]
36
  y_test = test_df[target_col]
37
 
38
- # Predict on test set
39
- y_pred = model.predict(X_test)
40
-
41
- # Evaluate
42
- mae = mean_absolute_error(y_test, y_pred)
43
- r2 = r2_score(y_test, y_pred)
44
-
45
- print(f"Mean Absolute Error on test set: {mae:.4f}")
46
- print(f"R^2 score on test set: {r2:.4f}")
47
-
48
- # assert the threeshold values
49
- assert mae < 0.15, "Mean Absolute Error is too high"
50
- assert r2 > 0.5, "R^2 score is too low"
51
-
52
- # Save predictions to CSV
53
- test_df["Predicted_AimoScore"] = y_pred
54
- test_df.to_csv("predicted_test.csv", index=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  if __name__ == "__main__":
 
1
  import pickle
 
2
  import pandas as pd
3
  from sklearn.linear_model import LinearRegression
4
  from sklearn.metrics import mean_absolute_error, r2_score
5
+ import re # For using regular expressions
6
 
7
  train_path = "../Datasets_all/A2_dataset_80.csv"
8
  test_path = "../Datasets_all/A2_dataset_20.csv"
9
 
10
def extract_missing_feature(error_message):
    """Return the feature names an error message reports as unseen at fit time.

    The message is expected to contain the header
    "Feature names unseen at fit time:" followed by one "- <name>" bullet
    per feature (the layout scikit-learn uses for feature-name mismatches).
    Every bullet directly under that header is collected; parsing stops at
    the first non-bullet line, so names listed under a later section are
    not included.

    Args:
        error_message: Text of the raised ValueError.

    Returns:
        A list of feature names in the order they appear, or an empty list
        when the header is absent or no bullet follows it.
    """
    header = "Feature names unseen at fit time:"
    _, found, tail = error_message.partition(header)
    if not found:
        return []

    features = []
    for line in tail.splitlines():
        entry = line.strip()
        if not entry:
            # Blank lines (e.g. the remainder of the header line) carry no names.
            continue
        if not entry.startswith("-"):
            # First non-bullet line starts the next section of the message.
            break
        name = entry[1:].strip()
        # "..." marks a truncated list, not a real column name.
        if name and name != "...":
            features.append(name)
    return features
16
 
17
  def load_and_evaluate_model(model_path):
18
  # Load the pickled model
19
  with open(model_path, "rb") as f:
20
  model = pickle.load(f)
21
 
22
+ # Check the model type
23
  assert isinstance(model, LinearRegression)
24
 
25
  # Load data
26
  train_df = pd.read_csv(train_path)
27
  test_df = pd.read_csv(test_path)
28
 
29
+ # Define target and features dynamically
30
  target_col = "AimoScore"
31
  unwanted_cols = ["EstimatedScore"]
32
+
33
  features_cols = [
34
+ col for col in train_df.columns
 
35
  if col not in unwanted_cols and col != target_col
36
  ]
37
 
38
+ # Initialize features for prediction
39
  X_test = test_df[features_cols]
40
  y_test = test_df[target_col]
41
 
42
+ #define y_pred
43
+ y_pred = 0
44
+
45
+ # Continue to predict until no ValueErrors occur
46
+ while True:
47
+ try:
48
+ # Predict on test set
49
+ y_pred = model.predict(X_test)
50
+
51
+ # Evaluate
52
+ mae = mean_absolute_error(y_test, y_pred)
53
+ r2 = r2_score(y_test, y_pred)
54
+
55
+ print(f"Mean Absolute Error on test set: {mae:.4f}")
56
+ print(f"R^2 score on test set: {r2:.4f}")
57
+
58
+ # Assert the threshold values
59
+ assert mae < 0.15, "Mean Absolute Error is too high"
60
+ assert r2 > 0.5, "R^2 score is too low"
61
+ break # Exit the loop if no errors occur
62
+
63
+ except ValueError as e:
64
+ print(f"Error during prediction: {e}")
65
+
66
+ # Extract missing features from the error message
67
+ missing_features = extract_missing_feature(str(e))
68
+
69
+ # Remove missing features from X_test and features_cols
70
+ if missing_features:
71
+ print(f"Removing missing features from test set: {missing_features}")
72
+ # Update features list
73
+ features_cols = [col for col in features_cols if col not in missing_features]
74
+ # Update X_test
75
+ X_test = X_test[features_cols]
76
+ else:
77
+ print("No more features can be removed, stopping execution.")
78
+ break # Exit if there are no more features to remove
79
+
80
+ if 'y_pred' in locals(): # Check if predictions were made
81
+ # Save predictions to CSV
82
+ test_df["Predicted_AimoScore"] = y_pred
83
+ test_df.to_csv("predicted_test.csv", index=False)
84
 
85
 
86
  if __name__ == "__main__":