import pandas as pd

# Feature columns kept by both preprocessing functions (the target column is
# appended by the caller-supplied name).
_FEATURE_COLUMNS = (
    'Num_Credit_Card',
    'Changed_Credit_Limit',
    'Delay_from_due_date',
    'Interest_Rate',
    'Credit_Mix',
    'Outstanding_Debt',
)
_CATEGORICAL_COLUMN = 'Credit_Mix'
_DEBT_COLUMN = 'Outstanding_Debt'
_DEBT_DIVISOR = 1000


def _select_and_scale(data, target):
    """Return a copy of the selected columns with the debt column divided by 1000.

    Working on a copy keeps the caller's DataFrame untouched, so calling the
    public functions repeatedly on the same frame does not rescale it twice.
    """
    selected = data[[*_FEATURE_COLUMNS, target]].copy()
    selected[_DEBT_COLUMN] = selected[_DEBT_COLUMN] / _DEBT_DIVISOR
    return selected


def _encode_and_split(data, target, categories=None):
    """One-hot encode ``Credit_Mix`` (dropping the first level) and split X / y.

    When ``categories`` is given, the column is first cast to a categorical
    with exactly those levels, so the resulting dummy columns (and the dropped
    baseline level) are fixed by ``categories`` instead of by whichever values
    happen to occur in ``data``.
    """
    if categories is not None:
        data[_CATEGORICAL_COLUMN] = pd.Categorical(
            data[_CATEGORICAL_COLUMN], categories=categories
        )
    encoded = pd.get_dummies(data, columns=[_CATEGORICAL_COLUMN], drop_first=True)
    return encoded.drop(columns=[target]), encoded[target]


def preprocess_real_data(train, test, target):
    """Preprocess the real training and testing datasets.

    Selects the modelling features, divides ``Outstanding_Debt`` by 1000 and
    one-hot encodes ``Credit_Mix`` with the first level dropped. The input
    DataFrames are not modified.

    The ``Credit_Mix`` levels are taken from ``train`` and applied to ``test``
    as well, so both feature frames always have identical columns. A ``test``
    row whose ``Credit_Mix`` value does not occur in ``train`` gets 0 in every
    dummy column.

    Args:
        train (pd.DataFrame): The real training dataset.
        test (pd.DataFrame): The real testing dataset.
        target (str): The name of the target variable.

    Returns:
        tuple: ``(X_real_train, y_real_train, X_real_test, y_real_test)``.

    Raises:
        KeyError: If a required feature column or ``target`` is missing.
    """
    train_selected = _select_and_scale(train, target)
    test_selected = _select_and_scale(test, target)

    # Same level inference get_dummies would do on the training column alone,
    # so the training output is unchanged; only the test encoding is aligned.
    categories = pd.Categorical(train_selected[_CATEGORICAL_COLUMN]).categories

    X_real_train, y_real_train = _encode_and_split(train_selected, target, categories)
    X_real_test, y_real_test = _encode_and_split(test_selected, target, categories)
    return X_real_train, y_real_train, X_real_test, y_real_test


def preprocess_synthetic_data(synthetic_data, target):
    """Preprocess the synthetic dataset.

    Selects the modelling features, divides ``Outstanding_Debt`` by 1000 and
    one-hot encodes ``Credit_Mix`` with the first level dropped. The input
    DataFrame is not modified.

    Args:
        synthetic_data (pd.DataFrame): The synthetic dataset to preprocess.
        target (str): The name of the target variable.

    Returns:
        tuple: ``(X_synthetic_train, y_synthetic_train)``.

    Raises:
        KeyError: If a required feature column or ``target`` is missing.
    """
    selected = _select_and_scale(synthetic_data, target)
    X_synthetic_train, y_synthetic_train = _encode_and_split(selected, target)
    return X_synthetic_train, y_synthetic_train