| import pandas as pd
|
|
|
|
|
# Load the input table; the relative path is resolved against the current
# working directory, so run the script from the directory it expects.
df_original = pd.read_csv(filepath_or_buffer="../data/raw/train_clean.csv")
|
|
|
def clean_loans(text):
    """Split a free-text loan description into a list of unique loan names.

    The text is treated as a comma-separated list in which the literal
    " and " may also act as a separator (e.g. "A, B, and C").

    Args:
        text: The raw cell value: a string, or a missing value as
            recognised by ``pd.isna`` (``None``, ``NaN``, ...).

    Returns:
        The distinct, whitespace-stripped, non-empty loan names in order of
        first appearance. Missing input yields an empty list.
    """
    if pd.isna(text):
        return []

    # Turn the " and " connector into a comma so a single split handles both
    # separators; any empty fragments this creates are dropped below.
    parts = text.replace(" and ", ", ").split(",")
    stripped = (part.strip() for part in parts)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is identical on every run (set iteration order is not).
    return list(dict.fromkeys(name for name in stripped if name))
|
|
|
|
|
# Parse every raw "Type_of_Loan" cell into a list of distinct loan names.
df_original["Loan_List"] = df_original["Type_of_Loan"].apply(clean_loans)

# Gather every loan name that occurs anywhere in the data.
all_loans = set()
for row in df_original["Loan_List"]:
    all_loans.update(row)

# Iteration order of a set of strings changes between interpreter runs
# (hash randomisation). Fix one sorted order and use it everywhere below so
# the column order of the output CSV is reproducible.
loan_columns = sorted(all_loans)
print(loan_columns)

# Add one 0/1 indicator column per loan name.
for loan in loan_columns:
    # Bind the current name as a default argument so the lambda never
    # depends on a later value of the loop variable.
    df_original[loan] = df_original["Loan_List"].apply(
        lambda loans, loan=loan: int(loan in loans)
    )

# The raw text column and the intermediate list column are no longer needed.
df_original = df_original.drop(columns=["Type_of_Loan", "Loan_List"])

output_path = "./train_clean_type.csv"
df_original.to_csv(output_path, index=False)

print(f" File saved to: {output_path}")
print(f"shape: {df_original.shape}")
# Report the columns in the same order in which they were added.
print("New columns addeeeeeddd:", loan_columns)