| import matplotlib.pyplot as plt |
| import seaborn as sns |
| import pandas as pd |
| from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix |
|
|
# Global plot theme: dark figure/axes backgrounds with white text, axis
# labels, and tick marks. Set once at import time via matplotlib's rcParams.
plt.rcParams.update({
    'figure.facecolor': '#1F2937',
    'axes.facecolor': '#0B0F19',
    'text.color': 'white',
    'axes.labelcolor': 'white',
    'xtick.color': 'white',
    'ytick.color': 'white',
})
|
|
|
|
def plot_feature_distributions(real_data, synthetic_data):
    """Overlay real vs. synthetic histograms for every feature on a grid.

    One subplot is drawn per column of ``real_data``; the column of the same
    name is read from ``synthetic_data`` and drawn on the same axes. Subplots
    are arranged three per row and unused grid cells are removed.

    Args:
        real_data: DataFrame-like object whose ``columns`` define the
            features to plot.
        synthetic_data: Object indexable by the same column names.

    Returns:
        The matplotlib figure containing the grid of histograms.

    Raises:
        ValueError: If ``real_data`` has no columns (a zero-row subplot grid
            cannot be created).
    """
    features = real_data.columns.to_list()
    if not features:
        raise ValueError('real_data has no columns to plot')

    n_cols = 3
    n_rows = (len(features) + n_cols - 1) // n_cols  # ceiling division

    # squeeze=False keeps ``axes`` two-dimensional even when there is only a
    # single row; without it, three or fewer features yield a 1-D array and
    # 2-D indexing fails. Flattening gives one slot per feature, row-major.
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(18, 5 * n_rows), squeeze=False
    )
    flat_axes = axes.ravel()

    shared_style = dict(
        bins=30, stat='count', element='step', fill=True, alpha=0.2
    )

    for ax, feature in zip(flat_axes, features):
        # Explicit labels tie each legend entry to its own artist instead of
        # relying on the order in which artists were added to the axes.
        sns.histplot(
            real_data[feature],
            color='skyblue',
            label='Real Data',
            ax=ax,
            **shared_style,
        )
        sns.histplot(
            synthetic_data[feature],
            color='indianred',
            label='Synthetic Data',
            ax=ax,
            **shared_style,
        )

        ax.set_title(f'Distribution of {feature}')
        ax.set_xlabel(feature)
        ax.set_ylabel('Frequency')
        ax.legend()

    # Remove the grid cells left over in the last row.
    for unused_ax in flat_axes[len(features):]:
        fig.delaxes(unused_ax)

    fig.tight_layout()
    return fig
|
|
|
|
def get_metrics_df(y_true, y_real_pred, y_synth_pred):
    """Build a two-row table of classification metrics for both models.

    Args:
        y_true: Ground-truth labels.
        y_real_pred: Predictions reported under 'Real Data Model'.
        y_synth_pred: Predictions reported under 'Synthetic Data Model'.

    Returns:
        A DataFrame with columns 'Model', 'Accuracy', 'Precision', 'Recall'
        and 'F1-Score'. Precision, recall and F1 use weighted averaging.
    """
    predictions = (y_real_pred, y_synth_pred)
    weighted_scorers = {
        'Precision': precision_score,
        'Recall': recall_score,
        'F1-Score': f1_score,
    }

    table = {
        'Model': ['Real Data Model', 'Synthetic Data Model'],
        'Accuracy': [accuracy_score(y_true, pred) for pred in predictions],
    }
    for column, scorer in weighted_scorers.items():
        table[column] = [
            scorer(y_true, pred, average='weighted') for pred in predictions
        ]

    return pd.DataFrame(table)
|
|
|
|
def plot_comparative_credit_score_distribution(
    real_scores,
    synth_scores,
    bins=50,
    title='Comparative Credit Score Distribution: Real vs Synthetic Models'
):
    """Plot the two models' predicted-score histograms side by side.

    Args:
        real_scores: Scores drawn in the left panel (real-data model).
        synth_scores: Scores drawn in the right panel (synthetic-data model).
        bins: Bin specification forwarded to ``sns.histplot``.
        title: Figure-level title; pass an empty string or ``None`` to omit.

    Returns:
        The matplotlib figure with both panels, which share the y axis.
    """
    fig, axes = plt.subplots(1, 2, figsize=(16, 5), sharey=True)

    panels = (
        (axes[0], real_scores, 'Real-Data Model Score Distribution'),
        (axes[1], synth_scores, 'Synthetic-Data Model Score Distribution'),
    )
    for ax, scores, panel_title in panels:
        sns.histplot(
            scores,
            bins=bins,
            stat='count',
            element='step',
            fill=True,
            alpha=0.2,
            color='skyblue',
            ax=ax,
        )
        ax.set_title(panel_title)
        ax.set_xlabel('Predicted Credit Score')
        ax.set_ylabel('Frequency')

    # The ``title`` argument was previously accepted but never rendered.
    if title:
        fig.suptitle(title, color='white', fontsize=16, weight='bold')

    fig.tight_layout()
    return fig
|
|
|
|
def plot_comparison_table(
    y_true, y_real_pred, y_synth_pred,
    title='Model Comparison: Real Data vs Synthetic Data'
):
    """Render the real-vs-synthetic model metrics as a styled table figure.

    The metrics come from ``get_metrics_df`` and are indexed by model name so
    the names appear as row labels. Rendering is delegated to
    ``plot_evaluation_table``, which rounds to four decimals, applies the
    table styling, and draws ``title`` above the table.

    Args:
        y_true: Ground-truth labels.
        y_real_pred: Predictions of the model trained on real data.
        y_synth_pred: Predictions of the model trained on synthetic data.
        title: Title drawn above the table.

    Returns:
        The matplotlib figure containing the table.
    """
    metrics_by_model = get_metrics_df(
        y_true, y_real_pred, y_synth_pred
    ).set_index('Model')
    return plot_evaluation_table(metrics_by_model, title=title)
|
|
|
|
def _row_normalized(cm):
    """Return ``cm`` with each row divided by its sum; empty rows stay zero."""
    row_totals = cm.sum(axis=1, keepdims=True)
    # A class with no true samples has a zero row sum; dividing by 1 keeps
    # that row at 0.0 instead of producing NaN cells.
    row_totals[row_totals == 0] = 1
    return cm.astype(float) / row_totals


def plot_comparative_confusion_matrices(
    y_true,
    y_pred_real,
    y_pred_synth,
    labels=None,
    normalize=False,
    cmap='Blues'
):
    """Draw the confusion matrices of both models side by side.

    Args:
        y_true: Ground-truth labels.
        y_pred_real: Predictions of the model trained on real data.
        y_pred_synth: Predictions of the model trained on synthetic data.
        labels: Class labels fixing the row/column order and the tick labels.
            When ``None``, the sorted union of all values found in the three
            label sequences is used so both matrices share shape and ticks.
        normalize: If true, show each row as fractions of its total
            (annotated with two decimals) instead of raw counts.
        cmap: Colormap forwarded to ``sns.heatmap``.

    Returns:
        The matplotlib figure with the two heatmaps.
    """
    if labels is None:
        # Passing ``None`` through as tick labels is not a usable value for
        # the heatmap, and letting each matrix infer its own labels could
        # give the two panels different shapes.
        observed = pd.concat(
            [pd.Series(y_true), pd.Series(y_pred_real), pd.Series(y_pred_synth)],
            ignore_index=True,
        )
        labels = sorted(observed.unique())

    cm_real = confusion_matrix(y_true, y_pred_real, labels=labels)
    cm_synth = confusion_matrix(y_true, y_pred_synth, labels=labels)

    if normalize:
        cm_real, cm_synth = _row_normalized(cm_real), _row_normalized(cm_synth)
        fmt = '.2f'
    else:
        fmt = 'd'

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    panels = (
        (axes[0], cm_real, "Real Data Confusion Matrix"),
        (axes[1], cm_synth, "Synthetic Data Confusion Matrix"),
    )
    for ax, matrix, panel_title in panels:
        sns.heatmap(
            matrix, annot=True, fmt=fmt, cmap=cmap,
            xticklabels=labels, yticklabels=labels, ax=ax
        )
        ax.set_title(panel_title)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")

    fig.tight_layout()
    return fig
|
|
|
|
def plot_comparative_credit_score_distribution_by_actual_class(
    y_true,
    real_scores,
    synth_scores,
    color_map,
    label_order=None,
    bins=50,
):
    """Plot both models' score histograms, split by the actual class.

    For each class a boolean mask over ``y_true`` selects the matching
    entries of ``real_scores`` (left panel) and ``synth_scores`` (right
    panel), so both score containers must support boolean-mask indexing and
    be aligned with ``y_true``.

    Args:
        y_true: Ground-truth class labels.
        real_scores: Scores from the real-data model.
        synth_scores: Scores from the synthetic-data model.
        color_map: Mapping from class label to colour; classes missing from
            it (or a ``None`` mapping) fall back to seaborn's default colour.
        label_order: Classes to draw, in drawing order. Defaults to the
            distinct values of ``y_true`` in order of first appearance.
        bins: Bin specification forwarded to ``sns.histplot``.

    Returns:
        The matplotlib figure with both panels, which share the y axis.
    """
    fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(16, 5), sharey=True)

    y_true_arr = pd.Series(y_true).values

    # The signature defaults ``label_order`` to None, which cannot be
    # iterated; derive the classes from the data in that case.
    if label_order is None:
        label_order = pd.unique(y_true_arr)

    colors = color_map if color_map is not None else {}
    hist_style = dict(
        bins=bins, stat='count', element='step', fill=True, alpha=0.2
    )

    for label in label_order:
        mask = (y_true_arr == label)
        color = colors.get(label)

        for ax, scores in ((ax_left, real_scores), (ax_right, synth_scores)):
            sns.histplot(
                scores[mask],
                color=color,
                label=label,
                ax=ax,
                **hist_style,
            )

    panel_titles = (
        (ax_left, 'Real-Data Model: Actual Class Distribution'),
        (ax_right, 'Synthetic-Data Model: Actual Class Distribution'),
    )
    for ax, panel_title in panel_titles:
        ax.set_title(panel_title)
        ax.set_xlabel('Predicted Credit Score')
        ax.set_ylabel('Frequency')
        ax.legend(title='Actual Class')

    fig.tight_layout()
    return fig
|
|
|
|
def plot_evaluation_table(summary_df, title="Synthetic Data Evaluation Summary"):
    """Render a summary DataFrame as a styled table figure.

    Values are rounded to four decimals on a copy of ``summary_df``. The
    index becomes the row labels and the columns become the header row.

    Args:
        summary_df: DataFrame to display.
        title: Title drawn above the table.

    Returns:
        The matplotlib figure containing the table.
    """
    display_df = summary_df.copy().round(4)
    row_count = len(display_df.index)
    col_count = len(display_df.columns)

    fig, ax = plt.subplots(figsize=(18, 2))
    ax.axis("off")

    table = ax.table(
        cellText=display_df.values,
        rowLabels=display_df.index,
        colLabels=display_df.columns,
        cellLoc="center",
        loc="center",
    )
    table.auto_set_font_size(False)
    table.set_fontsize(16)
    table.scale(1.2, 1.9)

    def paint(cell, facecolor, **text_props):
        # Every cell gets white text and a thin white border; only the
        # background and the font weight vary between cell roles.
        cell.set_facecolor(facecolor)
        cell.set_text_props(color="white", **text_props)
        cell.set_edgecolor("white")
        cell.set_linewidth(1)

    header_bg = "#1F77B4"
    body_bg = "#0B0F19"

    # Row 0 of the table holds the column headers.
    for col in range(col_count):
        paint(table[(0, col)], header_bg, weight="bold")

    # Column -1 holds the row labels; data rows start at table row 1.
    for row in range(1, row_count + 1):
        paint(table[(row, -1)], body_bg, weight="bold")
        for col in range(col_count):
            paint(table[(row, col)], body_bg)

    ax.set_title(title, color="white", fontsize=16, weight="bold", pad=12)

    plt.tight_layout()
    return fig