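"""
Machine Learning Example.

Demonstrates scikit-learn capabilities: trains and compares several
classification and regression models on synthetic data, prints their
metrics, and plots the results.
"""
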
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, SVR

print("=" * 60)
print("MACHINE LEARNING EXAMPLE")
print("=" * 60)

# ---------------------------------------------------------------------------
# Classification task
# ---------------------------------------------------------------------------
print("\n🎯 CLASSIFICATION TASK")
print("-" * 40)

# Synthetic 3-class dataset: 1000 samples with 10 features, of which
# 5 are informative and 2 are redundant. Seeded for reproducibility.
X_class, y_class = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=2,
    n_classes=3,
    random_state=42,
)

# Hold out 20% of the samples for testing, stratified on the class label.
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
)

# Candidate classifiers, keyed by the display name used in the printed
# results and in the comparison bar chart.
models_cls = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'SVM': SVC(random_state=42),
}

# Fit every classifier on the training split and record its accuracy on the
# held-out test split.
results_cls = {}
for name, model in models_cls.items():
    model.fit(X_train_cls, y_train_cls)
    y_pred = model.predict(X_test_cls)
    accuracy = accuracy_score(y_test_cls, y_pred)
    results_cls[name] = accuracy
    print(f"{name}: Accuracy = {accuracy:.4f}")

# Select the classifier with the highest test accuracy; max() returns the
# first such entry if several are tied.
# NOTE(review): leading emoji in these messages were restored from garbled
# literals - confirm the intended glyphs.
best_cls = max(results_cls, key=results_cls.get)
print(f"\n🏆 Best Classification Model: {best_cls} ({results_cls[best_cls]:.4f})")

# Re-run the already fitted best model on the test split to get the
# predictions used by the report and the confusion matrix.
best_model_cls = models_cls[best_cls]
y_pred_best = best_model_cls.predict(X_test_cls)
print("\n📋 Classification Report:")
print(classification_report(y_test_cls, y_pred_best))

# Kept in `cm` because the visualization section draws it as a heatmap.
cm = confusion_matrix(y_test_cls, y_pred_best)
print("\n🔢 Confusion Matrix:")
print(cm)

# ---------------------------------------------------------------------------
# Regression task
# ---------------------------------------------------------------------------
# NOTE(review): leading emoji restored from a garbled literal - confirm glyph.
print("\n\n📈 REGRESSION TASK")
print("-" * 40)

# Synthetic regression dataset: 1000 samples, 10 features, noise=0.1.
# Seeded for reproducibility.
X_reg, y_reg = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the samples for testing.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Candidate regressors, keyed by the display name used in the printed
# results and in the comparison bar chart.
models_reg = {
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Linear Regression': LinearRegression(),
    'SVR': SVR(),
}

# Fit every regressor and collect its test-set metrics and predictions.
results_reg = {}      # name -> {'MSE': float, 'R2': float}
predictions_reg = {}  # name -> predictions for X_test_reg (reused for plotting)
for name, model in models_reg.items():
    if name == 'SVR':
        # SVR is trained on standardized features. The scaler is fitted on
        # the training split only and then reused to transform the test split.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train_reg)
        X_test_scaled = scaler.transform(X_test_reg)
        model.fit(X_train_scaled, y_train_reg)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train_reg, y_train_reg)
        y_pred = model.predict(X_test_reg)

    mse = mean_squared_error(y_test_reg, y_pred)
    r2 = r2_score(y_test_reg, y_pred)
    results_reg[name] = {'MSE': mse, 'R2': r2}
    predictions_reg[name] = y_pred
    print(f"{name}:")
    print(f" - MSE: {mse:.4f}")
    print(f" - R2 Score: {r2:.4f}")

# Select the regressor with the highest R2 on the test split.
# NOTE(review): leading emoji restored from garbled literals - confirm glyphs.
best_reg = max(results_reg, key=lambda k: results_reg[k]['R2'])
print(f"\n🏆 Best Regression Model: {best_reg} (R2 = {results_reg[best_reg]['R2']:.4f})")

# Report per-feature importances from the fitted random forest classifier.
# Features are labelled by column index because the data is synthetic.
print("\n🎯 Feature Importance (Random Forest):")
rf_class = models_cls['Random Forest']
feature_names = [f'Feature_{i}' for i in range(X_class.shape[1])]
importance = rf_class.feature_importances_
for name, imp in zip(feature_names, importance):
    print(f" {name}: {imp:.4f}")

# ---------------------------------------------------------------------------
# Visualization: 2x2 grid summarizing both tasks
# ---------------------------------------------------------------------------
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Top-left: test accuracy of each classifier.
models_names = list(results_cls.keys())
accuracies = list(results_cls.values())
axes[0, 0].bar(models_names, accuracies, color=['skyblue', 'lightgreen', 'salmon'])
axes[0, 0].set_title('Classification Models Comparison')
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].tick_params(axis='x', rotation=45)

# Top-right: confusion matrix of the best classifier, annotated with counts.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0, 1])
axes[0, 1].set_title('Confusion Matrix')
axes[0, 1].set_xlabel('Predicted')
axes[0, 1].set_ylabel('Actual')

# Bottom-left: R² of each regressor.
reg_models = list(results_reg.keys())
r2_scores = [results_reg[m]['R2'] for m in reg_models]
axes[1, 0].bar(reg_models, r2_scores, color=['orange', 'lightcoral', 'gold'])
axes[1, 0].set_title('Regression Models Comparison (R²)')
axes[1, 0].set_ylabel('R² Score')
axes[1, 0].tick_params(axis='x', rotation=45)

# Bottom-right: predicted vs. actual targets for the best regressor. The
# dashed red diagonal marks where predicted == actual.
y_pred_best_reg = predictions_reg[best_reg]
axes[1, 1].scatter(y_test_reg, y_pred_best_reg, alpha=0.5)
axes[1, 1].plot(
    [y_test_reg.min(), y_test_reg.max()],
    [y_test_reg.min(), y_test_reg.max()],
    'r--',
    lw=2,
)
axes[1, 1].set_xlabel('Actual')
axes[1, 1].set_ylabel('Predicted')
axes[1, 1].set_title(f'{best_reg} - Predictions vs Actual')

plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
print("\n" + "=" * 60)
print("SUMMARY")
print("=" * 60)
# Each message is a single-line literal; the check-mark prefix was restored
# from a garbled character that had split the strings across lines.
print(f"✅ Classification completed: {best_cls} achieved {results_cls[best_cls]:.2%} accuracy")
print(f"✅ Regression completed: {best_reg} achieved R² = {results_reg[best_reg]['R2']:.4f}")
print("✅ Feature importance analysis complete")
print("✅ Visualizations generated")