Spaces:

likhonsheikh
/

code-interpreter-sandbox

Sleeping

App Files Files Community

code-interpreter-sandbox / examples /ml_example.py

likhonsheikh

Initial commit: Advanced Code Interpreter Sandbox

523f6c3 verified 4 months ago

raw

history blame contribute delete

5.61 kB

	"""
	Machine Learning Example
	Demonstrates scikit-learn capabilities
	"""

	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
	from sklearn.linear_model import LogisticRegression, LinearRegression
	from sklearn.svm import SVC, SVR
	from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
	from sklearn.metrics import mean_squared_error, r2_score
	from sklearn.preprocessing import StandardScaler
	from sklearn.datasets import make_classification, make_regression
	import matplotlib.pyplot as plt

	# Banner for the whole demo script.
	print("=" * 60)
	print("MACHINE LEARNING EXAMPLE")
	print("=" * 60)

	# ------------------------------------------------------------------
	# Classification task: fit three classifiers on a synthetic 3-class
	# dataset, report each test accuracy, then detail the best one.
	# ------------------------------------------------------------------
	print("\n🎯 CLASSIFICATION TASK")
	print("-" * 40)

	# Synthetic data: 1000 samples, 10 features (5 informative, 2 redundant),
	# 3 classes. The fixed random_state keeps the run reproducible.
	X_class, y_class = make_classification(
	    n_samples=1000,
	    n_features=10,
	    n_informative=5,
	    n_redundant=2,
	    n_classes=3,
	    random_state=42,
	)

	# Hold out 20% of the samples for testing, stratified on the labels.
	X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
	    X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
	)

	# Candidate classifiers, keyed by the display name used in the output.
	models_cls = {
	    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
	    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
	    'SVM': SVC(random_state=42),
	}

	# Fit every model once and keep both its accuracy and its test predictions,
	# so the best model's predictions can be reused below instead of calling
	# predict() a second time (mirrors predictions_reg in the regression task).
	results_cls = {}
	predictions_cls = {}
	for name, model in models_cls.items():
	    model.fit(X_train_cls, y_train_cls)
	    y_pred = model.predict(X_test_cls)
	    accuracy = accuracy_score(y_test_cls, y_pred)
	    results_cls[name] = accuracy
	    predictions_cls[name] = y_pred
	    print(f"{name}: Accuracy = {accuracy:.4f}")

	# Best model = highest test accuracy (first one wins on ties).
	best_cls = max(results_cls, key=results_cls.get)
	print(f"\n🏆 Best Classification Model: {best_cls} ({results_cls[best_cls]:.4f})")

	# Classification report for the best model, from the stored predictions.
	best_model_cls = models_cls[best_cls]
	y_pred_best = predictions_cls[best_cls]
	print("\n📊 Classification Report:")
	print(classification_report(y_test_cls, y_pred_best))

	# Confusion matrix (also drawn as a heatmap in the visualization section).
	cm = confusion_matrix(y_test_cls, y_pred_best)
	print("\n🔢 Confusion Matrix:")
	print(cm)

	# ------------------------------------------------------------------
	# Regression task: fit three regressors on a synthetic dataset and
	# compare them by mean squared error and R2 on the held-out split.
	# ------------------------------------------------------------------
	print("\n\n📈 REGRESSION TASK")
	print("-" * 40)

	# Synthetic data: 1000 samples, 10 features, noise=0.1.
	X_reg, y_reg = make_regression(
	    n_samples=1000,
	    n_features=10,
	    noise=0.1,
	    random_state=42,
	)

	# Hold out 20% of the samples for testing.
	X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
	    X_reg, y_reg, test_size=0.2, random_state=42
	)

	# Candidate regressors, keyed by the display name used in the output.
	models_reg = {
	    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
	    'Linear Regression': LinearRegression(),
	    'SVR': SVR(),
	}

	results_reg = {}
	predictions_reg = {}
	for name, model in models_reg.items():
	    # Choose the feature matrices first, then share one fit/predict path.
	    if name != 'SVR':
	        fit_X, eval_X = X_train_reg, X_test_reg
	    else:
	        # SVR gets standardized features: the scaler is fitted on the
	        # training split only and then applied to the test split.
	        scaler = StandardScaler()
	        X_train_scaled = scaler.fit_transform(X_train_reg)
	        X_test_scaled = scaler.transform(X_test_reg)
	        fit_X, eval_X = X_train_scaled, X_test_scaled

	    model.fit(fit_X, y_train_reg)
	    y_pred = model.predict(eval_X)
	    predictions_reg[name] = y_pred

	    mse = mean_squared_error(y_test_reg, y_pred)
	    r2 = r2_score(y_test_reg, y_pred)
	    results_reg[name] = {'MSE': mse, 'R2': r2}

	    print(f"{name}:")
	    print(f" - MSE: {mse:.4f}")
	    print(f" - R2 Score: {r2:.4f}")

	# Best model = highest R2 on the test split (first one wins on ties).
	best_reg = max(results_reg, key=lambda model_name: results_reg[model_name]['R2'])
	print(f"\n🏆 Best Regression Model: {best_reg} (R2 = {results_reg[best_reg]['R2']:.4f})")

	# Feature importance: print the fitted Random Forest classifier's
	# importance score for each input column, labelled Feature_0..Feature_N-1.
	print("\n🎯 Feature Importance (Random Forest):")
	rf_class = models_cls['Random Forest']
	importance = rf_class.feature_importances_
	n_features = X_class.shape[1]
	feature_names = [f'Feature_{i}' for i in range(n_features)]
	for name, imp in zip(feature_names, importance):
	    print(f" {name}: {imp:.4f}")

	# Visualizations: a single 2x2 figure summarizing both tasks.
	fig, axes = plt.subplots(2, 2, figsize=(15, 12))
	ax_cls, ax_cm = axes[0, 0], axes[0, 1]
	ax_reg, ax_scatter = axes[1, 0], axes[1, 1]

	# Top-left: test accuracy of each classifier.
	models_names = list(results_cls.keys())
	accuracies = list(results_cls.values())
	ax_cls.bar(models_names, accuracies, color=['skyblue', 'lightgreen', 'salmon'])
	ax_cls.set_title('Classification Models Comparison')
	ax_cls.set_ylabel('Accuracy')
	ax_cls.tick_params(axis='x', rotation=45)

	# Top-right: confusion matrix of the best classifier as an annotated heatmap.
	# seaborn is imported at this point in the script, as in the original.
	import seaborn as sns
	sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm)
	ax_cm.set_title('Confusion Matrix')
	ax_cm.set_xlabel('Predicted')
	ax_cm.set_ylabel('Actual')

	# Bottom-left: R2 score of each regressor.
	reg_models = list(results_reg.keys())
	r2_scores = [results_reg[m]['R2'] for m in reg_models]
	ax_reg.bar(reg_models, r2_scores, color=['orange', 'lightcoral', 'gold'])
	ax_reg.set_title('Regression Models Comparison (R²)')
	ax_reg.set_ylabel('R² Score')
	ax_reg.tick_params(axis='x', rotation=45)

	# Bottom-right: predicted vs. actual targets for the best regressor. The
	# dashed red diagonal marks where predicted equals actual.
	y_pred_best_reg = predictions_reg[best_reg]
	ax_scatter.scatter(y_test_reg, y_pred_best_reg, alpha=0.5)
	lo, hi = y_test_reg.min(), y_test_reg.max()
	ax_scatter.plot([lo, hi], [lo, hi], 'r--', lw=2)
	ax_scatter.set_xlabel('Actual')
	ax_scatter.set_ylabel('Predicted')
	ax_scatter.set_title(f'{best_reg} - Predictions vs Actual')

	plt.tight_layout()
	plt.show()

	# Summary: recap the best model of each task and the completed steps.
	print("\n" + "=" * 60)
	print("SUMMARY")
	print("=" * 60)
	print(f"✅ Classification completed: {best_cls} achieved {results_cls[best_cls]:.2%} accuracy")
	print(f"✅ Regression completed: {best_reg} achieved R² = {results_reg[best_reg]['R2']:.4f}")
	# These two lines interpolate nothing, so they are plain string literals
	# rather than f-strings; the printed text is unchanged.
	print("✅ Feature importance analysis complete")
	print("✅ Visualizations generated")