"""
Data Analysis Example
Demonstrates data manipulation and visualization capabilities.
"""
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Build a reproducible synthetic dataset: one row per day of 2023 holding a
# random-walk value (starting near 100) and a randomly assigned category.
np.random.seed(42)  # fixed seed so every run produces the same numbers
dates = pd.date_range(start='2023-01-01', periods=365, freq='D')
n_days = len(dates)  # single source of truth for the series length
values = np.cumsum(np.random.randn(n_days)) + 100
df = pd.DataFrame({
    'date': dates,
    'value': values,
    'category': np.random.choice(['A', 'B', 'C'], n_days),
})
# Print a textual summary of the dataset held in `df`.
# NOTE(review): the emoji in these headers were mis-decoded in the copy this
# was restored from. The clock glyph is recoverable from the surviving bytes;
# the glyph that showed up as a Greek pi is not, so the bar-chart emoji below
# is a stand-in -- confirm against the upstream file.
banner = "=" * 60
print(banner)
print("DATA ANALYSIS EXAMPLE")
print(banner)

# Size, covered period and the distinct category labels.
print("\n📊 Dataset Overview:")
print(f"Total records: {len(df)}")
print(f"Date range: {df['date'].min()} to {df['date'].max()}")
print(f"Categories: {df['category'].unique()}")

# describe() summarises the dataframe's columns.
print("\n📊 Basic Statistics:")
print(df.describe())

# Number of rows per category, most frequent first.
print("\n📊 Category Distribution:")
category_counts = df['category'].value_counts()
print(category_counts)

# Mean value per calendar month (1-12), taken from the 'date' column.
print("\n⏰ Time Series Analysis:")
monthly_avg = df.groupby(df['date'].dt.month)['value'].mean()
print(monthly_avg)
# Draw a 2x2 dashboard of the dataset held in `df` and show it.
plt.figure(figsize=(15, 10))

# Top-left: the raw series over time.
plt.subplot(2, 2, 1)
plt.plot(df['date'], df['value'])
plt.title('Time Series Data')
plt.xlabel('Date')
plt.ylabel('Value')
plt.xticks(rotation=45)  # tilt the date labels so they do not overlap

# Top-right: histogram of the values.
plt.subplot(2, 2, 2)
plt.hist(df['value'], bins=30, alpha=0.7, color='skyblue')
plt.title('Value Distribution')
plt.xlabel('Value')
plt.ylabel('Frequency')

# Bottom-left: one box per category.
plt.subplot(2, 2, 3)
sns.boxplot(data=df, x='category', y='value')
plt.title('Value by Category')
plt.xlabel('Category')
plt.ylabel('Value')

# Bottom-right: value against row index, one colour per category.
plt.subplot(2, 2, 4)
colors = {'A': 'red', 'B': 'blue', 'C': 'green'}
# Iterating the colour map (rather than the order in which categories happen
# to appear in the data) keeps the legend order stable from run to run.
for category, color in colors.items():
    subset = df[df['category'] == category]
    plt.scatter(subset.index, subset['value'],
                color=color, label=category, alpha=0.6)
plt.title('Scatter Plot by Category')
plt.xlabel('Index')
plt.ylabel('Value')
plt.legend()

plt.tight_layout()
plt.show()
# Derived metrics computed from the 'value' column of `df`.
# NOTE(review): the header emoji was mis-decoded in the copy this was restored
# from and cannot be recovered from the surviving bytes; the bar-chart emoji
# is a stand-in -- confirm against the upstream file.
print("\n📊 Advanced Analysis:")

# Pearson correlation between the value and its row position. The rows are
# daily and in date order, so the position stands in for elapsed time.
# Series.corr() aligns on a Series, so the index is converted with
# to_series(); passing the bare Index raises TypeError.
correlation = df['value'].corr(df.index.to_series())
print(f"Correlation with time: {correlation:.4f}")

# 30-row rolling mean; only the most recent window is reported.
rolling_mean = df['value'].rolling(window=30).mean()
print(f"30-day rolling mean (latest): {rolling_mean.iloc[-1]:.2f}")

# Percentage change from the first row's value to the last row's value.
first_value = df['value'].iloc[0]
last_value = df['value'].iloc[-1]
growth_rate = (last_value - first_value) / first_value * 100
print(f"Total growth rate: {growth_rate:.2f}%")

print("\n✅ Data analysis complete!")