"""
Metrics calculation for the sentiment analysis dashboard.

Provides key performance indicators and statistical metrics.
"""
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
|
|
|
|
class SentimentMetrics:
    """
    Calculates various metrics for sentiment analysis.

    Every method is a static method, so the class is used purely as a
    namespace. Input dataframes are only read, never modified.
    """

    # Numeric score assigned to each sentiment label. Labels that are not
    # listed here map to NaN and are ignored when averaging.
    SENTIMENT_WEIGHTS = {
        'very_negative': -2,
        'negative': -1,
        'neutral': 0,
        'positive': 1,
        'very_positive': 2,
    }

    NEGATIVE_SENTIMENTS = ['negative', 'very_negative']
    POSITIVE_SENTIMENTS = ['positive', 'very_positive']

    # Multiplier applied to the count of each intent; intents that are not
    # listed use DEFAULT_INTENT_PRIORITY.
    INTENT_PRIORITY_WEIGHTS = {
        'feedback_negative': 5,
        'request': 4,
        'question': 4,
        'suggestion': 3,
        'praise': 2,
        'humor_sarcasm': 1,
        'off_topic': 1,
        'spam_selfpromo': 0,
    }
    DEFAULT_INTENT_PRIORITY = 1

    # (exclusive upper bound on negative %, (status, color)), checked in
    # order. Anything that matches no bound is reported as critical.
    HEALTH_THRESHOLDS = (
        (10, ("Excellent", "green")),
        (20, ("Good", "lightgreen")),
        (30, ("Fair", "orange")),
        (50, ("Poor", "darkorange")),
    )
    HEALTH_FALLBACK = ("Critical", "red")

    @staticmethod
    def _mean_sentiment_score(polarity):
        """
        Average the numeric weight of a series of sentiment labels.

        Args:
            polarity: Series of sentiment labels

        Returns:
            The mean weight, or None when no label maps to a weight
            (empty series, all missing, or only unknown labels).
        """
        scores = polarity.map(SentimentMetrics.SENTIMENT_WEIGHTS).dropna()
        if scores.empty:
            return None
        return scores.mean()

    @staticmethod
    def _metrics_by_group(df, column):
        """
        Compute the overall metrics separately for each value of ``column``.

        Rows whose group value is missing are skipped: an equality filter
        can never select them, so they would only yield an empty entry.

        Args:
            df: Sentiment dataframe
            column: Name of the column to group by

        Returns:
            dict: Group value -> overall metrics for that group's rows
        """
        return {
            value: SentimentMetrics.calculate_overall_metrics(df[df[column] == value])
            for value in df[column].dropna().unique()
        }

    @staticmethod
    def calculate_overall_metrics(df):
        """
        Calculate overall summary metrics

        Args:
            df: Sentiment dataframe. Must contain 'sentiment_polarity';
                'requires_reply' is optional.

        Returns:
            dict: Overall metrics with the keys 'total_comments',
                'total_reply_required', 'reply_required_pct',
                'avg_sentiment_score', 'negative_pct', 'positive_pct' and
                'sentiment_distribution' (label -> percentage). All numeric
                values are 0 when there is nothing to measure.
        """
        total_comments = len(df)
        polarity = df['sentiment_polarity']

        if 'requires_reply' in df.columns:
            reply_required = df['requires_reply'].sum()
        else:
            reply_required = 0

        if total_comments > 0:
            negative_count = polarity.isin(SentimentMetrics.NEGATIVE_SENTIMENTS).sum()
            positive_count = polarity.isin(SentimentMetrics.POSITIVE_SENTIMENTS).sum()
            negative_pct = negative_count / total_comments * 100
            positive_pct = positive_count / total_comments * 100
            reply_required_pct = reply_required / total_comments * 100
        else:
            negative_pct = positive_pct = reply_required_pct = 0

        # Report 0 rather than NaN when no comment can be scored, matching
        # the convention used for the percentage fields above.
        avg_sentiment_score = SentimentMetrics._mean_sentiment_score(polarity)
        if avg_sentiment_score is None:
            avg_sentiment_score = 0.0

        sentiment_dist = polarity.value_counts(normalize=True) * 100

        return {
            'total_comments': total_comments,
            'total_reply_required': int(reply_required),
            'reply_required_pct': reply_required_pct,
            'avg_sentiment_score': avg_sentiment_score,
            'negative_pct': negative_pct,
            'positive_pct': positive_pct,
            'sentiment_distribution': sentiment_dist.to_dict(),
        }

    @staticmethod
    def calculate_brand_metrics(df):
        """
        Calculate metrics by brand

        Args:
            df: Sentiment dataframe with a 'brand' column

        Returns:
            dict: Brand -> overall metrics (see calculate_overall_metrics).
                Rows with a missing brand are not reported.
        """
        return SentimentMetrics._metrics_by_group(df, 'brand')

    @staticmethod
    def calculate_platform_metrics(df):
        """
        Calculate metrics by platform

        Args:
            df: Sentiment dataframe with a 'platform' column

        Returns:
            dict: Platform -> overall metrics (see
                calculate_overall_metrics). Rows with a missing platform
                are not reported.
        """
        return SentimentMetrics._metrics_by_group(df, 'platform')

    @staticmethod
    def calculate_content_engagement_score(content_df):
        """
        Calculate engagement score for a content piece

        The score is the sum of four capped components:
        comment volume (max 30, reached at 100 comments), average
        sentiment (max 40), intent diversity (max 20, reached at 8
        distinct intents) and share of comments requiring a reply (max 10).

        Args:
            content_df: DataFrame for a single content, with the columns
                'sentiment_polarity', 'intent' (comma-separated labels)
                and 'requires_reply'

        Returns:
            float: Engagement score (0-100), rounded to 2 decimals;
                0 for an empty dataframe
        """
        comment_count = len(content_df)
        if comment_count == 0:
            return 0

        comment_score = min(comment_count / 100 * 30, 30)

        # Without any scorable label the average would be NaN and poison
        # the total, so treat that case as neutral sentiment.
        avg_sentiment = SentimentMetrics._mean_sentiment_score(
            content_df['sentiment_polarity']
        )
        if avg_sentiment is None:
            avg_sentiment = 0.0
        # Rescale the [-2, 2] average onto [0, 40].
        sentiment_score = ((avg_sentiment + 2) / 4) * 40

        intents = content_df['intent'].str.split(',').explode().str.strip()
        distinct_intents = set(intents.dropna())
        # A trailing or doubled comma yields an empty token, not an intent.
        distinct_intents.discard('')
        intent_score = min(len(distinct_intents) / 8 * 20, 20)

        reply_rate = content_df['requires_reply'].sum() / comment_count
        interaction_score = reply_rate * 10

        total_score = comment_score + sentiment_score + intent_score + interaction_score
        return round(total_score, 2)

    @staticmethod
    def get_sentiment_health_status(negative_pct):
        """
        Determine health status based on negative sentiment percentage

        Args:
            negative_pct: Percentage of negative sentiments

        Returns:
            tuple: (status, color). Below 10 is "Excellent", below 20
                "Good", below 30 "Fair", below 50 "Poor", anything else
                "Critical".
        """
        for upper_bound, status in SentimentMetrics.HEALTH_THRESHOLDS:
            if negative_pct < upper_bound:
                return status
        return SentimentMetrics.HEALTH_FALLBACK

    @staticmethod
    def calculate_intent_priority_score(intent_counts):
        """
        Calculate priority score for different intents

        Args:
            intent_counts: Dictionary of intent counts

        Returns:
            dict: Priority scores for each intent (count multiplied by the
                intent's weight; unknown intents use a weight of 1)
        """
        weights = SentimentMetrics.INTENT_PRIORITY_WEIGHTS
        fallback = SentimentMetrics.DEFAULT_INTENT_PRIORITY
        return {
            intent: count * weights.get(intent, fallback)
            for intent, count in intent_counts.items()
        }

    @staticmethod
    def calculate_response_urgency(df):
        """
        Calculate response urgency metrics

        Only rows whose 'requires_reply' equals True are considered:
        negative sentiment is urgent; neutral sentiment with a
        'feedback_negative' or 'request' intent is high priority; positive
        is medium; very positive is low. Rows matching none of these rules
        are not counted.

        Args:
            df: Sentiment dataframe with the columns 'requires_reply',
                'sentiment_polarity' and 'intent'

        Returns:
            dict: Urgency metrics ('urgent_count', 'high_priority_count',
                'medium_priority_count', 'low_priority_count')
        """
        pending = df[df['requires_reply'].eq(True)]

        if len(pending) == 0:
            return {
                'urgent_count': 0,
                'high_priority_count': 0,
                'medium_priority_count': 0,
                'low_priority_count': 0,
            }

        polarity = pending['sentiment_polarity']
        needs_action = pending['intent'].str.contains(
            'feedback_negative|request', na=False
        )

        is_urgent = polarity.isin(SentimentMetrics.NEGATIVE_SENTIMENTS)
        is_high = (polarity == 'neutral') & needs_action
        is_medium = polarity == 'positive'
        is_low = polarity == 'very_positive'

        return {
            'urgent_count': int(is_urgent.sum()),
            'high_priority_count': int(is_high.sum()),
            'medium_priority_count': int(is_medium.sum()),
            'low_priority_count': int(is_low.sum()),
        }

    @staticmethod
    def calculate_trend_indicator(df, current_period, previous_period, metric='sentiment_score'):
        """
        Calculate trend indicator comparing two periods

        Args:
            df: Sentiment dataframe
            current_period: Tuple of (start_date, end_date) for current period
            previous_period: Tuple of (start_date, end_date) for previous period
            metric: Metric to compare. 'sentiment_score' compares the
                average sentiment weight; any other value compares the
                number of comments.

        Returns:
            dict: Trend information. When a comparison is possible the keys
                are 'trend', 'change' (percent, relative to the magnitude
                of the previous value), 'current_value' and
                'previous_value'. When the timestamp column is missing,
                either period has no rows, or no sentiment can be scored,
                only {'trend': 'stable', 'change': 0} is returned.
        """
        no_trend = {'trend': 'stable', 'change': 0}

        if 'comment_timestamp' not in df.columns:
            return no_trend

        timestamps = df['comment_timestamp']

        def rows_within(period):
            # Both ends of the period are inclusive.
            start = pd.Timestamp(period[0])
            end = pd.Timestamp(period[1])
            return df[(timestamps >= start) & (timestamps <= end)]

        current_df = rows_within(current_period)
        previous_df = rows_within(previous_period)

        if len(current_df) == 0 or len(previous_df) == 0:
            return no_trend

        is_sentiment = metric == 'sentiment_score'
        if is_sentiment:
            current_value = SentimentMetrics._mean_sentiment_score(
                current_df['sentiment_polarity']
            )
            previous_value = SentimentMetrics._mean_sentiment_score(
                previous_df['sentiment_polarity']
            )
            if current_value is None or previous_value is None:
                return no_trend
        else:
            current_value = len(current_df)
            previous_value = len(previous_df)

        # Divide by the magnitude of the baseline: sentiment scores can be
        # negative, and a signed divisor would flip the direction of the
        # change (e.g. -2 -> -1 would be reported as a decline).
        if previous_value != 0:
            change = (current_value - previous_value) / abs(previous_value) * 100
        else:
            change = 0

        if abs(change) < 5:
            trend = 'stable'
        elif change > 0:
            trend = 'improving' if is_sentiment else 'increasing'
        else:
            trend = 'declining' if is_sentiment else 'decreasing'

        return {
            'trend': trend,
            'change': round(change, 2),
            'current_value': round(current_value, 2),
            'previous_value': round(previous_value, 2),
        }