| """ |
| Content Summary Agent |
| Analyzes and summarizes comments for content pieces |
| """ |
| import pandas as pd |
| from typing import Dict, Any, List |
| import sys |
| from pathlib import Path |
|
|
| |
# Append the parent of this file's directory to sys.path.
# NOTE(review): presumably this is what lets the project-local `agents` and
# `utils` imports below resolve when the module is loaded from outside the
# package root -- confirm against the project layout.
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))
|
|
| from agents.base_agent import BaseVisualizationAgent |
| from utils.llm_helper import LLMHelper |
|
|
|
|
class ContentSummaryAgent(BaseVisualizationAgent):
    """
    Agent that analyzes and summarizes comments for content.

    Builds an LLM prompt from a (possibly sampled) set of comments and asks for
    themes, praise points, complaints, FAQs, insights and recommended actions
    as a JSON object.
    """

    # Values of the 'sentiment_polarity' column that belong to each polarity.
    # Any sentiment_type not listed here (e.g. 'combined') is left unfiltered.
    _SENTIMENT_LABELS = {
        'negative': ['negative', 'very_negative'],
        'positive': ['positive', 'very_positive'],
    }

    # At most this many comments are embedded in a single prompt.
    _MAX_PROMPT_COMMENTS = 100
    # Per-polarity cap used when drawing a balanced 'combined' sample.
    _MAX_PER_POLARITY = 50
    # Comment text longer than this is truncated and suffixed with '...'.
    _MAX_TEXT_CHARS = 300
    # Fixed seed so repeated runs over the same data sample the same comments.
    _SAMPLE_SEED = 42

    # Summary keys whose default value is an empty list.
    _SUMMARY_LIST_FIELDS = (
        'main_themes',
        'praise_points',
        'key_complaints',
        'frequently_asked_questions',
        'unexpected_insights',
        'action_recommendations',
    )

    _SYSTEM_MESSAGE = (
        "You are an expert social media analyst specializing in\n"
        "sentiment analysis and community insights. Provide concise, actionable summaries\n"
        "that help content creators understand their audience feedback."
    )

    def __init__(self, model: str = "gpt-5-nano", temperature: float = 1):
        """
        Initialize Content Summary Agent

        Args:
            model: LLM model to use
            temperature: Temperature for generation (lower for more focused summaries)
        """
        super().__init__(name="ContentSummaryAgent", model=model, temperature=temperature)
        self.llm_helper = LLMHelper(model=model, temperature=temperature)

    def validate_input(self, input_data: Dict[str, Any]) -> bool:
        """
        Validate input data

        Checks that 'content_sk', 'content_description' and 'comments' are
        present and that 'comments' is a list or a DataFrame. Each failure is
        logged at error level before returning.

        Args:
            input_data: Input dictionary

        Returns:
            True if valid, False otherwise
        """
        for field in ('content_sk', 'content_description', 'comments'):
            if field not in input_data:
                self.log_processing(f"Missing required field: {field}", level="error")
                return False

        if not isinstance(input_data['comments'], (list, pd.DataFrame)):
            self.log_processing("Comments must be a list or DataFrame", level="error")
            return False

        return True

    @staticmethod
    def _to_dataframe(comments: Any) -> pd.DataFrame:
        """Return comments as a DataFrame, converting a list of dicts if needed."""
        # No defensive copy: the frame is only filtered, sampled and read,
        # all of which produce new objects and leave the caller's data intact.
        return pd.DataFrame(comments) if isinstance(comments, list) else comments

    @classmethod
    def _filter_by_sentiment(cls, comments_df: pd.DataFrame, sentiment_type: str) -> pd.DataFrame:
        """Keep only rows matching sentiment_type; other types pass through unfiltered."""
        labels = cls._SENTIMENT_LABELS.get(sentiment_type)
        if labels is None:
            return comments_df
        return comments_df[comments_df['sentiment_polarity'].isin(labels)]

    def _sample_for_prompt(self, comments_df: pd.DataFrame, sentiment_type: str) -> pd.DataFrame:
        """Cap the comments sent to the LLM, balancing polarities for 'combined'."""
        if len(comments_df) <= self._MAX_PROMPT_COMMENTS:
            return comments_df

        if sentiment_type == 'combined':
            halves = []
            for polarity in ('negative', 'positive'):
                subset = self._filter_by_sentiment(comments_df, polarity)
                halves.append(
                    subset.sample(
                        n=min(self._MAX_PER_POLARITY, len(subset)),
                        random_state=self._SAMPLE_SEED,
                    )
                )
            balanced = pd.concat(halves)
            if len(balanced) > 0:
                return balanced
            # No polar comments at all: fall through to a uniform sample so the
            # prompt is not left empty (small inputs already include neutral
            # comments in 'combined' mode).

        return comments_df.sample(n=self._MAX_PROMPT_COMMENTS, random_state=self._SAMPLE_SEED)

    @staticmethod
    def _extract_text(row: pd.Series) -> str:
        """Return the comment text, preferring 'display_text' over 'original_text'."""
        for column in ('display_text', 'original_text'):
            value = row.get(column)
            if value is None:
                continue
            # Null cells (NaN / NA) are treated like a missing column.
            if pd.api.types.is_scalar(value) and pd.isna(value):
                continue
            # Coerce so that slicing and len() below are always done on a str.
            return value if isinstance(value, str) else str(value)
        return ''

    def _prepare_comments_context(self, comments: Any, sentiment_type: str = 'negative') -> str:
        """
        Prepare comments data for LLM analysis

        Filters the comments by sentiment, caps them at the prompt limit
        (sampling with a fixed seed) and renders each one as a short text entry.

        Args:
            comments: Comments as DataFrame or list of dicts
            sentiment_type: Type of sentiment to analyze ('negative', 'positive', 'combined')

        Returns:
            Formatted string with comment data
        """
        selected = self._sample_for_prompt(
            self._filter_by_sentiment(self._to_dataframe(comments), sentiment_type),
            sentiment_type,
        )

        limit = self._MAX_TEXT_CHARS
        entries = []
        # Number entries by position: the index labels are scrambled by the
        # filtering/sampling above and are not guaranteed to be integers.
        for number, (_, row) in enumerate(selected.iterrows(), start=1):
            text = self._extract_text(row)
            snippet = text[:limit] + ('...' if len(text) > limit else '')
            sentiment = row.get('sentiment_polarity', 'unknown')
            intent = row.get('intent', 'unknown')
            entries.append(
                f"\nComment #{number}:\n"
                f"- Text: {snippet}\n"
                f"- Sentiment: {sentiment}\n"
                f"- Intent: {intent}\n"
            )

        return "\n".join(entries)

    def _generate_summary_prompt(
        self,
        content_description: str,
        comments_context: str,
        total_comments: int,
        sentiment_type: str = 'negative'
    ) -> str:
        """
        Generate prompt for LLM

        Args:
            content_description: Description of the content
            comments_context: Formatted comments
            total_comments: Total number of comments
            sentiment_type: Type of sentiment being analyzed ('negative', 'positive', 'combined')

        Returns:
            Prompt string
        """
        focus_by_type = {
            'negative': "Focus on understanding negative feedback, complaints, and issues that need attention.",
            'positive': "Focus on understanding what users love, praise points, and successful elements that should be maintained or amplified.",
        }
        # Any other sentiment type gets the balanced instruction.
        focus_instruction = focus_by_type.get(
            sentiment_type,
            "Provide a balanced analysis covering both positive feedback and areas for improvement.",
        )

        # The template lines start at column 0 on purpose: the literal is sent
        # to the LLM verbatim, so source indentation would leak into the prompt.
        prompt = f"""Analyze the {sentiment_type} comments below for the following content and provide a brief executive summary.

**Content:** {content_description}

**Total Comments Analyzed:** {total_comments}

**Analysis Focus:** {focus_instruction}

**Comments to Analyze:**
{comments_context}

**Task:** Provide a concise executive summary in JSON format with the following structure:

{{
    "executive_summary": "2-3 sentence high-level overview focusing on {sentiment_type} sentiment",
    "main_themes": [
        {{
            "theme": "theme name",
            "sentiment": "positive/negative/mixed",
            "description": "brief description"
        }}
    ],
    "praise_points": ["point 1", "point 2", "point 3"],
    "key_complaints": ["complaint 1", "complaint 2", "complaint 3"],
    "frequently_asked_questions": ["question 1", "question 2"],
    "unexpected_insights": ["insight 1", "insight 2"],
    "action_recommendations": [
        {{
            "priority": "high/medium/low",
            "action": "recommended action"
        }}
    ]
}}

**Guidelines:**
- Be concise and actionable
- Focus on the most important insights from {sentiment_type} comments
- Limit each list to top 3-5 items
- If a section has no relevant items, use an empty list
- Executive summary should capture the overall patterns and key takeaways
"""
        return prompt

    def _blank_summary(self, executive_summary: str = '') -> Dict[str, Any]:
        """Return a new summary dict with every expected key and fresh empty lists."""
        summary: Dict[str, Any] = {'executive_summary': executive_summary}
        for field in self._SUMMARY_LIST_FIELDS:
            summary[field] = []
        return summary

    def _empty_result(self, content_sk: Any, sentiment_type: str, message: str) -> Dict[str, Any]:
        """Build the successful result returned when there is nothing to send to the LLM."""
        return {
            'success': True,
            'content_sk': content_sk,
            'sentiment_type': sentiment_type,
            'summary': self._blank_summary(message),
            'metadata': {
                'total_comments_analyzed': 0,
                'model_used': self.model,
                'tokens_used': 0
            }
        }

    def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process comments and generate summary

        Args:
            input_data: {
                'content_sk': content identifier,
                'content_description': content title/description,
                'comments': DataFrame or list of comment dicts,
                'sentiment_type': 'negative', 'positive', or 'combined' (optional, defaults to 'negative')
            }

        Returns:
            {
                'success': bool,
                'content_sk': str,
                'sentiment_type': str,
                'summary': {
                    'executive_summary': str,
                    'main_themes': list,
                    'praise_points': list,
                    'key_complaints': list,
                    'frequently_asked_questions': list,
                    'unexpected_insights': list,
                    'action_recommendations': list
                },
                'metadata': {
                    'total_comments_analyzed': int,
                    'model_used': str,
                    'tokens_used': int
                }
            }

            'total_comments_analyzed' is the number of comments matching the
            sentiment filter; the prompt itself embeds at most
            _MAX_PROMPT_COMMENTS of them. When validation fails the result is
            {'success': False, 'error': ..., 'content_sk': ...}. On an LLM
            failure or any exception, the value of self.handle_error(...) is
            returned instead.
        """
        try:
            if not self.validate_input(input_data):
                return {
                    'success': False,
                    'error': 'Invalid input data',
                    'content_sk': input_data.get('content_sk', 'unknown')
                }

            content_sk = input_data['content_sk']
            content_description = input_data['content_description']
            sentiment_type = input_data.get('sentiment_type', 'negative')

            self.log_processing(f"Starting {sentiment_type} analysis for content: {content_sk}")

            comments_df = self._to_dataframe(input_data['comments'])

            if len(comments_df) == 0:
                return self._empty_result(
                    content_sk, sentiment_type, 'No comments available for analysis.'
                )

            # Count the matching comments before formatting anything, so no
            # prompt context is built when there is nothing to analyze.
            filtered_count = len(self._filter_by_sentiment(comments_df, sentiment_type))
            if filtered_count == 0:
                return self._empty_result(
                    content_sk,
                    sentiment_type,
                    f'No {sentiment_type} comments available for analysis.'
                )

            comments_context = self._prepare_comments_context(comments_df, sentiment_type)
            prompt = self._generate_summary_prompt(
                content_description,
                comments_context,
                filtered_count,
                sentiment_type
            )

            self.log_processing(f"Calling LLM for {sentiment_type} summary generation")
            response = self.llm_helper.get_structured_completion(
                prompt=prompt,
                system_message=self._SYSTEM_MESSAGE,
                max_retries=3
            )

            if not response['success']:
                return self.handle_error(
                    Exception(response.get('error', 'LLM call failed')),
                    context=f"content_sk={content_sk}, sentiment_type={sentiment_type}"
                )

            # Fill in any section the model left out so callers can rely on
            # every key being present.
            summary = response['content']
            for key, default in self._blank_summary().items():
                if key not in summary:
                    summary[key] = default

            self.log_processing(f"Successfully generated {sentiment_type} summary for content: {content_sk}")

            return {
                'success': True,
                'content_sk': content_sk,
                'sentiment_type': sentiment_type,
                'summary': summary,
                'metadata': {
                    'total_comments_analyzed': filtered_count,
                    'model_used': response['model'],
                    'tokens_used': response['usage']['total_tokens']
                }
            }

        except Exception as e:
            return self.handle_error(
                e,
                context=f"content_sk={input_data.get('content_sk', 'unknown')}, sentiment_type={input_data.get('sentiment_type', 'negative')}"
            )