Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

App Files Files Community

Sentiment_analysis / visualization /agents /content_summary_agent.py

Danialebrat

Deploying sentiment analysis project

9858829 about 1 month ago

raw

history blame contribute delete

13.9 kB

	"""
	Content Summary Agent
	Analyzes and summarizes comments for content pieces
	"""
	import pandas as pd
	from typing import Dict, Any, List
	import sys
	from pathlib import Path

	# Add parent directory to path
	parent_dir = Path(__file__).resolve().parent.parent
	sys.path.append(str(parent_dir))

	from agents.base_agent import BaseVisualizationAgent
	from utils.llm_helper import LLMHelper


	class ContentSummaryAgent(BaseVisualizationAgent):
	    """
	    Agent that analyzes and summarizes the comments attached to a content piece.

	    The agent filters comments by sentiment, samples them down to a size that
	    fits in a single LLM request, builds a prompt, and asks the LLM for a JSON
	    summary containing themes, praise points, complaints, FAQs, unexpected
	    insights and action recommendations.
	    """

	    # Values of the 'sentiment_polarity' column that count as each sentiment.
	    _NEGATIVE_LABELS = ('negative', 'very_negative')
	    _POSITIVE_LABELS = ('positive', 'very_positive')

	    # Upper bound on the number of comments sent to the LLM in one request.
	    _MAX_COMMENTS = 100

	    # Comment text longer than this is truncated (and suffixed with '...').
	    _MAX_TEXT_CHARS = 300

	    # Fixed seed so the same input always yields the same sample.
	    _SAMPLE_SEED = 42

	    def __init__(self, model: str = "gpt-5-nano", temperature: float = 1):
	        """
	        Initialize Content Summary Agent

	        Args:
	            model: LLM model to use
	            temperature: Temperature for generation (lower for more focused summaries)
	        """
	        super().__init__(name="ContentSummaryAgent", model=model, temperature=temperature)
	        self.llm_helper = LLMHelper(model=model, temperature=temperature)

	    def validate_input(self, input_data: Dict[str, Any]) -> bool:
	        """
	        Check that the input carries everything `process` needs.

	        Args:
	            input_data: Input dictionary; must contain 'content_sk',
	                'content_description' and 'comments' (a list or DataFrame).

	        Returns:
	            True if valid, False otherwise (the reason is logged at error level)
	        """
	        # Guard first: the membership tests below would raise on None / non-mappings.
	        if not isinstance(input_data, dict):
	            self.log_processing("Input data must be a dictionary", level="error")
	            return False

	        for field in ('content_sk', 'content_description', 'comments'):
	            if field not in input_data:
	                self.log_processing(f"Missing required field: {field}", level="error")
	                return False

	        if not isinstance(input_data['comments'], (list, pd.DataFrame)):
	            self.log_processing("Comments must be a list or DataFrame", level="error")
	            return False

	        return True

	    @staticmethod
	    def _empty_summary(executive_summary: str = '') -> Dict[str, Any]:
	        """Return a summary dict with every expected field present and empty."""
	        # Built fresh on every call so callers never share the list objects.
	        return {
	            'executive_summary': executive_summary,
	            'main_themes': [],
	            'praise_points': [],
	            'key_complaints': [],
	            'frequently_asked_questions': [],
	            'unexpected_insights': [],
	            'action_recommendations': []
	        }

	    def _empty_result(self, content_sk: Any, sentiment_type: str, message: str) -> Dict[str, Any]:
	        """Build the successful 'nothing to analyze' result without calling the LLM."""
	        return {
	            'success': True,
	            'content_sk': content_sk,
	            'sentiment_type': sentiment_type,
	            'summary': self._empty_summary(message),
	            'metadata': {
	                'total_comments_analyzed': 0,
	                'model_used': self.model,
	                'tokens_used': 0
	            }
	        }

	    @staticmethod
	    def _to_dataframe(comments: Any) -> pd.DataFrame:
	        """Return the comments as a DataFrame that is safe to modify (lists are converted, DataFrames copied)."""
	        if isinstance(comments, list):
	            return pd.DataFrame(comments)
	        return comments.copy()

	    def _select_by_sentiment(self, comments_df: pd.DataFrame, sentiment_type: str) -> pd.DataFrame:
	        """
	        Keep only the rows matching the requested sentiment.

	        'negative' and 'positive' filter on the 'sentiment_polarity' column
	        (a missing column raises KeyError); any other value, including
	        'combined', returns the frame unfiltered.
	        """
	        if sentiment_type == 'negative':
	            labels = self._NEGATIVE_LABELS
	        elif sentiment_type == 'positive':
	            labels = self._POSITIVE_LABELS
	        else:
	            return comments_df
	        return comments_df[comments_df['sentiment_polarity'].isin(list(labels))]

	    def _sample_for_llm(self, comments_df: pd.DataFrame, sentiment_type: str) -> pd.DataFrame:
	        """
	        Reduce the comments to at most `_MAX_COMMENTS` rows, deterministically.

	        For 'combined' the sample takes up to half the budget from negative and
	        half from positive comments, then tops up from the remaining comments
	        so that neutral/other comments are not silently discarded and the
	        budget is used even when one side is small.
	        """
	        limit = self._MAX_COMMENTS
	        if len(comments_df) <= limit:
	            return comments_df

	        # Unique positional index so already-picked rows can be excluded by label below.
	        pool = comments_df.reset_index(drop=True)
	        seed = self._SAMPLE_SEED

	        if sentiment_type != 'combined' or 'sentiment_polarity' not in pool.columns:
	            return pool.sample(n=limit, random_state=seed)

	        half = limit // 2
	        polarity = pool['sentiment_polarity']
	        negative = pool[polarity.isin(list(self._NEGATIVE_LABELS))]
	        positive = pool[polarity.isin(list(self._POSITIVE_LABELS))]

	        chosen = pd.concat([
	            negative.sample(n=min(half, len(negative)), random_state=seed),
	            positive.sample(n=min(half, len(positive)), random_state=seed),
	        ])

	        open_slots = limit - len(chosen)
	        if open_slots > 0:
	            leftovers = pool.drop(index=chosen.index)
	            filler = leftovers.sample(n=min(open_slots, len(leftovers)), random_state=seed)
	            chosen = pd.concat([chosen, filler])

	        return chosen

	    @staticmethod
	    def _is_missing(value: Any) -> bool:
	        """Return True for None / NaN / NA scalars; strings and containers are never missing."""
	        if value is None:
	            return True
	        if isinstance(value, str):
	            return False
	        try:
	            return bool(pd.isna(value))
	        except (TypeError, ValueError):
	            # pd.isna returned an array (value is list-like) - treat as present.
	            return False

	    def _comment_text(self, row: Any) -> str:
	        """Return the comment text, preferring 'display_text' and falling back to 'original_text'."""
	        for column in ('display_text', 'original_text'):
	            value = row.get(column)
	            if not self._is_missing(value):
	                return str(value)
	        return ''

	    def _prepare_comments_context(self, comments: Any, sentiment_type: str = 'negative') -> str:
	        """
	        Prepare comments data for LLM analysis

	        Args:
	            comments: Comments as DataFrame or list of dicts
	            sentiment_type: Type of sentiment to analyze ('negative', 'positive', 'combined')

	        Returns:
	            Formatted string with one numbered entry (text, sentiment, intent) per comment
	        """
	        comments_df = self._select_by_sentiment(self._to_dataframe(comments), sentiment_type)
	        comments_df = self._sample_for_llm(comments_df, sentiment_type)

	        limit = self._MAX_TEXT_CHARS
	        entries = []
	        # Number entries sequentially: the DataFrame index is arbitrary after
	        # filtering/sampling and is not guaranteed to be an integer.
	        for number, (_, row) in enumerate(comments_df.iterrows(), start=1):
	            text = self._comment_text(row)
	            snippet = text[:limit] + ('...' if len(text) > limit else '')
	            sentiment = row.get('sentiment_polarity', 'unknown')
	            intent = row.get('intent', 'unknown')

	            entry = f"""
	Comment #{number}:
	- Text: {snippet}
	- Sentiment: {sentiment}
	- Intent: {intent}
	"""
	            entries.append(entry)

	        return "\n".join(entries)

	    def _generate_summary_prompt(
	        self,
	        content_description: str,
	        comments_context: str,
	        total_comments: int,
	        sentiment_type: str = 'negative'
	    ) -> str:
	        """
	        Generate prompt for LLM

	        Args:
	            content_description: Description of the content
	            comments_context: Formatted comments
	            total_comments: Total number of comments
	            sentiment_type: Type of sentiment being analyzed ('negative', 'positive', 'combined')

	        Returns:
	            Prompt string
	        """
	        # Any sentiment type other than 'negative'/'positive' gets the balanced (combined) focus.
	        if sentiment_type == 'negative':
	            focus_instruction = "Focus on understanding negative feedback, complaints, and issues that need attention."
	        elif sentiment_type == 'positive':
	            focus_instruction = "Focus on understanding what users love, praise points, and successful elements that should be maintained or amplified."
	        else:
	            focus_instruction = "Provide a balanced analysis covering both positive feedback and areas for improvement."

	        prompt = f"""Analyze the {sentiment_type} comments below for the following content and provide a brief executive summary.

	Content: {content_description}

	Total Comments Analyzed: {total_comments}

	Analysis Focus: {focus_instruction}

	Comments to Analyze:
	{comments_context}

	Task: Provide a concise executive summary in JSON format with the following structure:

	{{
	"executive_summary": "2-3 sentence high-level overview focusing on {sentiment_type} sentiment",
	"main_themes": [
	{{
	"theme": "theme name",
	"sentiment": "positive/negative/mixed",
	"description": "brief description"
	}}
	],
	"praise_points": ["point 1", "point 2", "point 3"],
	"key_complaints": ["complaint 1", "complaint 2", "complaint 3"],
	"frequently_asked_questions": ["question 1", "question 2"],
	"unexpected_insights": ["insight 1", "insight 2"],
	"action_recommendations": [
	{{
	"priority": "high/medium/low",
	"action": "recommended action"
	}}
	]
	}}

	Guidelines:
	- Be concise and actionable
	- Focus on the most important insights from {sentiment_type} comments
	- Limit each list to top 3-5 items
	- If a section has no relevant items, use an empty list
	- Executive summary should capture the overall patterns and key takeaways
	"""
	        return prompt

	    def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
	        """
	        Process comments and generate summary

	        Args:
	            input_data: {
	                'content_sk': content identifier,
	                'content_description': content title/description,
	                'comments': DataFrame or list of comment dicts,
	                'sentiment_type': 'negative', 'positive', or 'combined' (optional, defaults to 'negative')
	            }

	        Returns:
	            On success:
	            {
	                'success': True,
	                'content_sk': str,
	                'sentiment_type': str,
	                'summary': {
	                    'executive_summary': str,
	                    'main_themes': list,
	                    'praise_points': list,
	                    'key_complaints': list,
	                    'frequently_asked_questions': list,
	                    'unexpected_insights': list,
	                    'action_recommendations': list
	                },
	                'metadata': {
	                    'total_comments_analyzed': int,
	                    'model_used': str,
	                    'tokens_used': int
	                }
	            }
	            On invalid input: {'success': False, 'error': str, 'content_sk': ...}.
	            On any other failure: whatever `self.handle_error` returns.
	        """
	        # Resolve the identifiers used in error reporting up front, without
	        # assuming input_data is a dict, so the except handler cannot itself raise.
	        is_dict = isinstance(input_data, dict)
	        content_sk = input_data.get('content_sk', 'unknown') if is_dict else 'unknown'
	        # Default to negative for backward compatibility
	        sentiment_type = input_data.get('sentiment_type', 'negative') if is_dict else 'negative'
	        error_context = f"content_sk={content_sk}, sentiment_type={sentiment_type}"

	        try:
	            if not self.validate_input(input_data):
	                return {
	                    'success': False,
	                    'error': 'Invalid input data',
	                    'content_sk': content_sk
	                }

	            content_description = input_data['content_description']

	            self.log_processing(f"Starting {sentiment_type} analysis for content: {content_sk}")

	            comments_df = self._to_dataframe(input_data['comments'])
	            if len(comments_df) == 0:
	                return self._empty_result(
	                    content_sk, sentiment_type, 'No comments available for analysis.'
	                )

	            # Filter before building any context so the LLM is skipped cheaply
	            # when nothing matches the requested sentiment.
	            relevant_df = self._select_by_sentiment(comments_df, sentiment_type)
	            filtered_count = len(relevant_df)
	            if filtered_count == 0:
	                return self._empty_result(
	                    content_sk, sentiment_type, f'No {sentiment_type} comments available for analysis.'
	                )

	            comments_context = self._prepare_comments_context(relevant_df, sentiment_type)

	            # NOTE: filtered_count is the number of matching comments, which can
	            # exceed the number actually included in the prompt after sampling.
	            prompt = self._generate_summary_prompt(
	                content_description,
	                comments_context,
	                filtered_count,
	                sentiment_type
	            )

	            system_message = """You are an expert social media analyst specializing in
	sentiment analysis and community insights. Provide concise, actionable summaries
	that help content creators understand their audience feedback."""

	            self.log_processing(f"Calling LLM for {sentiment_type} summary generation")
	            response = self.llm_helper.get_structured_completion(
	                prompt=prompt,
	                system_message=system_message,
	                max_retries=3
	            )

	            if not response['success']:
	                return self.handle_error(
	                    Exception(response.get('error', 'LLM call failed')),
	                    context=error_context
	                )

	            summary = response['content']
	            if not isinstance(summary, dict):
	                # Report a malformed payload explicitly instead of failing later
	                # with an opaque TypeError while filling in defaults.
	                return self.handle_error(
	                    ValueError(f"LLM returned {type(summary).__name__} instead of a JSON object"),
	                    context=error_context
	                )

	            # Ensure all expected fields exist
	            for key, default_value in self._empty_summary().items():
	                summary.setdefault(key, default_value)

	            self.log_processing(f"Successfully generated {sentiment_type} summary for content: {content_sk}")

	            # Missing usage metadata must not discard an otherwise valid summary.
	            usage = response.get('usage') or {}

	            return {
	                'success': True,
	                'content_sk': content_sk,
	                'sentiment_type': sentiment_type,
	                'summary': summary,
	                'metadata': {
	                    'total_comments_analyzed': filtered_count,
	                    'model_used': response.get('model', self.model),
	                    'tokens_used': usage.get('total_tokens', 0)
	                }
	            }

	        except Exception as e:
	            return self.handle_error(e, context=error_context)