Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

App Files Files Community

Sentiment_analysis / visualization /agents /helpscout_summary_agent.py

Danialebrat

Adding HelpScout to UI

58db664 about 1 month ago

raw

history blame contribute delete

12.8 kB

	"""
	HelpScout Summary Agent
	Generates a page-level summary report from filtered HelpScout conversations.
	Analyses the already-extracted SUMMARY fields to surface patterns and insights
	beyond the pre-tagged topics / sentiments.
	"""
	import json
	import sys
	from pathlib import Path
	from typing import Any, Dict

	import pandas as pd

	# Put the directory one level above this file's folder (visualization/) on
	# sys.path so the absolute `agents.*` and `utils.*` imports below resolve even
	# when this module is loaded from outside that directory.
	_parent = Path(__file__).resolve().parent.parent
	if str(_parent) not in sys.path:
	    # Prepend so this project's packages are found before any same-named ones.
	    sys.path.insert(0, str(_parent))

	from agents.base_agent import BaseVisualizationAgent
	from utils.llm_helper import LLMHelper
	from utils.helpscout_utils import topic_label, load_topic_taxonomy


	class HelpScoutSummaryAgent(BaseVisualizationAgent):
	    """
	    Produces an executive summary report from a filtered set of HelpScout
	    conversations by reading their SUMMARY fields through an LLM.

	    The agent samples at most ``max_conversations`` rows, sends their
	    truncated summaries plus aggregate statistics to the LLM, and returns the
	    structured reply with every expected section present.

	    NOTE(review): ``log_processing``, ``handle_error`` and ``self.model`` are
	    used here but not defined in this class; they are presumably provided by
	    ``BaseVisualizationAgent`` -- confirm against the base class.
	    """

	    MAX_SUMMARY_CHARS = 250  # per conversation summary sent to LLM

	    # Column used both to stratify the sample and to label each summary line.
	    _SENTIMENT_COL = "sentiment_polarity"

	    # List-valued sections that every summary payload must contain.
	    _LIST_SECTIONS = (
	        "top_themes",
	        "top_complaints",
	        "unexpected_insights",
	        "notable_quotes",
	    )

	    # (flag key, label shown to the LLM) for the boolean-flag statistics line.
	    _FLAG_LABELS = (
	        ("is_refund_request", "Refund requests"),
	        ("is_cancellation", "Cancellations"),
	        ("is_membership", "Membership joins"),
	    )

	    def __init__(self, model: str = "gpt-5-nano", temperature: float = 1,
	                 max_conversations: int = 300):
	        """
	        Args:
	            model: LLM model name, passed to both the base agent and LLMHelper.
	            temperature: Sampling temperature, passed to both as well.
	            max_conversations: Default cap on conversations sent to the LLM;
	                can be overridden per call through ``process``.
	        """
	        super().__init__(name="HelpScoutSummaryAgent", model=model, temperature=temperature)
	        self.llm_helper = LLMHelper(model=model, temperature=temperature)
	        self.max_conversations = max_conversations
	        self.taxonomy = load_topic_taxonomy()

	    # ─────────────────────────────────────────────────────────────
	    # BaseVisualizationAgent interface
	    # ─────────────────────────────────────────────────────────────

	    def validate_input(self, input_data: Dict[str, Any]) -> bool:
	        """
	        Check that ``input_data`` is a dict whose 'conversations' entry is a
	        DataFrame with a 'summary' column. Logs the first failure found.

	        Returns:
	            True when the input is usable by ``process``, False otherwise.
	        """
	        if not isinstance(input_data, dict):
	            self.log_processing("Input data must be a dict", level="error")
	            return False
	        if "conversations" not in input_data:
	            self.log_processing("Missing 'conversations' key", level="error")
	            return False
	        conversations = input_data["conversations"]
	        if not isinstance(conversations, pd.DataFrame):
	            self.log_processing("'conversations' must be a DataFrame", level="error")
	            return False
	        if "summary" not in conversations.columns:
	            self.log_processing("DataFrame must contain a 'summary' column", level="error")
	            return False
	        return True

	    def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
	        """
	        Generate an aggregate summary report from filtered HelpScout conversations.

	        Args:
	            input_data: {
	                'conversations': pd.DataFrame (must have 'summary' column),
	                'filter_description': str (human-readable applied filters),
	                'max_conversations': int (optional; overrides instance default),
	            }

	        Returns:
	            On success:
	            {
	                'success': True,
	                'summary': {
	                    'executive_summary': str,
	                    'top_themes': [{'theme': str, 'description': str, 'prevalence': str}],
	                    'top_complaints': [str],
	                    'unexpected_insights': [str],
	                    'notable_quotes': [str],
	                    ...any extra keys the LLM returned are passed through
	                },
	                'metadata': {
	                    'total_conversations_analyzed': int,
	                    'total_available': int,
	                    'model_used': str,
	                    'tokens_used': int,
	                    'filter_applied': str,
	                },
	                'error': None,
	            }
	            On invalid input: {'success': False, 'error': str, 'summary': None}.
	            On any other failure: whatever ``self.handle_error`` returns.
	        """
	        try:
	            if not self.validate_input(input_data):
	                return {"success": False, "error": "Invalid input data", "summary": None}

	            df = input_data["conversations"]

	            # An explicit None would break the slicing below; treat it as absent.
	            filter_desc = input_data.get("filter_description")
	            filter_desc = "No filters applied" if filter_desc is None else str(filter_desc)

	            total_available = len(df)
	            if total_available == 0:
	                return self._empty_result(filter_desc)

	            max_convs = input_data.get("max_conversations")
	            if max_convs is None:
	                max_convs = self.max_conversations
	            max_convs = int(max_convs)
	            if max_convs < 1:
	                # A non-positive cap would send the LLM an empty batch.
	                raise ValueError("max_conversations must be at least 1")

	            # Sample if over cap — stratified by sentiment to preserve signal
	            df_sample = self._stratified_sample(df, max_convs)
	            n_analyzed = len(df_sample)

	            self.log_processing(
	                f"Analysing {n_analyzed} of {total_available} conversations"
	                f" (filter: {filter_desc[:60]})"
	            )

	            # Build aggregate context for the LLM
	            agg_context = self._build_aggregate_context(df_sample, df)
	            prompt = self._build_prompt(agg_context, filter_desc, n_analyzed)

	            system_msg = (
	                "You are an expert customer support analyst for Musora, "
	                "a music education platform (Drumeo, Pianote, Guitareo, Singeo, PlayBass). "
	                "Your role is to synthesize customer support conversation summaries "
	                "and surface actionable insights that go beyond simple tagging."
	            )

	            response = self.llm_helper.get_structured_completion(
	                prompt=prompt,
	                system_message=system_msg,
	                max_retries=3,
	            )

	            if not response.get("success"):
	                return self.handle_error(
	                    Exception(response.get("error", "LLM call failed")),
	                    context=f"filter={filter_desc[:60]}"
	                )

	            summary = self._ensure_defaults(response.get("content"))

	            # Missing bookkeeping fields should not turn a good summary into an error.
	            usage = response.get("usage") or {}

	            return {
	                "success": True,
	                "summary": summary,
	                "metadata": {
	                    "total_conversations_analyzed": n_analyzed,
	                    "total_available": total_available,
	                    "model_used": response.get("model", self.model),
	                    "tokens_used": usage.get("total_tokens", 0),
	                    "filter_applied": filter_desc,
	                },
	                "error": None,
	            }

	        except Exception as e:
	            # input_data may not be a dict here; never raise from the handler itself.
	            context = ""
	            if isinstance(input_data, dict):
	                context = input_data.get("filter_description", "")
	            return self.handle_error(e, context=context)

	    # ─────────────────────────────────────────────────────────────
	    # Private helpers
	    # ─────────────────────────────────────────────────────────────

	    @staticmethod
	    def _allocate_quotas(group_sizes: Dict[Any, int], cap: int) -> Dict[Any, int]:
	        """
	        Split ``cap`` sample slots across groups in proportion to their size.

	        Every group gets at least one slot when ``cap`` allows it. The rest are
	        shared by the largest-remainder method using integer arithmetic, so
	        the quotas sum to exactly ``min(cap, total rows)`` and never exceed a
	        group's size.

	        Args:
	            group_sizes: Mapping of group label to row count (each >= 1).
	            cap: Total number of rows to draw.

	        Returns:
	            Mapping of group label to the number of rows to sample from it.
	        """
	        # Largest groups first; sorted() is stable, so ties keep input order.
	        labels = sorted(group_sizes, key=lambda label: group_sizes[label], reverse=True)
	        n_groups = len(labels)
	        total = sum(group_sizes.values())

	        if cap >= total:
	            return dict(group_sizes)
	        if cap <= n_groups:
	            # Not enough slots for everyone: one row each for the largest groups.
	            return {label: (1 if rank < cap else 0) for rank, label in enumerate(labels)}

	        spare_slots = cap - n_groups    # slots left after one per group
	        spare_rows = total - n_groups   # rows left after one per group
	        quotas = {}
	        remainders = []
	        for label in labels:
	            whole, remainder = divmod((group_sizes[label] - 1) * spare_slots, spare_rows)
	            quotas[label] = 1 + whole
	            remainders.append((remainder, label))

	        leftover = cap - sum(quotas.values())
	        remainders.sort(key=lambda item: item[0], reverse=True)
	        for _, label in remainders[:leftover]:
	            quotas[label] += 1
	        return quotas

	    def _stratified_sample(self, df: pd.DataFrame, cap: int) -> pd.DataFrame:
	        """
	        Return at most ``cap`` rows of ``df``, stratified by sentiment.

	        When ``df`` already fits within the cap it is returned unchanged.
	        Otherwise rows are drawn per sentiment group (missing sentiment is its
	        own group) with a fixed seed. If stratification is not possible, a
	        plain seeded random sample is used instead.
	        """
	        if len(df) <= cap:
	            return df
	        if cap < 1:
	            return df.head(0)

	        strat_col = self._SENTIMENT_COL
	        if strat_col in df.columns:
	            try:
	                labels = df[strat_col].astype(object)
	                # Keep rows without a sentiment instead of silently dropping them.
	                labels = labels.where(labels.notna(), "<missing>")
	                sizes = {
	                    label: int(count)
	                    for label, count in labels.value_counts().items()
	                    if count > 0
	                }
	                if len(sizes) > 1:
	                    quotas = self._allocate_quotas(sizes, cap)
	                    parts = [
	                        df[labels == label].sample(n=quota, random_state=42)
	                        for label, quota in quotas.items()
	                        if quota > 0
	                    ]
	                    return pd.concat(parts)
	            except (TypeError, ValueError) as exc:
	                # e.g. unhashable sentiment values; fall through to random sampling.
	                self.log_processing(
	                    f"Stratified sampling failed ({exc}); using a random sample instead"
	                )
	        return df.sample(n=cap, random_state=42)

	    @staticmethod
	    def _clean_text(value: Any) -> str:
	        """Return ``value`` as stripped text, mapping None/NaN/NA to ''."""
	        if value is None:
	            return ""
	        try:
	            if pd.isna(value):
	                return ""
	        except (TypeError, ValueError):
	            # pd.isna returns an array for list-like values; treat them as present.
	            pass
	        return str(value).strip()

	    def _aggregate_stats(self, df_full: pd.DataFrame) -> list:
	        """Build the statistics lines computed over the full filtered set."""
	        total = len(df_full)
	        stats = []

	        if total and self._SENTIMENT_COL in df_full.columns:
	            sent_counts = df_full[self._SENTIMENT_COL].value_counts()
	            if not sent_counts.empty:
	                sent_pct = (sent_counts / total * 100).round(1)
	                stats.append("Sentiment breakdown: " +
	                             ", ".join(f"{s} {pct}%" for s, pct in sent_pct.items()))

	        if "topics" in df_full.columns:
	            from utils.helpscout_utils import explode_topics
	            exploded = explode_topics(df_full)
	            if not exploded.empty:
	                top_topics = exploded["topic_id"].value_counts().head(8)
	                topic_strs = [f"{topic_label(t, self.taxonomy)} ({c})"
	                              for t, c in top_topics.items()]
	                stats.append("Top topics: " + ", ".join(topic_strs))

	        from utils.helpscout_utils import boolean_flag_counts
	        flags = boolean_flag_counts(df_full)
	        flag_parts = []
	        for key, label in self._FLAG_LABELS:
	            try:
	                count = flags[key]
	            except KeyError:
	                continue  # flag not reported for this data set
	            if count:
	                flag_parts.append(f"{label}: {count}")
	        if flag_parts:
	            stats.append(", ".join(flag_parts))

	        if "duration_hours" in df_full.columns:
	            avg_dur = pd.to_numeric(df_full["duration_hours"], errors="coerce").mean()
	            if not pd.isna(avg_dur):  # all-missing durations would print "nan hours"
	                stats.append(f"Average conversation duration: {avg_dur:.1f} hours")

	        return stats

	    def _format_summaries(self, df_sample: pd.DataFrame) -> list:
	        """
	        Format each non-empty summary as ``[row number] (sentiment) text``.

	        Row numbers are 1-based positions in ``df_sample``; rows with an empty
	        or missing summary are skipped, so the numbering can have gaps.
	        """
	        limit = self.MAX_SUMMARY_CHARS
	        if self._SENTIMENT_COL in df_sample.columns:
	            sentiments = df_sample[self._SENTIMENT_COL].tolist()
	        else:
	            sentiments = [""] * len(df_sample)

	        lines = []
	        rows = zip(df_sample["summary"].tolist(), sentiments)
	        for i, (raw_summary, raw_sentiment) in enumerate(rows, 1):
	            text = self._clean_text(raw_summary)
	            if not text:
	                continue
	            if len(text) > limit:
	                text = text[:limit] + "…"
	            lines.append(f"[{i}] ({self._clean_text(raw_sentiment)}) {text}")
	        return lines

	    def _build_aggregate_context(self, df_sample: pd.DataFrame,
	                                 df_full: pd.DataFrame) -> str:
	        """
	        Build a text block with aggregate stats + conversation summaries.

	        Statistics come from ``df_full`` (the whole filtered set); the
	        individual summaries come from ``df_sample`` only.
	        """
	        total = len(df_full)
	        n_sample = len(df_sample)

	        stats_block = "\n".join(self._aggregate_stats(df_full))

	        summaries = self._format_summaries(df_sample)
	        summaries_block = "\n".join(summaries) if summaries else "No summaries available."

	        note = (f"Note: Showing {n_sample} of {total} matched conversations."
	                if n_sample < total else f"Showing all {total} matched conversations.")

	        # The literal's continuation lines are kept exactly as found in the file.
	        return f"""=== AGGREGATE STATISTICS ===
	{stats_block}
	{note}

	=== CONVERSATION SUMMARIES ===
	{summaries_block}"""

	    def _build_prompt(self, context: str, filter_desc: str,
	                      n_analyzed: int) -> str:
	        """
	        Assemble the user prompt: task framing, applied filters, the aggregate
	        context block, the required JSON structure and per-section guidelines.
	        """
	        # The literal's continuation lines are kept exactly as found in the file.
	        return f"""Analyze the following {n_analyzed} HelpScout customer support conversation summaries for Musora.

	Applied filters: {filter_desc}

	{context}

	Your task: Synthesize these conversations and produce insights that go BEYOND the pre-extracted tags.
	Look for underlying patterns, recurring pain points, emotional signals, product gaps, and operational issues
	that would not be obvious from simple topic counts alone.

	Respond in JSON with this exact structure:
	{{
	"executive_summary": "3-5 sentence high-level synthesis of what customers are experiencing",
	"top_themes": [
	{{
	"theme": "Short theme name (not a topic tag)",
	"description": "What customers are actually saying and feeling about this",
	"prevalence": "Rough estimate: e.g. 'Appears in ~30% of conversations'"
	}}
	],
	"top_complaints": [
	"Specific actionable complaint statement (not generic)"
	],
	"unexpected_insights": [
	"A pattern, contradiction, or insight that would surprise a product manager"
	],
	"notable_quotes": [
	"Paraphrased quote or representative statement from conversations (not verbatim)"
	]
	}}

	Guidelines:
	- Top themes: 5-8 items, each distinct from pre-extracted topics
	- Top complaints: 5-8 bullet points, specific and actionable
	- Unexpected insights: 3-5 items, must genuinely go beyond the tag taxonomy
	- Notable quotes: 3-5 representative paraphrases
	- If a section has fewer relevant items, use fewer — quality over quantity
	"""

	    @staticmethod
	    def _ensure_defaults(summary: dict) -> dict:
	        """
	        Guarantee that every expected section exists and is not None.

	        A dict is updated in place and returned. A plain-text reply becomes
	        the executive summary; any other non-dict reply is replaced by an
	        empty payload.
	        """
	        if isinstance(summary, str):
	            summary = {"executive_summary": summary}
	        elif not isinstance(summary, dict):
	            summary = {}

	        if summary.get("executive_summary") is None:
	            summary["executive_summary"] = ""
	        for key in HelpScoutSummaryAgent._LIST_SECTIONS:
	            if summary.get(key) is None:
	                summary[key] = []  # fresh list per payload, never shared
	        return summary

	    def _empty_result(self, filter_desc: str) -> dict:
	        """Return a successful result for a filter that matched no conversations."""
	        return {
	            "success": True,
	            "summary": self._ensure_defaults({
	                "executive_summary": "No conversations matched the selected filters.",
	            }),
	            "metadata": {
	                "total_conversations_analyzed": 0,
	                "total_available": 0,
	                "model_used": self.model,
	                "tokens_used": 0,
	                "filter_applied": filter_desc,
	            },
	            "error": None,
	        }