Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

App Files Files Community

Sentiment_analysis / visualization /components /helpscout_analysis.py

Danialebrat

Adding members sections

5f1963f 26 days ago

raw

history blame contribute delete

23.9 kB

	"""
	HelpScout Analysis Page
	Purpose-built analysis page for HelpScout conversations.
	Mirrors the SA page architecture: filter → fetch → charts → LLM summary → export.
	One page-level summary report for the entire filtered set.
	"""
	import sys
	from datetime import date, timedelta
	from pathlib import Path

	import pandas as pd
	import streamlit as st

	parent_dir = Path(__file__).resolve().parent.parent
	sys.path.append(str(parent_dir))

	from visualizations.helpscout_charts import HelpScoutCharts
	from utils.helpscout_utils import (
	boolean_flag_counts, build_filter_description, topic_label, load_topic_taxonomy
	)
	from agents.helpscout_summary_agent import HelpScoutSummaryAgent


	def render_helpscout_analysis(data_loader):
	"""
	Render the HelpScout Analysis page.

	Args:
	data_loader: HelpScoutDataLoader instance
	"""
	st.title("🔬 HelpScout Analysis")
	st.markdown(
	"Deep-dive into customer support conversations. Apply filters, fetch the data, "
	"explore distributions, and generate an AI-powered summary report."
	)
	st.markdown("---")

	charts = HelpScoutCharts()
	taxonomy = load_topic_taxonomy()

	# ── Filter options from already-loaded dashboard df ───────────────────────
	hs_df = st.session_state.get("helpscout_df")
	if hs_df is None or hs_df.empty:
	st.warning("HelpScout dashboard data not loaded yet. Please wait for the app to initialise.")
	return

	filter_options = data_loader.get_filter_options(hs_df)

	# ── Filters ───────────────────────────────────────────────────────────────
	st.markdown("### 🎯 Filters")

	row1_col1, row1_col2 = st.columns(2)
	with row1_col1:
	min_date = hs_df["first_message_at"].min().date() if "first_message_at" in hs_df.columns and not hs_df.empty else date.today() - timedelta(days=60)
	max_date = hs_df["first_message_at"].max().date() if "first_message_at" in hs_df.columns and not hs_df.empty else date.today()
	default_start = max(min_date, max_date - timedelta(days=data_loader.default_date_range_days))
	date_range = st.date_input(
	"Date Range (First Message At)",
	value=(default_start, max_date),
	min_value=min_date, max_value=max_date,
	key="hs_analysis_date_range",
	)
	with row1_col2:
	top_n_options = [("All", 0), ("50", 50), ("100", 100), ("200", 200), ("500", 500), ("1000", 1000)]
	top_n_label = st.selectbox(
	"Limit Results",
	options=[x[0] for x in top_n_options],
	index=0,
	help="Limit number of conversations fetched. 'All' fetches everything matching your filters.",
	key="hs_analysis_top_n",
	)
	top_n = dict(top_n_options)[top_n_label]

	row2_col1, row2_col2, row2_col3, row2_col4 = st.columns(4)
	with row2_col1:
	topic_options = filter_options.get("topics", [])
	topic_labels_map = {t: topic_label(t, taxonomy) for t in topic_options}
	selected_topic_labels = st.multiselect(
	"Topics",
	options=[topic_labels_map[t] for t in topic_options],
	default=[],
	key="hs_analysis_topics",
	)
	label_to_id = {v: k for k, v in topic_labels_map.items()}
	selected_topics = [label_to_id[l] for l in selected_topic_labels if l in label_to_id]

	with row2_col2:
	selected_sentiments = st.multiselect(
	"Sentiments",
	options=filter_options.get("sentiments", []),
	default=[],
	key="hs_analysis_sentiments",
	)

	with row2_col3:
	selected_statuses = st.multiselect(
	"Status",
	options=filter_options.get("statuses", []),
	default=[],
	key="hs_analysis_statuses",
	)

	with row2_col4:
	selected_sources = st.multiselect(
	"Source Type",
	options=filter_options.get("sources", []),
	default=[],
	key="hs_analysis_sources",
	)

	row3_col1, row3_col2, row3_col3, row3_col4 = st.columns(4)
	with row3_col1:
	refund_only = st.checkbox("Refund Requests Only", key="hs_analysis_refund")
	with row3_col2:
	cancel_only = st.checkbox("Cancellations Only", key="hs_analysis_cancel")
	with row3_col3:
	membership_only = st.checkbox("Membership Joins Only", key="hs_analysis_membership")
	with row3_col4:
	member_status_filter = st.selectbox(
	"Customer Type",
	options=["All", "Members Only", "Non-Members Only"],
	index=0,
	help="Members are customers whose email matches a Musora user account.",
	key="hs_analysis_member_status",
	)

	st.markdown("---")

	# ── Fetch button ─────────────────────────────────────────────────────────
	dr_tuple = (str(date_range[0]), str(date_range[1])) if date_range and len(date_range) == 2 else None

	fetch_key = (
	dr_tuple,
	tuple(sorted(selected_sentiments)),
	tuple(sorted(selected_topics)),
	tuple(sorted(selected_statuses)),
	tuple(sorted(selected_sources)),
	bool(refund_only), bool(cancel_only), bool(membership_only),
	top_n,
	)

	has_data = (
	"hs_analysis_df" in st.session_state
	and st.session_state.get("hs_analysis_fetch_key") == fetch_key
	and not st.session_state["hs_analysis_df"].empty
	)

	fetch_col, info_col = st.columns([1, 3])
	with fetch_col:
	fetch_clicked = st.button("🚀 Fetch Data", type="primary",
	use_container_width=True, key="hs_fetch_btn")
	with info_col:
	if has_data:
	n = len(st.session_state["hs_analysis_df"])
	st.success(f"✅ Showing {n:,} conversations matching your filters")
	elif not fetch_clicked:
	st.info("👆 Set your filters and click Fetch Data to query Snowflake.")

	if fetch_clicked:
	with st.spinner("Fetching HelpScout data from Snowflake…"):
	result_df = data_loader.load_analysis_data(
	sentiments=selected_sentiments or None,
	topics=selected_topics or None,
	refund_only=refund_only,
	cancel_only=cancel_only,
	membership_only=membership_only,
	statuses=selected_statuses or None,
	sources=selected_sources or None,
	date_range=(date_range[0], date_range[1]) if dr_tuple else None,
	top_n=top_n or None,
	)
	applied_filters = {
	"date_range": (date_range[0], date_range[1]) if dr_tuple else None,
	"sentiments": selected_sentiments,
	"topics": selected_topics,
	"statuses": selected_statuses,
	"sources": selected_sources,
	"refund_only": refund_only,
	"cancel_only": cancel_only,
	"membership_only": membership_only,
	"member_status": member_status_filter,
	}
	st.session_state["hs_analysis_df"] = result_df
	st.session_state["hs_analysis_fetch_key"] = fetch_key
	st.session_state["hs_analysis_filter_desc"] = build_filter_description(applied_filters, taxonomy)
	# Invalidate any prior summary when filters change
	st.session_state.pop("hs_analysis_summary", None)
	st.session_state.pop("hs_analysis_summary_key", None)
	st.session_state["hs_analysis_page"] = 1
	st.rerun()

	if not has_data and not fetch_clicked:
	return

	analysis_df = st.session_state.get("hs_analysis_df", pd.DataFrame()).copy()
	filter_desc = st.session_state.get("hs_analysis_filter_desc", "No filters applied")

	# Derive is_member from dashboard df (always, so breakdown charts work on "All" too)
	if "customer_email" in analysis_df.columns:
	hs_dashboard = st.session_state.get("helpscout_df", pd.DataFrame())
	if "is_member" in hs_dashboard.columns and not hs_dashboard.empty:
	member_emails = set(
	hs_dashboard[hs_dashboard["is_member"]]["customer_email"].str.lower().dropna()
	)
	analysis_df["is_member"] = analysis_df["customer_email"].str.lower().isin(member_emails)
	# Apply filter when a specific group is selected
	if member_status_filter == "Members Only":
	analysis_df = analysis_df[analysis_df["is_member"]]
	elif member_status_filter == "Non-Members Only":
	analysis_df = analysis_df[~analysis_df["is_member"]]
	elif member_status_filter != "All":
	st.warning("Member data not available — customer emails could not be matched to Musora records.")

	if analysis_df.empty:
	st.warning("No conversations found for the selected filters. Try adjusting and re-fetching.")
	return

	total = len(analysis_df)
	flags = boolean_flag_counts(analysis_df)
	neg_pct = analysis_df["sentiment_polarity"].isin(["negative", "very_negative"]).sum() / total * 100
	avg_dur = float(analysis_df["duration_hours"].mean()) if "duration_hours" in analysis_df.columns else 0.0

	# ── KPI Row ───────────────────────────────────────────────────────────────
	st.markdown("### 📊 Overview")
	k1, k2, k3, k4, k5 = st.columns(5)
	k1.metric("Conversations", f"{total:,}")
	k2.metric("Negative %", f"{neg_pct:.1f}%")
	k3.metric("Refund Requests", f"{flags['is_refund_request']:,}")
	k4.metric("Cancellations", f"{flags['is_cancellation']:,}")
	k5.metric("Avg Duration (h)", f"{avg_dur:.1f}")

	st.caption(f"Active filters: {filter_desc}")
	st.markdown("---")

	# ── Distributions ─────────────────────────────────────────────────────────
	st.markdown("### 📈 Distributions")

	col1, col2 = st.columns(2)
	with col1:
	st.plotly_chart(charts.create_sentiment_pie_chart(analysis_df, title="Sentiment Distribution"),
	use_container_width=True, key="hs_analysis_sent_pie")
	with col2:
	st.plotly_chart(charts.create_topic_bar_chart(analysis_df, title="Topic Distribution"),
	use_container_width=True, key="hs_analysis_topic_bar")

	col1, col2 = st.columns(2)
	with col1:
	st.plotly_chart(charts.create_topic_sentiment_heatmap(analysis_df),
	use_container_width=True, key="hs_analysis_topic_heatmap")
	with col2:
	st.plotly_chart(charts.create_boolean_flags_chart(analysis_df),
	use_container_width=True, key="hs_analysis_flags")

	if "emotions" in analysis_df.columns and analysis_df["emotions"].notna().any():
	col1, col2 = st.columns(2)
	with col1:
	st.plotly_chart(charts.create_emotion_bar_chart(analysis_df, title="Emotion Distribution"),
	use_container_width=True, key="hs_analysis_emotion")
	with col2:
	st.plotly_chart(charts.create_volume_timeline(analysis_df, title="Volume Over Time"),
	use_container_width=True, key="hs_analysis_vol_timeline")
	else:
	st.plotly_chart(charts.create_volume_timeline(analysis_df, title="Volume Over Time"),
	use_container_width=True, key="hs_analysis_vol_timeline2")

	# Member vs Non-Member breakdown (only when both groups are present in the view)
	if "is_member" in analysis_df.columns:
	st.markdown("### 👤 Member vs Non-Member")
	col1, col2 = st.columns(2)
	with col1:
	st.plotly_chart(charts.create_member_status_chart(analysis_df,
	title="Member vs Non-Member"),
	use_container_width=True, key="hs_analysis_member_pie")
	with col2:
	st.plotly_chart(charts.create_member_sentiment_chart(analysis_df,
	title="Sentiment by Member Status"),
	use_container_width=True, key="hs_analysis_member_sentiment")
	st.plotly_chart(charts.create_member_topic_chart(analysis_df,
	title="Top Topics by Member Status"),
	use_container_width=True, key="hs_analysis_member_topics")

	st.markdown("---")

	# ── AI Summary Report ─────────────────────────────────────────────────────
	st.markdown("### 🤖 AI Summary Report")
	st.markdown(
	"Generate an LLM-powered report from the conversation summaries matching your filters. "
	"The AI looks beyond the pre-extracted tags to surface patterns, pain points, "
	"and actionable insights."
	)

	summary_available = (
	"hs_analysis_summary" in st.session_state
	and st.session_state.get("hs_analysis_summary_key") == fetch_key
	and st.session_state["hs_analysis_summary"] is not None
	)

	gen_col, pdf_col = st.columns([1, 1])
	with gen_col:
	gen_clicked = st.button("🧠 Generate Summary Report", type="primary",
	use_container_width=True, key="hs_gen_summary_btn")
	with pdf_col:
	export_pdf_clicked = st.button("📄 Export as PDF", use_container_width=True,
	key="hs_export_pdf_btn")

	if gen_clicked:
	with st.spinner("Analysing conversations with AI… this may take 20–40 seconds…"):
	agent = HelpScoutSummaryAgent()
	result = agent.process({
	"conversations": analysis_df,
	"filter_description": filter_desc,
	})
	st.session_state["hs_analysis_summary"] = result
	st.session_state["hs_analysis_summary_key"] = fetch_key
	st.rerun()

	if export_pdf_clicked:
	with st.spinner("Generating PDF…"):
	try:
	from utils.helpscout_pdf import HelpScoutAnalysisPDF
	import datetime
	summary_result = st.session_state.get("hs_analysis_summary")
	exporter = HelpScoutAnalysisPDF()
	pdf_bytes = exporter.generate_report(
	analysis_df,
	filter_info={"Filters": filter_desc, "Total Conversations": str(total)},
	summary_result=summary_result,
	)
	filename = f"helpscout_analysis_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}.pdf"
	st.success("Report generated!")
	st.download_button(
	label="Download Analysis PDF",
	data=pdf_bytes,
	file_name=filename,
	mime="application/pdf",
	use_container_width=True,
	key="hs_download_pdf_btn",
	)
	except Exception as e:
	st.error(f"Failed to generate PDF: {e}")
	st.exception(e)

	# Render the summary if available
	if summary_available:
	result = st.session_state["hs_analysis_summary"]
	_render_summary_report(result)

	st.markdown("---")

	# ── Conversation Cards ────────────────────────────────────────────────────
	st.markdown("### 💬 Conversations")

	if "hs_analysis_page" not in st.session_state:
	st.session_state.hs_analysis_page = 1

	per_page = 10
	total_pages = max(1, (total + per_page - 1) // per_page)

	if total > per_page:
	st.info(f"Page {st.session_state.hs_analysis_page} of {total_pages} ({total:,} conversations)")
	pc1, pc2, pc3 = st.columns([1, 2, 1])
	with pc1:
	if st.button("⬅️ Previous", key="hs_prev_top",
	disabled=st.session_state.hs_analysis_page == 1):
	st.session_state.hs_analysis_page -= 1
	st.rerun()
	with pc2:
	st.markdown(
	f"<div style='text-align:center;padding-top:8px;'>"
	f"Page {st.session_state.hs_analysis_page} / {total_pages}</div>",
	unsafe_allow_html=True,
	)
	with pc3:
	if st.button("Next ➡️", key="hs_next_top",
	disabled=st.session_state.hs_analysis_page >= total_pages):
	st.session_state.hs_analysis_page += 1
	st.rerun()
	st.markdown("---")

	start = (st.session_state.hs_analysis_page - 1) * per_page
	end = min(start + per_page, total)
	page_df = analysis_df.iloc[start:end]

	for _, row in page_df.iterrows():
	_render_conversation_card(row, taxonomy)

	# Bottom pagination
	if total > per_page:
	pb1, pb2, pb3 = st.columns([1, 2, 1])
	with pb1:
	if st.button("⬅️ Previous", key="hs_prev_bot",
	disabled=st.session_state.hs_analysis_page == 1):
	st.session_state.hs_analysis_page -= 1
	st.rerun()
	with pb2:
	st.markdown(
	f"<div style='text-align:center;padding-top:8px;'>"
	f"Page {st.session_state.hs_analysis_page} / {total_pages}</div>",
	unsafe_allow_html=True,
	)
	with pb3:
	if st.button("Next ➡️", key="hs_next_bot",
	disabled=st.session_state.hs_analysis_page >= total_pages):
	st.session_state.hs_analysis_page += 1
	st.rerun()

	st.markdown("---")

	# ── Export CSV ────────────────────────────────────────────────────────────
	st.markdown("### 💾 Export Data")
	export_cols = [c for c in ["conversation_id", "customer_email", "first_message_at",
	"status", "sentiment_polarity", "topics", "summary",
	"is_refund_request", "is_cancellation", "is_membership",
	"duration_hours"] if c in analysis_df.columns]
	csv = analysis_df[export_cols].to_csv(index=False)
	st.download_button(
	label="📥 Download as CSV",
	data=csv,
	file_name=f"helpscout_analysis_{total}conversations.csv",
	mime="text/csv",
	key="hs_csv_download",
	)


	# ─────────────────────────────────────────────────────────────────────────────
	# Helper renderers
	# ─────────────────────────────────────────────────────────────────────────────

	def _render_summary_report(result: dict):
	"""Render the LLM summary result with nice formatting."""
	if not result.get("success"):
	st.error(f"AI analysis failed: {result.get('error', 'Unknown error')}")
	return

	summary = result.get("summary", {})
	meta = result.get("metadata", {})

	with st.container():
	st.markdown("---")
	st.markdown("#### 📋 Executive Summary")
	st.info(summary.get("executive_summary", ""))

	col1, col2 = st.columns(2)

	with col1:
	themes = summary.get("top_themes", [])
	if themes:
	st.markdown("#### 🎯 Top Themes")
	for t in themes:
	st.markdown(
	f"{t.get('theme', '')} _{t.get('prevalence', '')}_ \n"
	f"{t.get('description', '')}"
	)
	st.markdown("")

	insights = summary.get("unexpected_insights", [])
	if insights:
	st.markdown("#### 💡 Unexpected Insights")
	for ins in insights:
	st.markdown(f"- {ins}")

	with col2:
	complaints = summary.get("top_complaints", [])
	if complaints:
	st.markdown("#### ⚠️ Top Complaints")
	for c in complaints:
	st.markdown(f"- {c}")

	quotes = summary.get("notable_quotes", [])
	if quotes:
	st.markdown("#### 💬 Notable Quotes")
	for q in quotes:
	st.markdown(f"> {q}")

	with st.expander("ℹ️ Analysis Metadata"):
	mc1, mc2, mc3 = st.columns(3)
	mc1.metric("Conversations Analysed", meta.get("total_conversations_analyzed", 0))
	mc2.metric("Model Used", meta.get("model_used", "N/A"))
	mc3.metric("Tokens Used", meta.get("tokens_used", 0))
	if meta.get("total_available", 0) > meta.get("total_conversations_analyzed", 0):
	st.caption(
	f"Sampled {meta['total_conversations_analyzed']} of "
	f"{meta['total_available']} conversations for this analysis."
	)


	def _render_conversation_card(row, taxonomy: dict):
	"""Render a single conversation card."""
	sent = str(row.get("sentiment_polarity", "unknown"))
	sent_emoji = {
	"very_positive": "🟢", "positive": "🟩", "neutral": "🟡",
	"negative": "🟠", "very_negative": "🔴",
	}.get(sent, "⚪")

	topics_list = row.get("topics_list") or []
	topic_labels_str = ", ".join(topic_label(t, taxonomy) for t in topics_list) if topics_list else "—"

	first_name = str(row.get("customer_first") or "").strip()
	last_name = str(row.get("customer_last") or "").strip()
	customer_str = f"{first_name} {last_name[:1]}." if first_name or last_name else "Anonymous"

	first_msg = row.get("first_message_at")
	date_str = first_msg.strftime("%Y-%m-%d") if hasattr(first_msg, "strftime") else str(first_msg or "")

	flags = []
	if row.get("is_refund_request"): flags.append("💰 Refund")
	if row.get("is_cancellation"): flags.append("🚫 Cancel")
	if row.get("is_membership"): flags.append("✅ Membership")
	flags_str = " \| ".join(flags) if flags else ""

	with st.expander(
	f"{sent_emoji} {customer_str} — {topic_labels_str} \| {sent.replace('_', ' ').title()} \| {date_str}"
	+ (f" [{flags_str}]" if flags_str else ""),
	expanded=False,
	):
	info_col1, info_col2, info_col3 = st.columns(3)
	info_col1.markdown(f"Status: {row.get('status', '—')}")
	info_col2.markdown(f"Source: {row.get('source_type', '—')}")
	info_col3.markdown(f"Duration: {row.get('duration_hours', 0):.1f}h \| Threads: {row.get('thread_count', 0)}")

	summary = str(row.get("summary") or "No summary available.")
	st.markdown(f"Summary: {summary}")

	notes_col1, notes_col2 = st.columns(2)
	with notes_col1:
	sent_note = str(row.get("sentiment_notes") or "")
	if sent_note:
	st.markdown(f"Sentiment Note: _{sent_note}_")
	with notes_col2:
	topic_note = str(row.get("topic_notes") or "")
	if topic_note:
	st.markdown(f"Topic Note: _{topic_note}_")