Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

App Files Files Community

Sentiment_analysis / visualization /components /helpscout_dashboard.py

Danialebrat

Adding members sections

5f1963f 26 days ago

raw

history blame contribute delete

18.2 kB

	"""
	HelpScout Dashboard Page
	Full dedicated dashboard for HelpScout customer support conversation analysis.
	"""
	import sys
	from pathlib import Path

	import pandas as pd
	import streamlit as st

	# Append the directory two levels above this file to sys.path.
	# NOTE(review): presumably this is what lets the `utils.*` and
	# `visualizations.*` imports below resolve — confirm against the repo layout.
	parent_dir = Path(__file__).resolve().parent.parent
	sys.path.append(str(parent_dir))

	from utils.helpscout_utils import boolean_flag_counts, topic_label, load_topic_taxonomy
	from visualizations.helpscout_charts import HelpScoutCharts
	from visualizations.demographic_charts import DemographicCharts
	from utils.data_processor import SentimentDataProcessor


	def _sentiment_score(df) -> float:
	    """Return the mean sentiment of *df* on a -2 to +2 scale.

	    Each ``sentiment_polarity`` label is mapped to an integer weight
	    (``very_negative`` = -2 ... ``very_positive`` = +2). Labels outside the
	    five known values, and missing values, are scored as 0. An empty frame
	    or a frame without the ``sentiment_polarity`` column yields 0.0.
	    """
	    # Guard first: nothing to average without rows or without the column.
	    if df.empty or "sentiment_polarity" not in df.columns:
	        return 0.0

	    polarity_weights = {
	        "very_positive": 2,
	        "positive": 1,
	        "neutral": 0,
	        "negative": -1,
	        "very_negative": -2,
	    }
	    weighted = df["sentiment_polarity"].map(polarity_weights)
	    # Unmapped labels come back as NaN from ``map``; count them as neutral.
	    return float(weighted.fillna(0).mean())


	def _hs_pct(part, total):
	    """Return ``part`` as a percentage of ``total``; 0.0 when ``total`` is zero."""
	    return part / total * 100 if total else 0.0


	def _hs_polarity_pct(df, labels):
	    """Return the percentage of rows whose ``sentiment_polarity`` is in ``labels``.

	    Returns 0.0 when the frame is empty or has no ``sentiment_polarity``
	    column, so a missing column cannot raise ``KeyError``.
	    """
	    if "sentiment_polarity" not in df.columns or df.empty:
	        return 0.0
	    hits = int(df["sentiment_polarity"].isin(labels).sum())
	    return _hs_pct(hits, len(df))


	def _hs_mean_or_none(df, column):
	    """Return the mean of ``df[column]`` as a float, or None when unavailable.

	    None is returned both when the column is absent and when the mean is NaN
	    (no non-null values), so callers can render "N/A" instead of "nan".
	    """
	    if column not in df.columns:
	        return None
	    value = df[column].mean()
	    return None if pd.isna(value) else float(value)


	def _hs_member_mask(df):
	    """Return a plain boolean Series that is True where ``is_member`` is True.

	    Missing values count as non-members, which keeps boolean indexing and
	    ``~`` safe when the column contains nulls or has object dtype.
	    """
	    return df["is_member"].eq(True).fillna(False).astype(bool)


	def _hs_filter_by_date_range(df, date_range, column="first_message_at"):
	    """Return the rows of ``df`` whose ``column`` lies within ``date_range``.

	    Both ends are inclusive. When the end bound carries no time of day (a
	    plain date or a midnight timestamp) the whole end day is included;
	    otherwise the end bound is used as an exact upper limit.
	    """
	    start = pd.Timestamp(date_range[0])
	    end = pd.Timestamp(date_range[1])
	    stamps = df[column]
	    if pd.notna(end) and end == end.normalize():
	        # A bare end date converts to 00:00 of that day; compare against the
	        # start of the following day so the selected end day is not dropped.
	        return df[(stamps >= start) & (stamps < end + pd.Timedelta(days=1))]
	    return df[(stamps >= start) & (stamps <= end)]


	def _hs_last_processed_label(df):
	    """Return the latest ``processed_at`` as 'YYYY-MM-DD HH:MM', or 'Unknown'.

	    'Unknown' is returned when the column is absent, the frame is empty, or
	    no value can be interpreted as a timestamp.
	    """
	    if "processed_at" not in df.columns or df.empty:
	        return "Unknown"
	    latest = pd.to_datetime(df["processed_at"], errors="coerce").max()
	    if pd.isna(latest):
	        return "Unknown"
	    return latest.strftime("%Y-%m-%d %H:%M")


	def _hs_apply_member_filter(hs_df):
	    """Render the member-status radio and return the filtered frame.

	    Returns None when the chosen filter leaves no conversations (a warning
	    has then already been shown). When ``is_member`` is absent an info note
	    is shown and the frame is returned unchanged.
	    """
	    if "is_member" not in hs_df.columns:
	        st.info("ℹ️ Member data not available — customer emails could not be matched to Musora user records.")
	        return hs_df

	    member_filter = st.radio(
	        "Show conversations for:",
	        options=["All Customers", "Members Only", "Non-Members Only"],
	        horizontal=True,
	        key="hs_dash_member_filter",
	    )
	    if member_filter == "All Customers":
	        return hs_df

	    is_member = _hs_member_mask(hs_df)
	    filtered = hs_df[is_member] if member_filter == "Members Only" else hs_df[~is_member]
	    audience = member_filter.lower().replace(" only", "")
	    if filtered.empty:
	        st.warning(f"No conversations found for {audience}.")
	        return None
	    st.info(f"Filtered to {len(filtered):,} {audience} conversations.")
	    return filtered


	def _hs_render_pdf_export(hs_df):
	    """Render the collapsible PDF export section for the current frame."""
	    with st.expander("📄 Export PDF Report", expanded=False):
	        st.markdown(
	            "Generate a comprehensive HelpScout support report. "
	            "Covers sentiment, topics, billing flags, timelines, and demographics."
	        )
	        if not st.button("Generate HelpScout PDF Report", type="primary",
	                         use_container_width=True, key="hs_dash_pdf_btn"):
	            return
	        with st.spinner("Generating HelpScout PDF report…"):
	            # Broad catch is deliberate: this is a UI boundary and any failure
	            # in report generation should be shown to the user, not crash the page.
	            try:
	                from datetime import datetime
	                from utils.helpscout_pdf import HelpScoutDashboardPDF

	                pdf_bytes = HelpScoutDashboardPDF().generate_report(hs_df)
	                filename = f"helpscout_dashboard_{datetime.now().strftime('%Y%m%d_%H%M')}.pdf"
	                st.success("Report generated successfully!")
	                st.download_button(
	                    label="Download HelpScout Dashboard PDF",
	                    data=pdf_bytes,
	                    file_name=filename,
	                    mime="application/pdf",
	                    use_container_width=True,
	                )
	            except Exception as e:
	                st.error(f"Failed to generate report: {e}")
	                st.exception(e)


	def _hs_render_kpi_row(hs_df):
	    """Render the six headline KPI metrics."""
	    total = len(hs_df)
	    escalation_count = int(hs_df["is_escalation"].sum()) if "is_escalation" in hs_df.columns else 0
	    flags = boolean_flag_counts(hs_df)
	    avg_duration = _hs_mean_or_none(hs_df, "duration_hours")

	    k1, k2, k3, k4, k5, k6 = st.columns(6)
	    k1.metric("Total Conversations", f"{total:,}")
	    k2.metric("Avg Duration (h)", "N/A" if avg_duration is None else f"{avg_duration:.1f}")
	    k3.metric(
	        "Escalations",
	        f"{escalation_count:,}",
	        delta=f"{_hs_pct(escalation_count, total):.1f}% of total" if total else None,
	        delta_color="inverse",
	    )
	    k4.metric("Refund Requests", f"{flags['is_refund_request']:,}")
	    k5.metric("Cancellations", f"{flags['is_cancellation']:,}")
	    k6.metric("Membership Joins", f"{flags['is_membership']:,}")


	def _hs_render_sentiment(hs_df, charts):
	    """Render the sentiment pie, the score gauge and the positive/negative shares."""
	    st.markdown("## 🎯 Sentiment Distribution")
	    col1, col2 = st.columns(2)
	    with col1:
	        st.plotly_chart(charts.create_sentiment_pie_chart(hs_df), use_container_width=True)
	    with col2:
	        avg_score = _sentiment_score(hs_df)
	        st.plotly_chart(charts.create_sentiment_score_gauge(avg_score), use_container_width=True)
	        m1, m2 = st.columns(2)
	        pos_pct = _hs_polarity_pct(hs_df, ["positive", "very_positive"])
	        neg_pct = _hs_polarity_pct(hs_df, ["negative", "very_negative"])
	        m1.metric("Positive %", f"{pos_pct:.1f}%")
	        m2.metric("Negative %", f"{neg_pct:.1f}%")


	def _hs_render_topics(hs_df, charts):
	    """Render the topic bar chart, topic pie chart and topic/sentiment heatmap."""
	    st.markdown("## 🏷️ Topic Analysis")
	    col1, col2 = st.columns(2)
	    with col1:
	        st.plotly_chart(charts.create_topic_bar_chart(hs_df, title="Conversations by Topic"),
	                        use_container_width=True)
	    with col2:
	        st.plotly_chart(charts.create_topic_pie_chart(hs_df, title="Topic Share"),
	                        use_container_width=True)

	    st.plotly_chart(charts.create_topic_sentiment_heatmap(hs_df), use_container_width=True)


	def _hs_render_emotions(hs_df, charts):
	    """Render the emotion charts; does nothing when no emotion data exists."""
	    if "emotions" not in hs_df.columns or not hs_df["emotions"].notna().any():
	        return
	    st.markdown("## 💭 Emotion Analysis")
	    col1, col2 = st.columns(2)
	    with col1:
	        st.plotly_chart(charts.create_emotion_bar_chart(hs_df, title="Emotion Distribution"),
	                        use_container_width=True)
	    with col2:
	        # Reuse the existing DistributionCharts emotion pie (same df structure with emotions col)
	        from visualizations.distribution_charts import DistributionCharts
	        dist_charts = DistributionCharts()
	        st.plotly_chart(dist_charts.create_emotion_pie_chart(hs_df, title="Emotion Share"),
	                        use_container_width=True)
	    st.markdown("---")


	def _hs_render_flags_and_status(hs_df, charts):
	    """Render the billing/membership flag charts and the status/source charts."""
	    st.markdown("## 💳 Billing & Membership Flags")
	    col1, col2 = st.columns(2)
	    with col1:
	        st.plotly_chart(charts.create_boolean_flags_chart(hs_df), use_container_width=True)
	    with col2:
	        st.plotly_chart(charts.create_escalation_breakdown(hs_df), use_container_width=True)

	    st.markdown("---")

	    st.markdown("## 📬 Status & Source Distribution")
	    col1, col2 = st.columns(2)
	    with col1:
	        st.plotly_chart(charts.create_status_distribution(hs_df), use_container_width=True)
	    with col2:
	        st.plotly_chart(charts.create_source_distribution(hs_df), use_container_width=True)


	def _hs_render_trends(hs_df, charts):
	    """Render the collapsible volume, sentiment, topic and refund/cancel timelines."""
	    granularity_labels = {"D": "Daily", "W": "Weekly", "M": "Monthly"}
	    with st.expander("📈 Volume & Trends", expanded=False):
	        freq_col, _ = st.columns([1, 3])
	        with freq_col:
	            freq = st.selectbox("Time Granularity", ["D", "W", "M"],
	                                format_func=lambda x: granularity_labels[x],
	                                index=1, key="hs_dash_freq")
	        st.plotly_chart(charts.create_volume_timeline(hs_df, freq=freq), use_container_width=True)
	        st.plotly_chart(charts.create_sentiment_timeline(hs_df, freq=freq), use_container_width=True)

	        all_topics_ranked = charts.get_all_topics_ranked(hs_df)
	        topic_options = {t: topic_label(t, charts.taxonomy) for t in all_topics_ranked}
	        default_topics = all_topics_ranked[:5]
	        selected_topics = st.multiselect(
	            "Topics to display",
	            options=list(topic_options.keys()),
	            default=default_topics,
	            format_func=lambda t: topic_options[t],
	            key="hs_dash_topic_select",
	        )
	        # Fall back to the default topics when the user clears the selection.
	        st.plotly_chart(
	            charts.create_topic_timeline(hs_df, freq=freq, selected_topics=selected_topics or default_topics),
	            use_container_width=True,
	        )
	        st.plotly_chart(charts.create_refund_cancel_timeline(hs_df, freq=freq), use_container_width=True)


	def _hs_render_depth(hs_df, charts):
	    """Render the collapsible duration and thread-count histograms."""
	    with st.expander("📊 Conversation Depth", expanded=False):
	        col1, col2 = st.columns(2)
	        with col1:
	            st.plotly_chart(charts.create_duration_histogram(hs_df), use_container_width=True)
	        with col2:
	            st.plotly_chart(charts.create_thread_count_histogram(hs_df), use_container_width=True)


	def _hs_render_member_comparison(hs_df, charts):
	    """Render the Member vs Non-Member section; does nothing without ``is_member``."""
	    if "is_member" not in hs_df.columns:
	        return
	    st.markdown("---")
	    st.markdown("## 👤 Member vs Non-Member")
	    st.caption(
	        "Conversations are classified as Member when the customer email matches "
	        "a Musora user account, and Non-Member otherwise."
	    )

	    total = len(hs_df)
	    member_count = int(_hs_member_mask(hs_df).sum())
	    non_member_count = total - member_count
	    match_pct = _hs_pct(member_count, total)

	    mv1, mv2, mv3 = st.columns(3)
	    mv1.metric("Members", f"{member_count:,}",
	               f"{match_pct:.1f}% of conversations" if total else None)
	    mv2.metric("Non-Members", f"{non_member_count:,}",
	               f"{100 - match_pct:.1f}% of conversations" if total else None)
	    mv3.metric("Email Match Rate", f"{match_pct:.1f}%")

	    mem_col1, mem_col2 = st.columns(2)
	    with mem_col1:
	        st.plotly_chart(charts.create_member_status_chart(hs_df),
	                        use_container_width=True, key="hs_dash_member_pie")
	    with mem_col2:
	        st.plotly_chart(charts.create_member_sentiment_chart(hs_df),
	                        use_container_width=True, key="hs_dash_member_sentiment")

	    st.plotly_chart(charts.create_member_topic_chart(hs_df),
	                    use_container_width=True, key="hs_dash_member_topics")


	def _hs_render_demographics(hs_df):
	    """Render the demographics section when any non-"Unknown" age group exists."""
	    has_demographics = (
	        "age_group" in hs_df.columns
	        and "timezone_region" in hs_df.columns
	        and (hs_df["age_group"] != "Unknown").any()
	    )
	    if not has_demographics:
	        return

	    st.markdown("---")
	    st.markdown("## 👥 Customer Demographics")
	    st.info("Demographics available for customers whose email matched Musora user records.")

	    processor = SentimentDataProcessor()
	    demo_charts = DemographicCharts()

	    demo_col1, demo_col2, demo_col3, demo_col4 = st.columns(4)
	    known_demo = int((hs_df["age_group"] != "Unknown").sum())
	    demo_col1.metric("With Demographics", f"{known_demo:,}",
	                     f"{_hs_pct(known_demo, len(hs_df)):.1f}% matched")

	    avg_age = _hs_mean_or_none(hs_df, "age")
	    demo_col2.metric("Average Age", "N/A" if avg_age is None else f"{avg_age:.1f}")

	    # value_counts() drops nulls, so it can be empty even when the frame is not.
	    region_counts = hs_df["timezone_region"].value_counts()
	    top_region = region_counts.index[0] if not region_counts.empty else "N/A"
	    demo_col3.metric("Top Region", str(top_region))

	    avg_exp = _hs_mean_or_none(hs_df, "experience_level")
	    demo_col4.metric("Avg Experience", "N/A" if avg_exp is None else f"{avg_exp:.1f}/10")

	    st.markdown("---")
	    age_dist = processor.get_demographics_distribution(hs_df, "age_group")
	    if not age_dist.empty:
	        st.markdown("### Age Distribution")
	        col1, col2 = st.columns(2)
	        with col1:
	            st.plotly_chart(demo_charts.create_age_distribution_chart(age_dist), use_container_width=True)
	        with col2:
	            age_sent = processor.get_demographics_by_sentiment(hs_df, "age_group")
	            if not age_sent.empty:
	                st.plotly_chart(demo_charts.create_age_sentiment_chart(age_sent), use_container_width=True)

	    region_dist = processor.get_timezone_regions_distribution(hs_df)
	    if not region_dist.empty:
	        st.markdown("### Geographic Distribution")
	        col1, col2 = st.columns(2)
	        with col1:
	            st.plotly_chart(demo_charts.create_region_distribution_chart(region_dist), use_container_width=True)
	        with col2:
	            region_sent = processor.get_demographics_by_sentiment(hs_df, "timezone_region")
	            if not region_sent.empty:
	                st.plotly_chart(demo_charts.create_region_sentiment_chart(region_sent), use_container_width=True)


	def render_helpscout_dashboard(data_loader, date_range=None):
	    """
	    Render the full HelpScout Dashboard page.

	    The conversation frame is read from ``st.session_state["helpscout_df"]``,
	    narrowed by the optional date range and by the member-status radio, and
	    then handed to each dashboard section in display order. Rendering stops
	    early with a warning when no data is loaded or a filter leaves no rows.

	    Args:
	        data_loader: HelpScoutDataLoader instance. Not read by this function;
	            kept in the signature so existing callers keep working.
	        date_range: optional (start_date, end_date) tuple from global sidebar
	            filters. Both ends are inclusive and a plain end date covers that
	            whole day. Only applied when ``first_message_at`` is present.
	    """
	    st.title("🎧 HelpScout Support Dashboard")
	    st.markdown("Customer support conversation analysis from HelpScout.")

	    hs_df = st.session_state.get("helpscout_df")
	    if hs_df is None or hs_df.empty:
	        st.warning("No HelpScout data available. Please check your Snowflake connection.")
	        return

	    if date_range and len(date_range) == 2 and "first_message_at" in hs_df.columns:
	        hs_df = _hs_filter_by_date_range(hs_df, date_range)
	        if hs_df.empty:
	            st.warning("No HelpScout conversations match the selected date range.")
	            return
	        st.info(f"Showing {len(hs_df):,} conversations filtered by date range "
	                f"({date_range[0]} → {date_range[1]})")

	    # ── Member Status Filter ───────────────────────────────────────────────────
	    hs_df = _hs_apply_member_filter(hs_df)
	    if hs_df is None:
	        return

	    charts = HelpScoutCharts()

	    # ── PDF Export ────────────────────────────────────────────────────────────
	    _hs_render_pdf_export(hs_df)
	    st.markdown("---")

	    # ── KPI Row ───────────────────────────────────────────────────────────────
	    _hs_render_kpi_row(hs_df)
	    st.markdown("---")

	    # ── Sentiment ─────────────────────────────────────────────────────────────
	    _hs_render_sentiment(hs_df, charts)
	    st.markdown("---")

	    # ── Topics ────────────────────────────────────────────────────────────────
	    _hs_render_topics(hs_df, charts)
	    st.markdown("---")

	    # ── Emotions (renders its own trailing separator when shown) ─────────────
	    _hs_render_emotions(hs_df, charts)

	    # ── Billing Flags / Status / Source ──────────────────────────────────────
	    _hs_render_flags_and_status(hs_df, charts)
	    st.markdown("---")

	    # ── Volume & Timelines ────────────────────────────────────────────────────
	    _hs_render_trends(hs_df, charts)

	    # ── Duration & Thread Count ───────────────────────────────────────────────
	    _hs_render_depth(hs_df, charts)

	    # ── Member vs Non-Member ─────────────────────────────────────────────────
	    _hs_render_member_comparison(hs_df, charts)

	    # ── Demographics ─────────────────────────────────────────────────────────
	    _hs_render_demographics(hs_df)

	    st.markdown("---")
	    st.caption(
	        "Data source: SOCIAL_MEDIA_DB.ML_FEATURES.HELPSCOUT_CONVERSATION_FEATURES | "
	        f"Last processed: {_hs_last_processed_label(hs_df)}"
	    )


	# ─────────────────────────────────────────────────────────────────────────────
	# Compact summary for embedding in the main Sentiment Dashboard
	# ─────────────────────────────────────────────────────────────────────────────

	def render_helpscout_compact_summary(hs_df):
	    """
	    Render a one-screen HelpScout summary for the bottom of the main
	    Sentiment Dashboard. Kept purposely brief.

	    Shows four KPI metrics, a sentiment pie chart and a top-five topic bar
	    chart, followed by a pointer to the full HelpScout dashboard.

	    Args:
	        hs_df: DataFrame of HelpScout conversations. ``None`` or an empty
	            frame renders the header plus a "no conversations" note instead
	            of raising.
	    """
	    # Treat a missing frame like an empty one so the embedding page never crashes.
	    total = 0 if hs_df is None else len(hs_df)

	    st.markdown("---")
	    st.markdown("## 🎧 HelpScout Support — Quick View")
	    st.caption(f"{total:,} processed customer conversations")

	    if total == 0:
	        st.info("No HelpScout conversations available.")
	        return

	    charts = HelpScoutCharts()
	    flags = boolean_flag_counts(hs_df)
	    escalation_count = int(hs_df["is_escalation"].sum()) if "is_escalation" in hs_df.columns else 0

	    # The mean is NaN when every duration is null; show "N/A" rather than "nan".
	    avg_dur = hs_df["duration_hours"].mean() if "duration_hours" in hs_df.columns else None
	    if avg_dur is None or pd.isna(avg_dur):
	        duration_label = "N/A"
	    else:
	        duration_label = f"{float(avg_dur):.1f}"

	    k1, k2, k3, k4 = st.columns(4)
	    k1.metric("Conversations", f"{total:,}")
	    k2.metric(
	        "Escalations",
	        f"{escalation_count:,}",
	        delta=f"{escalation_count / total * 100:.1f}%",
	        delta_color="inverse",
	    )
	    k3.metric("Refund Requests", f"{flags['is_refund_request']:,}")
	    k4.metric("Avg Duration (h)", duration_label)

	    col1, col2 = st.columns(2)
	    with col1:
	        st.plotly_chart(
	            charts.create_sentiment_pie_chart(hs_df, title="HelpScout Sentiment"),
	            use_container_width=True,
	            key="hs_compact_sentiment_pie",
	        )
	    with col2:
	        st.plotly_chart(
	            charts.create_topic_bar_chart(hs_df, title="Top Topics", top_n=5),
	            use_container_width=True,
	            key="hs_compact_topic_bar",
	        )

	    st.info("👉 Navigate to 🎧 HelpScout Dashboard for the full analysis.")