Sentiment_analysis / visualization /components /helpscout_dashboard.py
Danialebrat's picture
Adding members sections
5f1963f
"""
HelpScout Dashboard Page
Full dedicated dashboard for HelpScout customer support conversation analysis.
"""
import sys
from pathlib import Path
import pandas as pd
import streamlit as st
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))
from utils.helpscout_utils import boolean_flag_counts, topic_label, load_topic_taxonomy
from visualizations.helpscout_charts import HelpScoutCharts
from visualizations.demographic_charts import DemographicCharts
from utils.data_processor import SentimentDataProcessor
def _sentiment_score(df) -> float:
"""Compute average sentiment score on a -2 to +2 scale."""
score_map = {"very_positive": 2, "positive": 1, "neutral": 0,
"negative": -1, "very_negative": -2}
if "sentiment_polarity" not in df.columns or df.empty:
return 0.0
scores = df["sentiment_polarity"].map(score_map).fillna(0)
return float(scores.mean())
def render_helpscout_dashboard(data_loader, date_range=None):
"""
Render the full HelpScout Dashboard page.
Args:
data_loader: HelpScoutDataLoader instance
date_range: optional (start_date, end_date) tuple from global sidebar filters
"""
st.title("🎧 HelpScout Support Dashboard")
st.markdown("Customer support conversation analysis from HelpScout.")
hs_df = st.session_state.get("helpscout_df")
if hs_df is None or hs_df.empty:
st.warning("No HelpScout data available. Please check your Snowflake connection.")
return
if date_range and len(date_range) == 2 and "first_message_at" in hs_df.columns:
hs_df = hs_df[
(hs_df["first_message_at"] >= pd.Timestamp(date_range[0])) &
(hs_df["first_message_at"] <= pd.Timestamp(date_range[1]))
]
if hs_df.empty:
st.warning("No HelpScout conversations match the selected date range.")
return
st.info(f"Showing **{len(hs_df):,}** conversations filtered by date range "
f"({date_range[0]} β†’ {date_range[1]})")
charts = HelpScoutCharts()
taxonomy = load_topic_taxonomy()
# ── Member Status Filter ───────────────────────────────────────────────────
has_member_data = "is_member" in hs_df.columns
if has_member_data:
member_filter = st.radio(
"Show conversations for:",
options=["All Customers", "Members Only", "Non-Members Only"],
horizontal=True,
key="hs_dash_member_filter",
)
if member_filter == "Members Only":
hs_df = hs_df[hs_df["is_member"]]
elif member_filter == "Non-Members Only":
hs_df = hs_df[~hs_df["is_member"]]
if member_filter != "All Customers" and hs_df.empty:
st.warning(f"No conversations found for {member_filter.lower().replace(' only', '')}.")
return
if member_filter != "All Customers":
st.info(f"Filtered to **{len(hs_df):,}** {member_filter.lower().replace(' only', '')} conversations.")
else:
st.info("ℹ️ Member data not available β€” customer emails could not be matched to Musora user records.")
# ── PDF Export ────────────────────────────────────────────────────────────
with st.expander("πŸ“„ Export PDF Report", expanded=False):
st.markdown(
"Generate a comprehensive HelpScout support report. "
"Covers sentiment, topics, billing flags, timelines, and demographics."
)
if st.button("Generate HelpScout PDF Report", type="primary",
use_container_width=True, key="hs_dash_pdf_btn"):
with st.spinner("Generating HelpScout PDF report…"):
try:
from utils.helpscout_pdf import HelpScoutDashboardPDF
exporter = HelpScoutDashboardPDF()
pdf_bytes = exporter.generate_report(hs_df)
import datetime
filename = f"helpscout_dashboard_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}.pdf"
st.success("Report generated successfully!")
st.download_button(
label="Download HelpScout Dashboard PDF",
data=pdf_bytes,
file_name=filename,
mime="application/pdf",
use_container_width=True,
)
except Exception as e:
st.error(f"Failed to generate report: {e}")
st.exception(e)
st.markdown("---")
# ── KPI Row ───────────────────────────────────────────────────────────────
total = len(hs_df)
escalation_count = int(hs_df["is_escalation"].sum()) if "is_escalation" in hs_df.columns else 0
flags = boolean_flag_counts(hs_df)
neg_pct = (hs_df["sentiment_polarity"].isin(["negative", "very_negative"]).sum() / total * 100) if total else 0
avg_duration = float(hs_df["duration_hours"].mean()) if "duration_hours" in hs_df.columns else 0.0
k1, k2, k3, k4, k5, k6 = st.columns(6)
k1.metric("Total Conversations", f"{total:,}")
k2.metric("Avg Duration (h)", f"{avg_duration:.1f}")
k3.metric("Escalations", f"{escalation_count:,}", delta=f"{escalation_count/total*100:.1f}% of total" if total else None, delta_color="inverse")
k4.metric("Refund Requests", f"{flags['is_refund_request']:,}")
k5.metric("Cancellations", f"{flags['is_cancellation']:,}")
k6.metric("Membership Joins",f"{flags['is_membership']:,}")
st.markdown("---")
# ── Sentiment ─────────────────────────────────────────────────────────────
st.markdown("## 🎯 Sentiment Distribution")
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(charts.create_sentiment_pie_chart(hs_df), use_container_width=True)
with col2:
avg_score = _sentiment_score(hs_df)
st.plotly_chart(charts.create_sentiment_score_gauge(avg_score), use_container_width=True)
m1, m2 = st.columns(2)
pos_pct = hs_df["sentiment_polarity"].isin(["positive", "very_positive"]).sum() / total * 100 if total else 0
m1.metric("Positive %", f"{pos_pct:.1f}%")
m2.metric("Negative %", f"{neg_pct:.1f}%")
st.markdown("---")
# ── Topics ────────────────────────────────────────────────────────────────
st.markdown("## 🏷️ Topic Analysis")
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(charts.create_topic_bar_chart(hs_df, title="Conversations by Topic"),
use_container_width=True)
with col2:
st.plotly_chart(charts.create_topic_pie_chart(hs_df, title="Topic Share"),
use_container_width=True)
st.plotly_chart(charts.create_topic_sentiment_heatmap(hs_df), use_container_width=True)
st.markdown("---")
# ── Emotions ─────────────────────────────────────────────────────────────
if "emotions" in hs_df.columns and hs_df["emotions"].notna().any():
st.markdown("## πŸ’­ Emotion Analysis")
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(charts.create_emotion_bar_chart(hs_df, title="Emotion Distribution"),
use_container_width=True)
with col2:
# Reuse the existing DistributionCharts emotion pie (same df structure with emotions col)
from visualizations.distribution_charts import DistributionCharts
dist_charts = DistributionCharts()
st.plotly_chart(dist_charts.create_emotion_pie_chart(hs_df, title="Emotion Share"),
use_container_width=True)
st.markdown("---")
# ── Billing Flags ─────────────────────────────────────────────────────────
st.markdown("## πŸ’³ Billing & Membership Flags")
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(charts.create_boolean_flags_chart(hs_df), use_container_width=True)
with col2:
st.plotly_chart(charts.create_escalation_breakdown(hs_df), use_container_width=True)
st.markdown("---")
# ── Status / Source ───────────────────────────────────────────────────────
st.markdown("## πŸ“¬ Status & Source Distribution")
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(charts.create_status_distribution(hs_df), use_container_width=True)
with col2:
st.plotly_chart(charts.create_source_distribution(hs_df), use_container_width=True)
st.markdown("---")
# ── Volume & Timelines ────────────────────────────────────────────────────
with st.expander("πŸ“ˆ Volume & Trends", expanded=False):
freq_col, _ = st.columns([1, 3])
with freq_col:
freq = st.selectbox("Time Granularity", ["D", "W", "M"],
format_func=lambda x: {"D": "Daily", "W": "Weekly", "M": "Monthly"}[x],
index=1, key="hs_dash_freq")
st.plotly_chart(charts.create_volume_timeline(hs_df, freq=freq), use_container_width=True)
st.plotly_chart(charts.create_sentiment_timeline(hs_df, freq=freq), use_container_width=True)
all_topics_ranked = charts.get_all_topics_ranked(hs_df)
topic_options = {t: topic_label(t, charts.taxonomy) for t in all_topics_ranked}
default_topics = all_topics_ranked[:5]
selected_topics = st.multiselect(
"Topics to display",
options=list(topic_options.keys()),
default=default_topics,
format_func=lambda t: topic_options[t],
key="hs_dash_topic_select",
)
st.plotly_chart(
charts.create_topic_timeline(hs_df, freq=freq, selected_topics=selected_topics or default_topics),
use_container_width=True,
)
st.plotly_chart(charts.create_refund_cancel_timeline(hs_df, freq=freq), use_container_width=True)
# ── Duration & Thread Count ───────────────────────────────────────────────
with st.expander("πŸ“Š Conversation Depth", expanded=False):
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(charts.create_duration_histogram(hs_df), use_container_width=True)
with col2:
st.plotly_chart(charts.create_thread_count_histogram(hs_df), use_container_width=True)
# ── Member vs Non-Member ─────────────────────────────────────────────────
if "is_member" in hs_df.columns:
st.markdown("---")
st.markdown("## πŸ‘€ Member vs Non-Member")
st.caption(
"Conversations are classified as **Member** when the customer email matches "
"a Musora user account, and **Non-Member** otherwise."
)
member_count = int(hs_df["is_member"].sum())
non_member_count = total - member_count
match_pct = member_count / total * 100 if total else 0
mv1, mv2, mv3 = st.columns(3)
mv1.metric("Members", f"{member_count:,}",
f"{match_pct:.1f}% of conversations" if total else None)
mv2.metric("Non-Members", f"{non_member_count:,}",
f"{100 - match_pct:.1f}% of conversations" if total else None)
mv3.metric("Email Match Rate", f"{match_pct:.1f}%")
mem_col1, mem_col2 = st.columns(2)
with mem_col1:
st.plotly_chart(charts.create_member_status_chart(hs_df),
use_container_width=True, key="hs_dash_member_pie")
with mem_col2:
st.plotly_chart(charts.create_member_sentiment_chart(hs_df),
use_container_width=True, key="hs_dash_member_sentiment")
st.plotly_chart(charts.create_member_topic_chart(hs_df),
use_container_width=True, key="hs_dash_member_topics")
# ── Demographics ─────────────────────────────────────────────────────────
has_demographics = (
"age_group" in hs_df.columns
and "timezone_region" in hs_df.columns
and (hs_df["age_group"] != "Unknown").any()
)
if has_demographics:
st.markdown("---")
st.markdown("## πŸ‘₯ Customer Demographics")
st.info(f"Demographics available for customers whose email matched Musora user records.")
processor = SentimentDataProcessor()
demo_charts = DemographicCharts()
demo_col1, demo_col2, demo_col3, demo_col4 = st.columns(4)
known_demo = int((hs_df["age_group"] != "Unknown").sum())
demo_col1.metric("With Demographics", f"{known_demo:,}", f"{known_demo/total*100:.1f}% matched")
avg_age = hs_df["age"].mean() if "age" in hs_df.columns else None
demo_col2.metric("Average Age", f"{avg_age:.1f}" if avg_age else "N/A")
top_region = hs_df["timezone_region"].value_counts().index[0] if "timezone_region" in hs_df.columns and not hs_df.empty else "N/A"
demo_col3.metric("Top Region", str(top_region))
avg_exp = hs_df["experience_level"].mean() if "experience_level" in hs_df.columns else None
demo_col4.metric("Avg Experience", f"{avg_exp:.1f}/10" if avg_exp else "N/A")
st.markdown("---")
age_dist = processor.get_demographics_distribution(hs_df, "age_group")
if not age_dist.empty:
st.markdown("### Age Distribution")
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(demo_charts.create_age_distribution_chart(age_dist), use_container_width=True)
with col2:
age_sent = processor.get_demographics_by_sentiment(hs_df, "age_group")
if not age_sent.empty:
st.plotly_chart(demo_charts.create_age_sentiment_chart(age_sent), use_container_width=True)
region_dist = processor.get_timezone_regions_distribution(hs_df)
if not region_dist.empty:
st.markdown("### Geographic Distribution")
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(demo_charts.create_region_distribution_chart(region_dist), use_container_width=True)
with col2:
region_sent = processor.get_demographics_by_sentiment(hs_df, "timezone_region")
if not region_sent.empty:
st.plotly_chart(demo_charts.create_region_sentiment_chart(region_sent), use_container_width=True)
st.markdown("---")
st.caption(
"Data source: SOCIAL_MEDIA_DB.ML_FEATURES.HELPSCOUT_CONVERSATION_FEATURES | "
f"Last processed: {hs_df['processed_at'].max().strftime('%Y-%m-%d %H:%M') if 'processed_at' in hs_df.columns and not hs_df.empty else 'Unknown'}"
)
# ─────────────────────────────────────────────────────────────────────────────
# Compact summary for embedding in the main Sentiment Dashboard
# ─────────────────────────────────────────────────────────────────────────────
def render_helpscout_compact_summary(hs_df):
"""
A one-screen HelpScout summary section embedded at the bottom of the
main Sentiment Dashboard. Kept purposely brief.
"""
st.markdown("---")
st.markdown("## 🎧 HelpScout Support β€” Quick View")
st.caption(f"{len(hs_df):,} processed customer conversations")
total = len(hs_df)
if total == 0:
st.info("No HelpScout conversations available.")
return
charts = HelpScoutCharts()
flags = boolean_flag_counts(hs_df)
escalation_count = int(hs_df["is_escalation"].sum()) if "is_escalation" in hs_df.columns else 0
avg_dur = float(hs_df["duration_hours"].mean()) if "duration_hours" in hs_df.columns else 0.0
k1, k2, k3, k4 = st.columns(4)
k1.metric("Conversations", f"{total:,}")
k2.metric("Escalations", f"{escalation_count:,}", delta=f"{escalation_count/total*100:.1f}%", delta_color="inverse")
k3.metric("Refund Requests", f"{flags['is_refund_request']:,}")
k4.metric("Avg Duration (h)", f"{avg_dur:.1f}")
col1, col2 = st.columns(2)
with col1:
st.plotly_chart(
charts.create_sentiment_pie_chart(hs_df, title="HelpScout Sentiment"),
use_container_width=True,
key="hs_compact_sentiment_pie",
)
with col2:
st.plotly_chart(
charts.create_topic_bar_chart(hs_df, title="Top Topics", top_n=5),
use_container_width=True,
key="hs_compact_topic_bar",
)
st.info("πŸ‘‰ Navigate to **🎧 HelpScout Dashboard** for the full analysis.")