Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

File size: 18,161 Bytes

"""
HelpScout Dashboard Page
Full dedicated dashboard for HelpScout customer support conversation analysis.
"""
import sys
from pathlib import Path

import pandas as pd
import streamlit as st

parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))

from utils.helpscout_utils import boolean_flag_counts, topic_label, load_topic_taxonomy
from visualizations.helpscout_charts import HelpScoutCharts
from visualizations.demographic_charts import DemographicCharts
from utils.data_processor import SentimentDataProcessor


def _sentiment_score(df) -> float:
    """Return the mean sentiment of ``df`` on a -2 to +2 scale.

    Each ``sentiment_polarity`` label is converted to a number
    (``very_negative`` = -2 ... ``very_positive`` = +2). Missing values and
    labels outside the five known ones count as 0. The result is 0.0 when
    ``df`` is empty or has no ``sentiment_polarity`` column.
    """
    if df.empty or "sentiment_polarity" not in df.columns:
        return 0.0

    polarity_weights = {
        "very_negative": -2,
        "negative": -1,
        "neutral": 0,
        "positive": 1,
        "very_positive": 2,
    }
    numeric = df["sentiment_polarity"].map(polarity_weights)
    # Unmapped labels come back as NaN from .map(); treat them as neutral.
    return float(numeric.fillna(0).mean())


def _filter_by_date_range(hs_df, date_range):
    """Return the rows of ``hs_df`` whose ``first_message_at`` lies in ``date_range``.

    Both the start day and the end day are included. ``pd.Timestamp(end_date)``
    resolves to midnight at the *start* of that day, so the upper bound used
    here is the following midnight (exclusive); otherwise conversations opened
    later on the end date would be dropped.
    """
    range_start = pd.Timestamp(date_range[0])
    range_end = pd.Timestamp(date_range[1]).normalize() + pd.Timedelta(days=1)
    opened_at = hs_df["first_message_at"]
    return hs_df[(opened_at >= range_start) & (opened_at < range_end)]


def _polarity_share(hs_df, labels) -> float:
    """Return the percentage (0-100) of rows whose ``sentiment_polarity`` is in ``labels``.

    Returns 0.0 when the frame is empty or has no ``sentiment_polarity`` column.
    """
    if hs_df.empty or "sentiment_polarity" not in hs_df.columns:
        return 0.0
    return float(hs_df["sentiment_polarity"].isin(labels).sum() / len(hs_df) * 100)


def _format_optional_mean(hs_df, column, suffix="") -> str:
    """Format the mean of ``column`` to one decimal place, or ``"N/A"``.

    ``"N/A"`` is returned when the column is absent or its mean is NaN
    (no usable values). A genuine mean of 0.0 is formatted normally.
    """
    if column not in hs_df.columns:
        return "N/A"
    mean_value = hs_df[column].mean()
    if pd.isna(mean_value):
        return "N/A"
    return f"{mean_value:.1f}{suffix}"


def _last_processed_label(hs_df) -> str:
    """Return the latest ``processed_at`` value as ``YYYY-MM-DD HH:MM``, or ``"Unknown"``."""
    if "processed_at" not in hs_df.columns or hs_df.empty:
        return "Unknown"
    latest = hs_df["processed_at"].max()
    if pd.isna(latest):
        # An all-null column yields NaT, which cannot be formatted.
        return "Unknown"
    if hasattr(latest, "strftime"):
        return latest.strftime("%Y-%m-%d %H:%M")
    return str(latest)


def render_helpscout_dashboard(data_loader, date_range=None):
    """
    Render the full HelpScout Dashboard page.

    The conversation DataFrame is read from ``st.session_state["helpscout_df"]``.
    After the optional date-range and member-status filters are applied, the
    page draws, in order: PDF export, KPI row, sentiment, topics, emotions
    (when present), billing flags, status/source, volume & trends,
    conversation depth, member vs non-member (when ``is_member`` exists) and
    demographics (when available), followed by a data-source footer.

    Args:
        data_loader: HelpScoutDataLoader instance. Not referenced by this
            function; kept in the signature for existing callers.
        date_range: optional (start_date, end_date) tuple from global sidebar
            filters. Both days are inclusive.
    """
    st.title("🎧 HelpScout Support Dashboard")
    st.markdown("Customer support conversation analysis from HelpScout.")

    hs_df = st.session_state.get("helpscout_df")
    if hs_df is None or hs_df.empty:
        st.warning("No HelpScout data available. Please check your Snowflake connection.")
        return

    if date_range and len(date_range) == 2 and "first_message_at" in hs_df.columns:
        hs_df = _filter_by_date_range(hs_df, date_range)
        if hs_df.empty:
            st.warning("No HelpScout conversations match the selected date range.")
            return
        st.info(f"Showing **{len(hs_df):,}** conversations filtered by date range "
                f"({date_range[0]} → {date_range[1]})")

    charts = HelpScoutCharts()

    # ── Member Status Filter ───────────────────────────────────────────────────
    has_member_data = "is_member" in hs_df.columns
    if has_member_data:
        member_filter = st.radio(
            "Show conversations for:",
            options=["All Customers", "Members Only", "Non-Members Only"],
            horizontal=True,
            key="hs_dash_member_filter",
        )
        if member_filter == "Members Only":
            hs_df = hs_df[hs_df["is_member"]]
        elif member_filter == "Non-Members Only":
            hs_df = hs_df[~hs_df["is_member"]]
        if member_filter != "All Customers":
            # "Members Only" -> "members", "Non-Members Only" -> "non-members"
            audience = member_filter.lower().replace(' only', '')
            if hs_df.empty:
                st.warning(f"No conversations found for {audience}.")
                return
            st.info(f"Filtered to **{len(hs_df):,}** {audience} conversations.")
    else:
        st.info("ℹ️ Member data not available — customer emails could not be matched to Musora user records.")

    # ── PDF Export ────────────────────────────────────────────────────────────
    with st.expander("📄 Export PDF Report", expanded=False):
        st.markdown(
            "Generate a comprehensive HelpScout support report. "
            "Covers sentiment, topics, billing flags, timelines, and demographics."
        )
        if st.button("Generate HelpScout PDF Report", type="primary",
                     use_container_width=True, key="hs_dash_pdf_btn"):
            with st.spinner("Generating HelpScout PDF report…"):
                try:
                    # Imported lazily so a failure to load the exporter is
                    # reported in the UI instead of breaking the whole page.
                    from utils.helpscout_pdf import HelpScoutDashboardPDF
                    import datetime

                    exporter = HelpScoutDashboardPDF()
                    pdf_bytes = exporter.generate_report(hs_df)
                    filename = f"helpscout_dashboard_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}.pdf"
                    st.success("Report generated successfully!")
                    st.download_button(
                        label="Download HelpScout Dashboard PDF",
                        data=pdf_bytes,
                        file_name=filename,
                        mime="application/pdf",
                        use_container_width=True,
                    )
                except Exception as e:
                    # Page-level boundary: surface the error rather than crash.
                    st.error(f"Failed to generate report: {e}")
                    st.exception(e)

    st.markdown("---")

    # ── KPI Row ───────────────────────────────────────────────────────────────
    # hs_df is non-empty here (every filter above returns early on empty).
    total = len(hs_df)
    escalation_count = int(hs_df["is_escalation"].sum()) if "is_escalation" in hs_df.columns else 0
    flags = boolean_flag_counts(hs_df)
    neg_pct = _polarity_share(hs_df, ["negative", "very_negative"])
    pos_pct = _polarity_share(hs_df, ["positive", "very_positive"])
    avg_duration = float(hs_df["duration_hours"].mean()) if "duration_hours" in hs_df.columns else 0.0

    k1, k2, k3, k4, k5, k6 = st.columns(6)
    k1.metric("Total Conversations", f"{total:,}")
    # The mean is NaN when every duration is null; show "N/A" instead of "nan".
    k2.metric("Avg Duration (h)", f"{avg_duration:.1f}" if pd.notna(avg_duration) else "N/A")
    k3.metric("Escalations", f"{escalation_count:,}", delta=f"{escalation_count/total*100:.1f}% of total" if total else None, delta_color="inverse")
    k4.metric("Refund Requests", f"{flags['is_refund_request']:,}")
    k5.metric("Cancellations",   f"{flags['is_cancellation']:,}")
    k6.metric("Membership Joins",f"{flags['is_membership']:,}")

    st.markdown("---")

    # ── Sentiment ─────────────────────────────────────────────────────────────
    st.markdown("## 🎯 Sentiment Distribution")
    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(charts.create_sentiment_pie_chart(hs_df), use_container_width=True)
    with col2:
        avg_score = _sentiment_score(hs_df)
        st.plotly_chart(charts.create_sentiment_score_gauge(avg_score), use_container_width=True)
        m1, m2 = st.columns(2)
        m1.metric("Positive %", f"{pos_pct:.1f}%")
        m2.metric("Negative %", f"{neg_pct:.1f}%")

    st.markdown("---")

    # ── Topics ────────────────────────────────────────────────────────────────
    st.markdown("## 🏷️ Topic Analysis")
    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(charts.create_topic_bar_chart(hs_df, title="Conversations by Topic"),
                        use_container_width=True)
    with col2:
        st.plotly_chart(charts.create_topic_pie_chart(hs_df, title="Topic Share"),
                        use_container_width=True)

    st.plotly_chart(charts.create_topic_sentiment_heatmap(hs_df), use_container_width=True)

    st.markdown("---")

    # ── Emotions ─────────────────────────────────────────────────────────────
    if "emotions" in hs_df.columns and hs_df["emotions"].notna().any():
        st.markdown("## 💭 Emotion Analysis")
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(charts.create_emotion_bar_chart(hs_df, title="Emotion Distribution"),
                            use_container_width=True)
        with col2:
            # Reuse the existing DistributionCharts emotion pie (same df structure with emotions col)
            from visualizations.distribution_charts import DistributionCharts
            dist_charts = DistributionCharts()
            st.plotly_chart(dist_charts.create_emotion_pie_chart(hs_df, title="Emotion Share"),
                            use_container_width=True)
        st.markdown("---")

    # ── Billing Flags ─────────────────────────────────────────────────────────
    st.markdown("## 💳 Billing & Membership Flags")
    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(charts.create_boolean_flags_chart(hs_df), use_container_width=True)
    with col2:
        st.plotly_chart(charts.create_escalation_breakdown(hs_df), use_container_width=True)

    st.markdown("---")

    # ── Status / Source ───────────────────────────────────────────────────────
    st.markdown("## 📬 Status & Source Distribution")
    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(charts.create_status_distribution(hs_df), use_container_width=True)
    with col2:
        st.plotly_chart(charts.create_source_distribution(hs_df), use_container_width=True)

    st.markdown("---")

    # ── Volume & Timelines ────────────────────────────────────────────────────
    with st.expander("📈 Volume & Trends", expanded=False):
        freq_col, _ = st.columns([1, 3])
        with freq_col:
            freq = st.selectbox("Time Granularity", ["D", "W", "M"],
                                format_func=lambda x: {"D": "Daily", "W": "Weekly", "M": "Monthly"}[x],
                                index=1, key="hs_dash_freq")
        st.plotly_chart(charts.create_volume_timeline(hs_df, freq=freq), use_container_width=True)
        st.plotly_chart(charts.create_sentiment_timeline(hs_df, freq=freq), use_container_width=True)

        all_topics_ranked = charts.get_all_topics_ranked(hs_df)
        topic_options = {t: topic_label(t, charts.taxonomy) for t in all_topics_ranked}
        default_topics = all_topics_ranked[:5]
        selected_topics = st.multiselect(
            "Topics to display",
            options=list(topic_options.keys()),
            default=default_topics,
            format_func=lambda t: topic_options[t],
            key="hs_dash_topic_select",
        )
        # Fall back to the default topics when the user clears the selection.
        st.plotly_chart(
            charts.create_topic_timeline(hs_df, freq=freq, selected_topics=selected_topics or default_topics),
            use_container_width=True,
        )
        st.plotly_chart(charts.create_refund_cancel_timeline(hs_df, freq=freq), use_container_width=True)

    # ── Duration & Thread Count ───────────────────────────────────────────────
    with st.expander("📊 Conversation Depth", expanded=False):
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(charts.create_duration_histogram(hs_df), use_container_width=True)
        with col2:
            st.plotly_chart(charts.create_thread_count_histogram(hs_df), use_container_width=True)

    # ── Member vs Non-Member ─────────────────────────────────────────────────
    # Row filtering above never changes the columns, so the earlier check holds.
    if has_member_data:
        st.markdown("---")
        st.markdown("## 👤 Member vs Non-Member")
        st.caption(
            "Conversations are classified as **Member** when the customer email matches "
            "a Musora user account, and **Non-Member** otherwise."
        )

        member_count     = int(hs_df["is_member"].sum())
        non_member_count = total - member_count
        match_pct        = member_count / total * 100 if total else 0

        mv1, mv2, mv3 = st.columns(3)
        mv1.metric("Members",      f"{member_count:,}",
                   f"{match_pct:.1f}% of conversations" if total else None)
        mv2.metric("Non-Members",  f"{non_member_count:,}",
                   f"{100 - match_pct:.1f}% of conversations" if total else None)
        mv3.metric("Email Match Rate", f"{match_pct:.1f}%")

        mem_col1, mem_col2 = st.columns(2)
        with mem_col1:
            st.plotly_chart(charts.create_member_status_chart(hs_df),
                            use_container_width=True, key="hs_dash_member_pie")
        with mem_col2:
            st.plotly_chart(charts.create_member_sentiment_chart(hs_df),
                            use_container_width=True, key="hs_dash_member_sentiment")

        st.plotly_chart(charts.create_member_topic_chart(hs_df),
                        use_container_width=True, key="hs_dash_member_topics")

    # ── Demographics ─────────────────────────────────────────────────────────
    # Shown only when both columns exist and at least one age group is known.
    has_demographics = (
        "age_group" in hs_df.columns
        and "timezone_region" in hs_df.columns
        and (hs_df["age_group"] != "Unknown").any()
    )
    if has_demographics:
        st.markdown("---")
        st.markdown("## 👥 Customer Demographics")
        st.info("Demographics available for customers whose email matched Musora user records.")

        processor = SentimentDataProcessor()
        demo_charts = DemographicCharts()

        demo_col1, demo_col2, demo_col3, demo_col4 = st.columns(4)
        known_demo = int((hs_df["age_group"] != "Unknown").sum())
        demo_col1.metric("With Demographics", f"{known_demo:,}", f"{known_demo/total*100:.1f}% matched")

        demo_col2.metric("Average Age", _format_optional_mean(hs_df, "age"))

        # value_counts() drops nulls, so it can be empty even for a non-empty frame.
        region_counts = hs_df["timezone_region"].value_counts()
        top_region = region_counts.index[0] if not region_counts.empty else "N/A"
        demo_col3.metric("Top Region", str(top_region))

        demo_col4.metric("Avg Experience", _format_optional_mean(hs_df, "experience_level", suffix="/10"))

        st.markdown("---")
        age_dist = processor.get_demographics_distribution(hs_df, "age_group")
        if not age_dist.empty:
            st.markdown("### Age Distribution")
            col1, col2 = st.columns(2)
            with col1:
                st.plotly_chart(demo_charts.create_age_distribution_chart(age_dist), use_container_width=True)
            with col2:
                age_sent = processor.get_demographics_by_sentiment(hs_df, "age_group")
                if not age_sent.empty:
                    st.plotly_chart(demo_charts.create_age_sentiment_chart(age_sent), use_container_width=True)

        region_dist = processor.get_timezone_regions_distribution(hs_df)
        if not region_dist.empty:
            st.markdown("### Geographic Distribution")
            col1, col2 = st.columns(2)
            with col1:
                st.plotly_chart(demo_charts.create_region_distribution_chart(region_dist), use_container_width=True)
            with col2:
                region_sent = processor.get_demographics_by_sentiment(hs_df, "timezone_region")
                if not region_sent.empty:
                    st.plotly_chart(demo_charts.create_region_sentiment_chart(region_sent), use_container_width=True)

    st.markdown("---")
    st.caption(
        "Data source: SOCIAL_MEDIA_DB.ML_FEATURES.HELPSCOUT_CONVERSATION_FEATURES | "
        f"Last processed: {_last_processed_label(hs_df)}"
    )


# ─────────────────────────────────────────────────────────────────────────────
# Compact summary for embedding in the main Sentiment Dashboard
# ─────────────────────────────────────────────────────────────────────────────

def render_helpscout_compact_summary(hs_df):
    """
    A one-screen HelpScout summary section embedded at the bottom of the
    main Sentiment Dashboard. Kept purposely brief.

    Renders a header, four KPI metrics (conversations, escalations, refund
    requests, average duration), a sentiment pie chart and a top-5 topic bar
    chart, then a pointer to the full dashboard page.

    Args:
        hs_df: DataFrame of HelpScout conversations. ``None`` or an empty
            frame renders the header plus a "no conversations" notice.
    """
    st.markdown("---")
    st.markdown("## 🎧 HelpScout Support — Quick View")

    # Tolerate a missing frame (e.g. data never loaded) instead of raising on len(None).
    total = 0 if hs_df is None else len(hs_df)
    st.caption(f"{total:,} processed customer conversations")
    if total == 0:
        st.info("No HelpScout conversations available.")
        return

    charts = HelpScoutCharts()
    flags  = boolean_flag_counts(hs_df)
    escalation_count = int(hs_df["is_escalation"].sum()) if "is_escalation" in hs_df.columns else 0
    avg_dur = float(hs_df["duration_hours"].mean()) if "duration_hours" in hs_df.columns else 0.0

    k1, k2, k3, k4 = st.columns(4)
    k1.metric("Conversations", f"{total:,}")
    k2.metric("Escalations",   f"{escalation_count:,}", delta=f"{escalation_count/total*100:.1f}%", delta_color="inverse")
    k3.metric("Refund Requests", f"{flags['is_refund_request']:,}")
    # The mean is NaN when every duration is null; show "N/A" instead of "nan".
    k4.metric("Avg Duration (h)", f"{avg_dur:.1f}" if pd.notna(avg_dur) else "N/A")

    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(
            charts.create_sentiment_pie_chart(hs_df, title="HelpScout Sentiment"),
            use_container_width=True,
            key="hs_compact_sentiment_pie",
        )
    with col2:
        st.plotly_chart(
            charts.create_topic_bar_chart(hs_df, title="Top Topics", top_n=5),
            use_container_width=True,
            key="hs_compact_topic_bar",
        )

    st.info("👉 Navigate to **🎧 HelpScout Dashboard** for the full analysis.")