""" HelpScout Analysis Page Purpose-built analysis page for HelpScout conversations. Mirrors the SA page architecture: filter β†’ fetch β†’ charts β†’ LLM summary β†’ export. One page-level summary report for the entire filtered set. """ import sys from datetime import date, timedelta from pathlib import Path import pandas as pd import streamlit as st parent_dir = Path(__file__).resolve().parent.parent sys.path.append(str(parent_dir)) from visualizations.helpscout_charts import HelpScoutCharts from utils.helpscout_utils import ( boolean_flag_counts, build_filter_description, topic_label, load_topic_taxonomy ) from agents.helpscout_summary_agent import HelpScoutSummaryAgent def render_helpscout_analysis(data_loader): """ Render the HelpScout Analysis page. Args: data_loader: HelpScoutDataLoader instance """ st.title("πŸ”¬ HelpScout Analysis") st.markdown( "Deep-dive into customer support conversations. Apply filters, fetch the data, " "explore distributions, and generate an AI-powered summary report." ) st.markdown("---") charts = HelpScoutCharts() taxonomy = load_topic_taxonomy() # ── Filter options from already-loaded dashboard df ─────────────────────── hs_df = st.session_state.get("helpscout_df") if hs_df is None or hs_df.empty: st.warning("HelpScout dashboard data not loaded yet. Please wait for the app to initialise.") return filter_options = data_loader.get_filter_options(hs_df) # ── Filters ─────────────────────────────────────────────────────────────── st.markdown("### 🎯 Filters") row1_col1, row1_col2 = st.columns(2) with row1_col1: min_date = hs_df["first_message_at"].min().date() if "first_message_at" in hs_df.columns and not hs_df.empty else date.today() - timedelta(days=60) max_date = hs_df["first_message_at"].max().date() if "first_message_at" in hs_df.columns and not hs_df.empty else date.today() default_start = max(min_date, max_date - timedelta(days=data_loader.default_date_range_days)) date_range = st.date_input( "Date Range (First Message At)", value=(default_start, max_date), min_value=min_date, max_value=max_date, key="hs_analysis_date_range", ) with row1_col2: top_n_options = [("All", 0), ("50", 50), ("100", 100), ("200", 200), ("500", 500), ("1000", 1000)] top_n_label = st.selectbox( "Limit Results", options=[x[0] for x in top_n_options], index=0, help="Limit number of conversations fetched. 'All' fetches everything matching your filters.", key="hs_analysis_top_n", ) top_n = dict(top_n_options)[top_n_label] row2_col1, row2_col2, row2_col3, row2_col4 = st.columns(4) with row2_col1: topic_options = filter_options.get("topics", []) topic_labels_map = {t: topic_label(t, taxonomy) for t in topic_options} selected_topic_labels = st.multiselect( "Topics", options=[topic_labels_map[t] for t in topic_options], default=[], key="hs_analysis_topics", ) label_to_id = {v: k for k, v in topic_labels_map.items()} selected_topics = [label_to_id[l] for l in selected_topic_labels if l in label_to_id] with row2_col2: selected_sentiments = st.multiselect( "Sentiments", options=filter_options.get("sentiments", []), default=[], key="hs_analysis_sentiments", ) with row2_col3: selected_statuses = st.multiselect( "Status", options=filter_options.get("statuses", []), default=[], key="hs_analysis_statuses", ) with row2_col4: selected_sources = st.multiselect( "Source Type", options=filter_options.get("sources", []), default=[], key="hs_analysis_sources", ) row3_col1, row3_col2, row3_col3, row3_col4 = st.columns(4) with row3_col1: refund_only = st.checkbox("Refund Requests Only", key="hs_analysis_refund") with row3_col2: cancel_only = st.checkbox("Cancellations Only", key="hs_analysis_cancel") with row3_col3: membership_only = st.checkbox("Membership Joins Only", key="hs_analysis_membership") with row3_col4: member_status_filter = st.selectbox( "Customer Type", options=["All", "Members Only", "Non-Members Only"], index=0, help="Members are customers whose email matches a Musora user account.", key="hs_analysis_member_status", ) st.markdown("---") # ── Fetch button ───────────────────────────────────────────────────────── dr_tuple = (str(date_range[0]), str(date_range[1])) if date_range and len(date_range) == 2 else None fetch_key = ( dr_tuple, tuple(sorted(selected_sentiments)), tuple(sorted(selected_topics)), tuple(sorted(selected_statuses)), tuple(sorted(selected_sources)), bool(refund_only), bool(cancel_only), bool(membership_only), top_n, ) has_data = ( "hs_analysis_df" in st.session_state and st.session_state.get("hs_analysis_fetch_key") == fetch_key and not st.session_state["hs_analysis_df"].empty ) fetch_col, info_col = st.columns([1, 3]) with fetch_col: fetch_clicked = st.button("πŸš€ Fetch Data", type="primary", use_container_width=True, key="hs_fetch_btn") with info_col: if has_data: n = len(st.session_state["hs_analysis_df"]) st.success(f"βœ… Showing **{n:,}** conversations matching your filters") elif not fetch_clicked: st.info("πŸ‘† Set your filters and click **Fetch Data** to query Snowflake.") if fetch_clicked: with st.spinner("Fetching HelpScout data from Snowflake…"): result_df = data_loader.load_analysis_data( sentiments=selected_sentiments or None, topics=selected_topics or None, refund_only=refund_only, cancel_only=cancel_only, membership_only=membership_only, statuses=selected_statuses or None, sources=selected_sources or None, date_range=(date_range[0], date_range[1]) if dr_tuple else None, top_n=top_n or None, ) applied_filters = { "date_range": (date_range[0], date_range[1]) if dr_tuple else None, "sentiments": selected_sentiments, "topics": selected_topics, "statuses": selected_statuses, "sources": selected_sources, "refund_only": refund_only, "cancel_only": cancel_only, "membership_only": membership_only, "member_status": member_status_filter, } st.session_state["hs_analysis_df"] = result_df st.session_state["hs_analysis_fetch_key"] = fetch_key st.session_state["hs_analysis_filter_desc"] = build_filter_description(applied_filters, taxonomy) # Invalidate any prior summary when filters change st.session_state.pop("hs_analysis_summary", None) st.session_state.pop("hs_analysis_summary_key", None) st.session_state["hs_analysis_page"] = 1 st.rerun() if not has_data and not fetch_clicked: return analysis_df = st.session_state.get("hs_analysis_df", pd.DataFrame()).copy() filter_desc = st.session_state.get("hs_analysis_filter_desc", "No filters applied") # Derive is_member from dashboard df (always, so breakdown charts work on "All" too) if "customer_email" in analysis_df.columns: hs_dashboard = st.session_state.get("helpscout_df", pd.DataFrame()) if "is_member" in hs_dashboard.columns and not hs_dashboard.empty: member_emails = set( hs_dashboard[hs_dashboard["is_member"]]["customer_email"].str.lower().dropna() ) analysis_df["is_member"] = analysis_df["customer_email"].str.lower().isin(member_emails) # Apply filter when a specific group is selected if member_status_filter == "Members Only": analysis_df = analysis_df[analysis_df["is_member"]] elif member_status_filter == "Non-Members Only": analysis_df = analysis_df[~analysis_df["is_member"]] elif member_status_filter != "All": st.warning("Member data not available β€” customer emails could not be matched to Musora records.") if analysis_df.empty: st.warning("No conversations found for the selected filters. Try adjusting and re-fetching.") return total = len(analysis_df) flags = boolean_flag_counts(analysis_df) neg_pct = analysis_df["sentiment_polarity"].isin(["negative", "very_negative"]).sum() / total * 100 avg_dur = float(analysis_df["duration_hours"].mean()) if "duration_hours" in analysis_df.columns else 0.0 # ── KPI Row ─────────────────────────────────────────────────────────────── st.markdown("### πŸ“Š Overview") k1, k2, k3, k4, k5 = st.columns(5) k1.metric("Conversations", f"{total:,}") k2.metric("Negative %", f"{neg_pct:.1f}%") k3.metric("Refund Requests", f"{flags['is_refund_request']:,}") k4.metric("Cancellations", f"{flags['is_cancellation']:,}") k5.metric("Avg Duration (h)", f"{avg_dur:.1f}") st.caption(f"**Active filters:** {filter_desc}") st.markdown("---") # ── Distributions ───────────────────────────────────────────────────────── st.markdown("### πŸ“ˆ Distributions") col1, col2 = st.columns(2) with col1: st.plotly_chart(charts.create_sentiment_pie_chart(analysis_df, title="Sentiment Distribution"), use_container_width=True, key="hs_analysis_sent_pie") with col2: st.plotly_chart(charts.create_topic_bar_chart(analysis_df, title="Topic Distribution"), use_container_width=True, key="hs_analysis_topic_bar") col1, col2 = st.columns(2) with col1: st.plotly_chart(charts.create_topic_sentiment_heatmap(analysis_df), use_container_width=True, key="hs_analysis_topic_heatmap") with col2: st.plotly_chart(charts.create_boolean_flags_chart(analysis_df), use_container_width=True, key="hs_analysis_flags") if "emotions" in analysis_df.columns and analysis_df["emotions"].notna().any(): col1, col2 = st.columns(2) with col1: st.plotly_chart(charts.create_emotion_bar_chart(analysis_df, title="Emotion Distribution"), use_container_width=True, key="hs_analysis_emotion") with col2: st.plotly_chart(charts.create_volume_timeline(analysis_df, title="Volume Over Time"), use_container_width=True, key="hs_analysis_vol_timeline") else: st.plotly_chart(charts.create_volume_timeline(analysis_df, title="Volume Over Time"), use_container_width=True, key="hs_analysis_vol_timeline2") # Member vs Non-Member breakdown (only when both groups are present in the view) if "is_member" in analysis_df.columns: st.markdown("### πŸ‘€ Member vs Non-Member") col1, col2 = st.columns(2) with col1: st.plotly_chart(charts.create_member_status_chart(analysis_df, title="Member vs Non-Member"), use_container_width=True, key="hs_analysis_member_pie") with col2: st.plotly_chart(charts.create_member_sentiment_chart(analysis_df, title="Sentiment by Member Status"), use_container_width=True, key="hs_analysis_member_sentiment") st.plotly_chart(charts.create_member_topic_chart(analysis_df, title="Top Topics by Member Status"), use_container_width=True, key="hs_analysis_member_topics") st.markdown("---") # ── AI Summary Report ───────────────────────────────────────────────────── st.markdown("### πŸ€– AI Summary Report") st.markdown( "Generate an LLM-powered report from the conversation summaries matching your filters. " "The AI looks beyond the pre-extracted tags to surface patterns, pain points, " "and actionable insights." ) summary_available = ( "hs_analysis_summary" in st.session_state and st.session_state.get("hs_analysis_summary_key") == fetch_key and st.session_state["hs_analysis_summary"] is not None ) gen_col, pdf_col = st.columns([1, 1]) with gen_col: gen_clicked = st.button("🧠 Generate Summary Report", type="primary", use_container_width=True, key="hs_gen_summary_btn") with pdf_col: export_pdf_clicked = st.button("πŸ“„ Export as PDF", use_container_width=True, key="hs_export_pdf_btn") if gen_clicked: with st.spinner("Analysing conversations with AI… this may take 20–40 seconds…"): agent = HelpScoutSummaryAgent() result = agent.process({ "conversations": analysis_df, "filter_description": filter_desc, }) st.session_state["hs_analysis_summary"] = result st.session_state["hs_analysis_summary_key"] = fetch_key st.rerun() if export_pdf_clicked: with st.spinner("Generating PDF…"): try: from utils.helpscout_pdf import HelpScoutAnalysisPDF import datetime summary_result = st.session_state.get("hs_analysis_summary") exporter = HelpScoutAnalysisPDF() pdf_bytes = exporter.generate_report( analysis_df, filter_info={"Filters": filter_desc, "Total Conversations": str(total)}, summary_result=summary_result, ) filename = f"helpscout_analysis_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}.pdf" st.success("Report generated!") st.download_button( label="Download Analysis PDF", data=pdf_bytes, file_name=filename, mime="application/pdf", use_container_width=True, key="hs_download_pdf_btn", ) except Exception as e: st.error(f"Failed to generate PDF: {e}") st.exception(e) # Render the summary if available if summary_available: result = st.session_state["hs_analysis_summary"] _render_summary_report(result) st.markdown("---") # ── Conversation Cards ──────────────────────────────────────────────────── st.markdown("### πŸ’¬ Conversations") if "hs_analysis_page" not in st.session_state: st.session_state.hs_analysis_page = 1 per_page = 10 total_pages = max(1, (total + per_page - 1) // per_page) if total > per_page: st.info(f"Page {st.session_state.hs_analysis_page} of {total_pages} ({total:,} conversations)") pc1, pc2, pc3 = st.columns([1, 2, 1]) with pc1: if st.button("⬅️ Previous", key="hs_prev_top", disabled=st.session_state.hs_analysis_page == 1): st.session_state.hs_analysis_page -= 1 st.rerun() with pc2: st.markdown( f"
" f"Page {st.session_state.hs_analysis_page} / {total_pages}
", unsafe_allow_html=True, ) with pc3: if st.button("Next ➑️", key="hs_next_top", disabled=st.session_state.hs_analysis_page >= total_pages): st.session_state.hs_analysis_page += 1 st.rerun() st.markdown("---") start = (st.session_state.hs_analysis_page - 1) * per_page end = min(start + per_page, total) page_df = analysis_df.iloc[start:end] for _, row in page_df.iterrows(): _render_conversation_card(row, taxonomy) # Bottom pagination if total > per_page: pb1, pb2, pb3 = st.columns([1, 2, 1]) with pb1: if st.button("⬅️ Previous", key="hs_prev_bot", disabled=st.session_state.hs_analysis_page == 1): st.session_state.hs_analysis_page -= 1 st.rerun() with pb2: st.markdown( f"
" f"Page {st.session_state.hs_analysis_page} / {total_pages}
", unsafe_allow_html=True, ) with pb3: if st.button("Next ➑️", key="hs_next_bot", disabled=st.session_state.hs_analysis_page >= total_pages): st.session_state.hs_analysis_page += 1 st.rerun() st.markdown("---") # ── Export CSV ──────────────────────────────────────────────────────────── st.markdown("### πŸ’Ύ Export Data") export_cols = [c for c in ["conversation_id", "customer_email", "first_message_at", "status", "sentiment_polarity", "topics", "summary", "is_refund_request", "is_cancellation", "is_membership", "duration_hours"] if c in analysis_df.columns] csv = analysis_df[export_cols].to_csv(index=False) st.download_button( label="πŸ“₯ Download as CSV", data=csv, file_name=f"helpscout_analysis_{total}conversations.csv", mime="text/csv", key="hs_csv_download", ) # ───────────────────────────────────────────────────────────────────────────── # Helper renderers # ───────────────────────────────────────────────────────────────────────────── def _render_summary_report(result: dict): """Render the LLM summary result with nice formatting.""" if not result.get("success"): st.error(f"AI analysis failed: {result.get('error', 'Unknown error')}") return summary = result.get("summary", {}) meta = result.get("metadata", {}) with st.container(): st.markdown("---") st.markdown("#### πŸ“‹ Executive Summary") st.info(summary.get("executive_summary", "")) col1, col2 = st.columns(2) with col1: themes = summary.get("top_themes", []) if themes: st.markdown("#### 🎯 Top Themes") for t in themes: st.markdown( f"**{t.get('theme', '')}** _{t.get('prevalence', '')}_ \n" f"{t.get('description', '')}" ) st.markdown("") insights = summary.get("unexpected_insights", []) if insights: st.markdown("#### πŸ’‘ Unexpected Insights") for ins in insights: st.markdown(f"- {ins}") with col2: complaints = summary.get("top_complaints", []) if complaints: st.markdown("#### ⚠️ Top Complaints") for c in complaints: st.markdown(f"- {c}") quotes = summary.get("notable_quotes", []) if quotes: st.markdown("#### πŸ’¬ Notable Quotes") for q in quotes: st.markdown(f"> {q}") with st.expander("ℹ️ Analysis Metadata"): mc1, mc2, mc3 = st.columns(3) mc1.metric("Conversations Analysed", meta.get("total_conversations_analyzed", 0)) mc2.metric("Model Used", meta.get("model_used", "N/A")) mc3.metric("Tokens Used", meta.get("tokens_used", 0)) if meta.get("total_available", 0) > meta.get("total_conversations_analyzed", 0): st.caption( f"Sampled {meta['total_conversations_analyzed']} of " f"{meta['total_available']} conversations for this analysis." ) def _render_conversation_card(row, taxonomy: dict): """Render a single conversation card.""" sent = str(row.get("sentiment_polarity", "unknown")) sent_emoji = { "very_positive": "🟒", "positive": "🟩", "neutral": "🟑", "negative": "🟠", "very_negative": "πŸ”΄", }.get(sent, "βšͺ") topics_list = row.get("topics_list") or [] topic_labels_str = ", ".join(topic_label(t, taxonomy) for t in topics_list) if topics_list else "β€”" first_name = str(row.get("customer_first") or "").strip() last_name = str(row.get("customer_last") or "").strip() customer_str = f"{first_name} {last_name[:1]}." if first_name or last_name else "Anonymous" first_msg = row.get("first_message_at") date_str = first_msg.strftime("%Y-%m-%d") if hasattr(first_msg, "strftime") else str(first_msg or "") flags = [] if row.get("is_refund_request"): flags.append("πŸ’° Refund") if row.get("is_cancellation"): flags.append("🚫 Cancel") if row.get("is_membership"): flags.append("βœ… Membership") flags_str = " | ".join(flags) if flags else "" with st.expander( f"{sent_emoji} {customer_str} β€” {topic_labels_str} | {sent.replace('_', ' ').title()} | {date_str}" + (f" [{flags_str}]" if flags_str else ""), expanded=False, ): info_col1, info_col2, info_col3 = st.columns(3) info_col1.markdown(f"**Status:** {row.get('status', 'β€”')}") info_col2.markdown(f"**Source:** {row.get('source_type', 'β€”')}") info_col3.markdown(f"**Duration:** {row.get('duration_hours', 0):.1f}h | **Threads:** {row.get('thread_count', 0)}") summary = str(row.get("summary") or "No summary available.") st.markdown(f"**Summary:** {summary}") notes_col1, notes_col2 = st.columns(2) with notes_col1: sent_note = str(row.get("sentiment_notes") or "") if sent_note: st.markdown(f"**Sentiment Note:** _{sent_note}_") with notes_col2: topic_note = str(row.get("topic_notes") or "") if topic_note: st.markdown(f"**Topic Note:** _{topic_note}_")