| """ |
| HelpScout Analysis Page |
| Purpose-built analysis page for HelpScout conversations. |
| Mirrors the SA page architecture: filter β fetch β charts β LLM summary β export. |
| One page-level summary report for the entire filtered set. |
| """ |
| import sys |
| from datetime import date, timedelta |
| from pathlib import Path |
|
|
| import pandas as pd |
| import streamlit as st |
|
|
| parent_dir = Path(__file__).resolve().parent.parent |
| sys.path.append(str(parent_dir)) |
|
|
| from visualizations.helpscout_charts import HelpScoutCharts |
| from utils.helpscout_utils import ( |
| boolean_flag_counts, build_filter_description, topic_label, load_topic_taxonomy |
| ) |
| from agents.helpscout_summary_agent import HelpScoutSummaryAgent |
|
|
|
|
| def render_helpscout_analysis(data_loader): |
| """ |
| Render the HelpScout Analysis page. |
| |
| Args: |
| data_loader: HelpScoutDataLoader instance |
| """ |
| st.title("π¬ HelpScout Analysis") |
| st.markdown( |
| "Deep-dive into customer support conversations. Apply filters, fetch the data, " |
| "explore distributions, and generate an AI-powered summary report." |
| ) |
| st.markdown("---") |
|
|
| charts = HelpScoutCharts() |
| taxonomy = load_topic_taxonomy() |
|
|
| |
| hs_df = st.session_state.get("helpscout_df") |
| if hs_df is None or hs_df.empty: |
| st.warning("HelpScout dashboard data not loaded yet. Please wait for the app to initialise.") |
| return |
|
|
| filter_options = data_loader.get_filter_options(hs_df) |
|
|
| |
| st.markdown("### π― Filters") |
|
|
| row1_col1, row1_col2 = st.columns(2) |
| with row1_col1: |
| min_date = hs_df["first_message_at"].min().date() if "first_message_at" in hs_df.columns and not hs_df.empty else date.today() - timedelta(days=60) |
| max_date = hs_df["first_message_at"].max().date() if "first_message_at" in hs_df.columns and not hs_df.empty else date.today() |
| default_start = max(min_date, max_date - timedelta(days=data_loader.default_date_range_days)) |
| date_range = st.date_input( |
| "Date Range (First Message At)", |
| value=(default_start, max_date), |
| min_value=min_date, max_value=max_date, |
| key="hs_analysis_date_range", |
| ) |
| with row1_col2: |
| top_n_options = [("All", 0), ("50", 50), ("100", 100), ("200", 200), ("500", 500), ("1000", 1000)] |
| top_n_label = st.selectbox( |
| "Limit Results", |
| options=[x[0] for x in top_n_options], |
| index=0, |
| help="Limit number of conversations fetched. 'All' fetches everything matching your filters.", |
| key="hs_analysis_top_n", |
| ) |
| top_n = dict(top_n_options)[top_n_label] |
|
|
| row2_col1, row2_col2, row2_col3, row2_col4 = st.columns(4) |
| with row2_col1: |
| topic_options = filter_options.get("topics", []) |
| topic_labels_map = {t: topic_label(t, taxonomy) for t in topic_options} |
| selected_topic_labels = st.multiselect( |
| "Topics", |
| options=[topic_labels_map[t] for t in topic_options], |
| default=[], |
| key="hs_analysis_topics", |
| ) |
| label_to_id = {v: k for k, v in topic_labels_map.items()} |
| selected_topics = [label_to_id[l] for l in selected_topic_labels if l in label_to_id] |
|
|
| with row2_col2: |
| selected_sentiments = st.multiselect( |
| "Sentiments", |
| options=filter_options.get("sentiments", []), |
| default=[], |
| key="hs_analysis_sentiments", |
| ) |
|
|
| with row2_col3: |
| selected_statuses = st.multiselect( |
| "Status", |
| options=filter_options.get("statuses", []), |
| default=[], |
| key="hs_analysis_statuses", |
| ) |
|
|
| with row2_col4: |
| selected_sources = st.multiselect( |
| "Source Type", |
| options=filter_options.get("sources", []), |
| default=[], |
| key="hs_analysis_sources", |
| ) |
|
|
| row3_col1, row3_col2, row3_col3, row3_col4 = st.columns(4) |
| with row3_col1: |
| refund_only = st.checkbox("Refund Requests Only", key="hs_analysis_refund") |
| with row3_col2: |
| cancel_only = st.checkbox("Cancellations Only", key="hs_analysis_cancel") |
| with row3_col3: |
| membership_only = st.checkbox("Membership Joins Only", key="hs_analysis_membership") |
| with row3_col4: |
| member_status_filter = st.selectbox( |
| "Customer Type", |
| options=["All", "Members Only", "Non-Members Only"], |
| index=0, |
| help="Members are customers whose email matches a Musora user account.", |
| key="hs_analysis_member_status", |
| ) |
|
|
| st.markdown("---") |
|
|
| |
| dr_tuple = (str(date_range[0]), str(date_range[1])) if date_range and len(date_range) == 2 else None |
|
|
| fetch_key = ( |
| dr_tuple, |
| tuple(sorted(selected_sentiments)), |
| tuple(sorted(selected_topics)), |
| tuple(sorted(selected_statuses)), |
| tuple(sorted(selected_sources)), |
| bool(refund_only), bool(cancel_only), bool(membership_only), |
| top_n, |
| ) |
|
|
| has_data = ( |
| "hs_analysis_df" in st.session_state |
| and st.session_state.get("hs_analysis_fetch_key") == fetch_key |
| and not st.session_state["hs_analysis_df"].empty |
| ) |
|
|
| fetch_col, info_col = st.columns([1, 3]) |
| with fetch_col: |
| fetch_clicked = st.button("π Fetch Data", type="primary", |
| use_container_width=True, key="hs_fetch_btn") |
| with info_col: |
| if has_data: |
| n = len(st.session_state["hs_analysis_df"]) |
| st.success(f"β
Showing **{n:,}** conversations matching your filters") |
| elif not fetch_clicked: |
| st.info("π Set your filters and click **Fetch Data** to query Snowflake.") |
|
|
| if fetch_clicked: |
| with st.spinner("Fetching HelpScout data from Snowflakeβ¦"): |
| result_df = data_loader.load_analysis_data( |
| sentiments=selected_sentiments or None, |
| topics=selected_topics or None, |
| refund_only=refund_only, |
| cancel_only=cancel_only, |
| membership_only=membership_only, |
| statuses=selected_statuses or None, |
| sources=selected_sources or None, |
| date_range=(date_range[0], date_range[1]) if dr_tuple else None, |
| top_n=top_n or None, |
| ) |
| applied_filters = { |
| "date_range": (date_range[0], date_range[1]) if dr_tuple else None, |
| "sentiments": selected_sentiments, |
| "topics": selected_topics, |
| "statuses": selected_statuses, |
| "sources": selected_sources, |
| "refund_only": refund_only, |
| "cancel_only": cancel_only, |
| "membership_only": membership_only, |
| "member_status": member_status_filter, |
| } |
| st.session_state["hs_analysis_df"] = result_df |
| st.session_state["hs_analysis_fetch_key"] = fetch_key |
| st.session_state["hs_analysis_filter_desc"] = build_filter_description(applied_filters, taxonomy) |
| |
| st.session_state.pop("hs_analysis_summary", None) |
| st.session_state.pop("hs_analysis_summary_key", None) |
| st.session_state["hs_analysis_page"] = 1 |
| st.rerun() |
|
|
| if not has_data and not fetch_clicked: |
| return |
|
|
| analysis_df = st.session_state.get("hs_analysis_df", pd.DataFrame()).copy() |
| filter_desc = st.session_state.get("hs_analysis_filter_desc", "No filters applied") |
|
|
| |
| if "customer_email" in analysis_df.columns: |
| hs_dashboard = st.session_state.get("helpscout_df", pd.DataFrame()) |
| if "is_member" in hs_dashboard.columns and not hs_dashboard.empty: |
| member_emails = set( |
| hs_dashboard[hs_dashboard["is_member"]]["customer_email"].str.lower().dropna() |
| ) |
| analysis_df["is_member"] = analysis_df["customer_email"].str.lower().isin(member_emails) |
| |
| if member_status_filter == "Members Only": |
| analysis_df = analysis_df[analysis_df["is_member"]] |
| elif member_status_filter == "Non-Members Only": |
| analysis_df = analysis_df[~analysis_df["is_member"]] |
| elif member_status_filter != "All": |
| st.warning("Member data not available β customer emails could not be matched to Musora records.") |
|
|
| if analysis_df.empty: |
| st.warning("No conversations found for the selected filters. Try adjusting and re-fetching.") |
| return |
|
|
| total = len(analysis_df) |
| flags = boolean_flag_counts(analysis_df) |
| neg_pct = analysis_df["sentiment_polarity"].isin(["negative", "very_negative"]).sum() / total * 100 |
| avg_dur = float(analysis_df["duration_hours"].mean()) if "duration_hours" in analysis_df.columns else 0.0 |
|
|
| |
| st.markdown("### π Overview") |
| k1, k2, k3, k4, k5 = st.columns(5) |
| k1.metric("Conversations", f"{total:,}") |
| k2.metric("Negative %", f"{neg_pct:.1f}%") |
| k3.metric("Refund Requests", f"{flags['is_refund_request']:,}") |
| k4.metric("Cancellations", f"{flags['is_cancellation']:,}") |
| k5.metric("Avg Duration (h)", f"{avg_dur:.1f}") |
|
|
| st.caption(f"**Active filters:** {filter_desc}") |
| st.markdown("---") |
|
|
| |
| st.markdown("### π Distributions") |
|
|
| col1, col2 = st.columns(2) |
| with col1: |
| st.plotly_chart(charts.create_sentiment_pie_chart(analysis_df, title="Sentiment Distribution"), |
| use_container_width=True, key="hs_analysis_sent_pie") |
| with col2: |
| st.plotly_chart(charts.create_topic_bar_chart(analysis_df, title="Topic Distribution"), |
| use_container_width=True, key="hs_analysis_topic_bar") |
|
|
| col1, col2 = st.columns(2) |
| with col1: |
| st.plotly_chart(charts.create_topic_sentiment_heatmap(analysis_df), |
| use_container_width=True, key="hs_analysis_topic_heatmap") |
| with col2: |
| st.plotly_chart(charts.create_boolean_flags_chart(analysis_df), |
| use_container_width=True, key="hs_analysis_flags") |
|
|
| if "emotions" in analysis_df.columns and analysis_df["emotions"].notna().any(): |
| col1, col2 = st.columns(2) |
| with col1: |
| st.plotly_chart(charts.create_emotion_bar_chart(analysis_df, title="Emotion Distribution"), |
| use_container_width=True, key="hs_analysis_emotion") |
| with col2: |
| st.plotly_chart(charts.create_volume_timeline(analysis_df, title="Volume Over Time"), |
| use_container_width=True, key="hs_analysis_vol_timeline") |
| else: |
| st.plotly_chart(charts.create_volume_timeline(analysis_df, title="Volume Over Time"), |
| use_container_width=True, key="hs_analysis_vol_timeline2") |
|
|
| |
| if "is_member" in analysis_df.columns: |
| st.markdown("### π€ Member vs Non-Member") |
| col1, col2 = st.columns(2) |
| with col1: |
| st.plotly_chart(charts.create_member_status_chart(analysis_df, |
| title="Member vs Non-Member"), |
| use_container_width=True, key="hs_analysis_member_pie") |
| with col2: |
| st.plotly_chart(charts.create_member_sentiment_chart(analysis_df, |
| title="Sentiment by Member Status"), |
| use_container_width=True, key="hs_analysis_member_sentiment") |
| st.plotly_chart(charts.create_member_topic_chart(analysis_df, |
| title="Top Topics by Member Status"), |
| use_container_width=True, key="hs_analysis_member_topics") |
|
|
| st.markdown("---") |
|
|
| |
| st.markdown("### π€ AI Summary Report") |
| st.markdown( |
| "Generate an LLM-powered report from the conversation summaries matching your filters. " |
| "The AI looks beyond the pre-extracted tags to surface patterns, pain points, " |
| "and actionable insights." |
| ) |
|
|
| summary_available = ( |
| "hs_analysis_summary" in st.session_state |
| and st.session_state.get("hs_analysis_summary_key") == fetch_key |
| and st.session_state["hs_analysis_summary"] is not None |
| ) |
|
|
| gen_col, pdf_col = st.columns([1, 1]) |
| with gen_col: |
| gen_clicked = st.button("π§ Generate Summary Report", type="primary", |
| use_container_width=True, key="hs_gen_summary_btn") |
| with pdf_col: |
| export_pdf_clicked = st.button("π Export as PDF", use_container_width=True, |
| key="hs_export_pdf_btn") |
|
|
| if gen_clicked: |
| with st.spinner("Analysing conversations with AIβ¦ this may take 20β40 secondsβ¦"): |
| agent = HelpScoutSummaryAgent() |
| result = agent.process({ |
| "conversations": analysis_df, |
| "filter_description": filter_desc, |
| }) |
| st.session_state["hs_analysis_summary"] = result |
| st.session_state["hs_analysis_summary_key"] = fetch_key |
| st.rerun() |
|
|
| if export_pdf_clicked: |
| with st.spinner("Generating PDFβ¦"): |
| try: |
| from utils.helpscout_pdf import HelpScoutAnalysisPDF |
| import datetime |
| summary_result = st.session_state.get("hs_analysis_summary") |
| exporter = HelpScoutAnalysisPDF() |
| pdf_bytes = exporter.generate_report( |
| analysis_df, |
| filter_info={"Filters": filter_desc, "Total Conversations": str(total)}, |
| summary_result=summary_result, |
| ) |
| filename = f"helpscout_analysis_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}.pdf" |
| st.success("Report generated!") |
| st.download_button( |
| label="Download Analysis PDF", |
| data=pdf_bytes, |
| file_name=filename, |
| mime="application/pdf", |
| use_container_width=True, |
| key="hs_download_pdf_btn", |
| ) |
| except Exception as e: |
| st.error(f"Failed to generate PDF: {e}") |
| st.exception(e) |
|
|
| |
| if summary_available: |
| result = st.session_state["hs_analysis_summary"] |
| _render_summary_report(result) |
|
|
| st.markdown("---") |
|
|
| |
| st.markdown("### π¬ Conversations") |
|
|
| if "hs_analysis_page" not in st.session_state: |
| st.session_state.hs_analysis_page = 1 |
|
|
| per_page = 10 |
| total_pages = max(1, (total + per_page - 1) // per_page) |
|
|
| if total > per_page: |
| st.info(f"Page {st.session_state.hs_analysis_page} of {total_pages} ({total:,} conversations)") |
| pc1, pc2, pc3 = st.columns([1, 2, 1]) |
| with pc1: |
| if st.button("β¬
οΈ Previous", key="hs_prev_top", |
| disabled=st.session_state.hs_analysis_page == 1): |
| st.session_state.hs_analysis_page -= 1 |
| st.rerun() |
| with pc2: |
| st.markdown( |
| f"<div style='text-align:center;padding-top:8px;'>" |
| f"Page {st.session_state.hs_analysis_page} / {total_pages}</div>", |
| unsafe_allow_html=True, |
| ) |
| with pc3: |
| if st.button("Next β‘οΈ", key="hs_next_top", |
| disabled=st.session_state.hs_analysis_page >= total_pages): |
| st.session_state.hs_analysis_page += 1 |
| st.rerun() |
| st.markdown("---") |
|
|
| start = (st.session_state.hs_analysis_page - 1) * per_page |
| end = min(start + per_page, total) |
| page_df = analysis_df.iloc[start:end] |
|
|
| for _, row in page_df.iterrows(): |
| _render_conversation_card(row, taxonomy) |
|
|
| |
| if total > per_page: |
| pb1, pb2, pb3 = st.columns([1, 2, 1]) |
| with pb1: |
| if st.button("β¬
οΈ Previous", key="hs_prev_bot", |
| disabled=st.session_state.hs_analysis_page == 1): |
| st.session_state.hs_analysis_page -= 1 |
| st.rerun() |
| with pb2: |
| st.markdown( |
| f"<div style='text-align:center;padding-top:8px;'>" |
| f"Page {st.session_state.hs_analysis_page} / {total_pages}</div>", |
| unsafe_allow_html=True, |
| ) |
| with pb3: |
| if st.button("Next β‘οΈ", key="hs_next_bot", |
| disabled=st.session_state.hs_analysis_page >= total_pages): |
| st.session_state.hs_analysis_page += 1 |
| st.rerun() |
|
|
| st.markdown("---") |
|
|
| |
| st.markdown("### πΎ Export Data") |
| export_cols = [c for c in ["conversation_id", "customer_email", "first_message_at", |
| "status", "sentiment_polarity", "topics", "summary", |
| "is_refund_request", "is_cancellation", "is_membership", |
| "duration_hours"] if c in analysis_df.columns] |
| csv = analysis_df[export_cols].to_csv(index=False) |
| st.download_button( |
| label="π₯ Download as CSV", |
| data=csv, |
| file_name=f"helpscout_analysis_{total}conversations.csv", |
| mime="text/csv", |
| key="hs_csv_download", |
| ) |
|
|
|
|
| |
| |
| |
|
|
| def _render_summary_report(result: dict): |
| """Render the LLM summary result with nice formatting.""" |
| if not result.get("success"): |
| st.error(f"AI analysis failed: {result.get('error', 'Unknown error')}") |
| return |
|
|
| summary = result.get("summary", {}) |
| meta = result.get("metadata", {}) |
|
|
| with st.container(): |
| st.markdown("---") |
| st.markdown("#### π Executive Summary") |
| st.info(summary.get("executive_summary", "")) |
|
|
| col1, col2 = st.columns(2) |
|
|
| with col1: |
| themes = summary.get("top_themes", []) |
| if themes: |
| st.markdown("#### π― Top Themes") |
| for t in themes: |
| st.markdown( |
| f"**{t.get('theme', '')}** _{t.get('prevalence', '')}_ \n" |
| f"{t.get('description', '')}" |
| ) |
| st.markdown("") |
|
|
| insights = summary.get("unexpected_insights", []) |
| if insights: |
| st.markdown("#### π‘ Unexpected Insights") |
| for ins in insights: |
| st.markdown(f"- {ins}") |
|
|
| with col2: |
| complaints = summary.get("top_complaints", []) |
| if complaints: |
| st.markdown("#### β οΈ Top Complaints") |
| for c in complaints: |
| st.markdown(f"- {c}") |
|
|
| quotes = summary.get("notable_quotes", []) |
| if quotes: |
| st.markdown("#### π¬ Notable Quotes") |
| for q in quotes: |
| st.markdown(f"> {q}") |
|
|
| with st.expander("βΉοΈ Analysis Metadata"): |
| mc1, mc2, mc3 = st.columns(3) |
| mc1.metric("Conversations Analysed", meta.get("total_conversations_analyzed", 0)) |
| mc2.metric("Model Used", meta.get("model_used", "N/A")) |
| mc3.metric("Tokens Used", meta.get("tokens_used", 0)) |
| if meta.get("total_available", 0) > meta.get("total_conversations_analyzed", 0): |
| st.caption( |
| f"Sampled {meta['total_conversations_analyzed']} of " |
| f"{meta['total_available']} conversations for this analysis." |
| ) |
|
|
|
|
| def _render_conversation_card(row, taxonomy: dict): |
| """Render a single conversation card.""" |
| sent = str(row.get("sentiment_polarity", "unknown")) |
| sent_emoji = { |
| "very_positive": "π’", "positive": "π©", "neutral": "π‘", |
| "negative": "π ", "very_negative": "π΄", |
| }.get(sent, "βͺ") |
|
|
| topics_list = row.get("topics_list") or [] |
| topic_labels_str = ", ".join(topic_label(t, taxonomy) for t in topics_list) if topics_list else "β" |
|
|
| first_name = str(row.get("customer_first") or "").strip() |
| last_name = str(row.get("customer_last") or "").strip() |
| customer_str = f"{first_name} {last_name[:1]}." if first_name or last_name else "Anonymous" |
|
|
| first_msg = row.get("first_message_at") |
| date_str = first_msg.strftime("%Y-%m-%d") if hasattr(first_msg, "strftime") else str(first_msg or "") |
|
|
| flags = [] |
| if row.get("is_refund_request"): flags.append("π° Refund") |
| if row.get("is_cancellation"): flags.append("π« Cancel") |
| if row.get("is_membership"): flags.append("β
Membership") |
| flags_str = " | ".join(flags) if flags else "" |
|
|
| with st.expander( |
| f"{sent_emoji} {customer_str} β {topic_labels_str} | {sent.replace('_', ' ').title()} | {date_str}" |
| + (f" [{flags_str}]" if flags_str else ""), |
| expanded=False, |
| ): |
| info_col1, info_col2, info_col3 = st.columns(3) |
| info_col1.markdown(f"**Status:** {row.get('status', 'β')}") |
| info_col2.markdown(f"**Source:** {row.get('source_type', 'β')}") |
| info_col3.markdown(f"**Duration:** {row.get('duration_hours', 0):.1f}h | **Threads:** {row.get('thread_count', 0)}") |
|
|
| summary = str(row.get("summary") or "No summary available.") |
| st.markdown(f"**Summary:** {summary}") |
|
|
| notes_col1, notes_col2 = st.columns(2) |
| with notes_col1: |
| sent_note = str(row.get("sentiment_notes") or "") |
| if sent_note: |
| st.markdown(f"**Sentiment Note:** _{sent_note}_") |
| with notes_col2: |
| topic_note = str(row.get("topic_notes") or "") |
| if topic_note: |
| st.markdown(f"**Topic Note:** _{topic_note}_") |