| """ |
| HelpScout PDF Exporters. |
| |
| Two classes sharing the MusoraPDF base from pdf_exporter.py: |
| - HelpScoutDashboardPDF : full HelpScout dashboard report |
| - HelpScoutAnalysisPDF : filtered analysis report + optional LLM summary |
| """ |
| import logging |
| import os |
| import sys |
| import tempfile |
| from datetime import datetime |
| from pathlib import Path |
|
|
| import plotly.io as pio |
|
|
# Directory two levels above this file. It is put at the front of sys.path
# (only if not already present) before the `utils` / `visualizations`
# imports that follow -- presumably so those packages resolve when this
# module is loaded from outside the project root; confirm against the
# project layout.
_parent = Path(__file__).resolve().parent.parent
if str(_parent) not in sys.path:
    sys.path.insert(0, str(_parent))
|
|
| from utils.pdf_exporter import MusoraPDF |
| from utils.helpscout_utils import boolean_flag_counts, topic_label, load_topic_taxonomy |
| from visualizations.helpscout_charts import HelpScoutCharts |
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Value passed as `scale=` to plotly's image export in _fig_to_tmp.
_RENDER_SCALE = 3
|
|
|
|
| |
| |
| |
|
|
| def _prepare_fig(fig, is_side_by_side=False): |
| base_fs = 13 if is_side_by_side else 14 |
| fig.update_layout( |
| paper_bgcolor="white", plot_bgcolor="white", |
| font=dict(color="black", size=base_fs), |
| title_font_size=base_fs + 4, |
| margin=(dict(l=60, r=40, t=60, b=60) if is_side_by_side else dict(l=80, r=40, t=60, b=80)), |
| ) |
| fig.update_xaxes(automargin=True) |
| fig.update_yaxes(automargin=True) |
|
|
|
|
def _fig_to_tmp(fig, width=800, height=400, is_side_by_side=False) -> str:
    """Render *fig* to a temporary PNG file and return that file's path.

    The figure is first restyled in place by ``_prepare_fig`` and then
    exported with ``pio.to_image`` at ``_RENDER_SCALE``.

    Args:
        fig: Plotly figure to render. Note that it is modified in place.
        width: Image width handed to the exporter.
        height: Image height handed to the exporter.
        is_side_by_side: Forwarded to ``_prepare_fig``.

    Returns:
        Path of the PNG file. The file is created with ``delete=False``, so
        the caller owns it and must remove it (see ``_cleanup``).

    Raises:
        Whatever the exporter or the file write raises. If the write fails,
        the partially written temporary file is removed before re-raising.
    """
    _prepare_fig(fig, is_side_by_side)
    png_bytes = pio.to_image(
        fig,
        format="png",
        width=width,
        height=height,
        scale=_RENDER_SCALE,
        engine="kaleido",
    )

    # The file is only created once rendering succeeded, so a render failure
    # leaves nothing behind on disk.
    tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    try:
        # The context manager closes the handle even when write() raises.
        with tmp:
            tmp.write(png_bytes)
    except BaseException:
        # The path never reaches the caller, so nobody else can clean it up.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise
    return tmp.name
|
|
|
|
| def _cleanup(paths): |
| for p in paths: |
| try: |
| os.unlink(p) |
| except OSError: |
| pass |
|
|
|
|
| |
| |
| |
|
|
class HelpScoutDashboardPDF:
    """
    Generates a comprehensive HelpScout dashboard PDF report.

    Every ``generate_report`` call creates a new ``MusoraPDF`` document, adds
    the report sections in a fixed order and returns the PDF as bytes. Charts
    are built by ``HelpScoutCharts``, rendered to temporary PNG files and
    embedded; the temporary files are deleted when the call finishes.
    """

    def __init__(self):
        """Create the chart factory and load the topic taxonomy."""
        self.charts = HelpScoutCharts()
        self.taxonomy = load_topic_taxonomy()
        # Paths of the temporary chart images created by the current build.
        self._tmp: list = []

    def generate_report(self, df, filter_info: dict = None) -> bytes:
        """Build and return the full dashboard PDF.

        Args:
            df: DataFrame of HelpScout conversations.
            filter_info: Optional dict of filter descriptions. It is passed
                to the cover and data-summary sections, which currently do
                not render it.

        Returns:
            The PDF document as bytes.
        """
        self.pdf = MusoraPDF()
        self._tmp = []
        try:
            self._cover(df, filter_info)
            self._executive_summary(df)
            self._sentiment_section(df)
            self._topic_section(df)
            self._emotion_section(df)
            self._flags_section(df)
            self._status_source_section(df)
            self._timelines_section(df)
            self._depth_section(df)
            self._member_section(df)
            self._data_summary(df, filter_info)
            return bytes(self.pdf.output())
        finally:
            # Remove the rendered chart images whether or not the build worked.
            _cleanup(self._tmp)

    def _add_chart(self, fig, width=180, img_w=800, img_h=400):
        """Render *fig* and place it full width at the current position.

        Args:
            fig: Plotly figure to embed.
            width: Width of the placed image in PDF units.
            img_w: Pixel width used for rendering.
            img_h: Pixel height used for rendering.

        Any failure is logged and replaced by a placeholder line so that one
        broken chart does not abort the whole report.
        """
        try:
            p = _fig_to_tmp(fig, img_w, img_h)
            self._tmp.append(p)
            # Placed height follows from the rendered aspect ratio.
            h_mm = width * (img_h / img_w)
            self.pdf.check_page_break(h_mm + 5)
            self.pdf.image(p, x=10, w=width)
            self.pdf.ln(3)
        except Exception:
            logger.exception("Chart render failed")
            self.pdf.body_text("[Chart could not be rendered]")

    def _add_two_charts(self, fig1, fig2, width=92):
        """Render two figures and place them side by side on one row.

        Args:
            fig1: Figure placed on the left.
            fig2: Figure placed on the right, 4 units after the left one.
            width: Width of each placed image in PDF units.

        Any failure is logged and replaced by a placeholder line.
        """
        try:
            # Each path is registered right after rendering, so the first
            # image is still cleaned up if rendering the second one fails.
            p1 = _fig_to_tmp(fig1, 700, 450, is_side_by_side=True)
            self._tmp.append(p1)
            p2 = _fig_to_tmp(fig2, 700, 450, is_side_by_side=True)
            self._tmp.append(p2)
            h_mm = width * (450 / 700)
            self.pdf.check_page_break(h_mm + 5)
            # Both images share one explicit y so they line up; the cursor is
            # then moved below the row by hand.
            y = self.pdf.get_y()
            self.pdf.image(p1, x=10, y=y, w=width)
            self.pdf.image(p2, x=10 + width + 4, y=y, w=width)
            self.pdf.set_y(y + h_mm + 3)
        except Exception:
            logger.exception("Side-by-side render failed")
            self.pdf.body_text("[Charts could not be rendered]")

    def _cover(self, df, filter_info):
        """Add the cover page: title, timestamp, row count and data period."""
        self.pdf.add_page()
        self.pdf.ln(40)
        r, g, b = MusoraPDF.PRIMARY
        self.pdf.set_fill_color(r, g, b)
        self.pdf.rect(0, 60, 210, 4, style="F")
        self.pdf.ln(20)
        self.pdf.set_font("Helvetica", "B", 28)
        self.pdf.set_text_color(r, g, b)
        self.pdf.cell(0, 15, "Musora", align="C", new_x="LMARGIN", new_y="NEXT")
        self.pdf.set_font("Helvetica", "", 16)
        self.pdf.set_text_color(80, 80, 80)
        self.pdf.cell(0, 10, "HelpScout Support Dashboard Report",
                      align="C", new_x="LMARGIN", new_y="NEXT")
        self.pdf.ln(10)
        self.pdf.set_font("Helvetica", "", 12)
        self.pdf.set_text_color(100, 100, 100)
        self.pdf.cell(0, 8, f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}",
                      align="C", new_x="LMARGIN", new_y="NEXT")
        self.pdf.ln(5)
        self.pdf.set_font("Helvetica", "", 10)
        self.pdf.cell(0, 7, f"Total Conversations: {len(df):,}",
                      align="C", new_x="LMARGIN", new_y="NEXT")
        # The data period is only shown when at least one timestamp exists.
        # NOTE(review): assumes first_message_at holds datetime-like values
        # (strftime is called on min/max) -- confirm against the loader.
        if "first_message_at" in df.columns and not df.empty:
            valid = df["first_message_at"].dropna()
            if not valid.empty:
                dr = f"{valid.min().strftime('%b %d, %Y')} to {valid.max().strftime('%b %d, %Y')}"
                self.pdf.ln(3)
                self.pdf.set_font("Helvetica", "I", 9)
                self.pdf.set_text_color(120, 120, 120)
                self.pdf.cell(0, 6, MusoraPDF._sanitize(f"Data period: {dr}"),
                              align="C", new_x="LMARGIN", new_y="NEXT")
        self.pdf.ln(20)
        self.pdf.set_font("Helvetica", "I", 8)
        self.pdf.set_text_color(150, 150, 150)
        self.pdf.cell(0, 6, "Confidential - For Internal Use Only",
                      align="C", new_x="LMARGIN", new_y="NEXT")

    def _executive_summary(self, df):
        """Add the executive summary page with two rows of headline metrics."""
        self.pdf.add_page()
        self.pdf.section_header("Executive Summary")
        total = len(df)
        flags = boolean_flag_counts(df)
        # Both helpers fall back to 0.0 for a missing column or empty data,
        # so the tiles never crash the report or show "nan".
        pos_pct = self._polarity_share(df, ["positive", "very_positive"])
        neg_pct = self._polarity_share(df, ["negative", "very_negative"])
        esc = int(df["is_escalation"].sum()) if "is_escalation" in df.columns else 0
        avg_dur = self._mean_or_zero(df, "duration_hours")

        self.pdf.metric_row([
            ("Total Conversations", f"{total:,}"),
            ("Positive %", f"{pos_pct:.1f}%"),
            ("Negative %", f"{neg_pct:.1f}%"),
            ("Avg Duration (h)", f"{avg_dur:.1f}"),
        ])
        self.pdf.metric_row([
            ("Escalations", f"{esc:,}"),
            ("Refund Requests", f"{flags['is_refund_request']:,}"),
            ("Cancellations", f"{flags['is_cancellation']:,}"),
            ("Membership Joins", f"{flags['is_membership']:,}"),
        ])

    def _sentiment_section(self, df):
        """Add the sentiment page: distribution pie next to a score gauge."""
        self.pdf.add_page()
        self.pdf.section_header("Sentiment Distribution")
        pie = self.charts.create_sentiment_pie_chart(df, title="Sentiment Distribution")
        gauge = self.charts.create_sentiment_score_gauge(self._avg_score(df))
        self._add_two_charts(pie, gauge)

    def _topic_section(self, df):
        """Add the topic page: bar and pie side by side, then a heatmap."""
        self.pdf.add_page()
        self.pdf.section_header("Topic Analysis")
        bar = self.charts.create_topic_bar_chart(df, title="Conversations by Topic")
        pie = self.charts.create_topic_pie_chart(df, title="Topic Share")
        self._add_two_charts(bar, pie)
        self._add_chart(self.charts.create_topic_sentiment_heatmap(df), img_h=500)

    def _emotion_section(self, df):
        """Add the emotion page; skipped when there is no emotion data."""
        if "emotions" not in df.columns or df["emotions"].dropna().empty:
            return
        self.pdf.add_page()
        self.pdf.section_header("Emotion Analysis")
        self._add_chart(self.charts.create_emotion_bar_chart(df, title="Emotion Distribution"))

    def _flags_section(self, df):
        """Add the billing/membership flags page."""
        self.pdf.add_page()
        self.pdf.section_header("Billing & Membership Flags")
        flags_chart = self.charts.create_boolean_flags_chart(df)
        esc_chart = self.charts.create_escalation_breakdown(df)
        self._add_two_charts(flags_chart, esc_chart)

    def _status_source_section(self, df):
        """Add the status and source distribution page."""
        self.pdf.add_page()
        self.pdf.section_header("Status & Source Distribution")
        status_chart = self.charts.create_status_distribution(df)
        source_chart = self.charts.create_source_distribution(df)
        self._add_two_charts(status_chart, source_chart)

    def _timelines_section(self, df):
        """Add the weekly volume, sentiment and refund/cancel timelines."""
        self.pdf.add_page()
        self.pdf.section_header("Volume & Trends (Weekly)")
        self._add_chart(self.charts.create_volume_timeline(df, freq="W"))
        self._add_chart(self.charts.create_sentiment_timeline(df, freq="W"))
        self._add_chart(self.charts.create_refund_cancel_timeline(df, freq="W"))

    def _depth_section(self, df):
        """Add the conversation depth page (duration and thread histograms)."""
        self.pdf.add_page()
        self.pdf.section_header("Conversation Depth")
        dur = self.charts.create_duration_histogram(df)
        thd = self.charts.create_thread_count_histogram(df)
        self._add_two_charts(dur, thd)

    def _member_section(self, df):
        """Add the member analysis page; skipped without an is_member column."""
        if "is_member" not in df.columns:
            return
        self.pdf.add_page()
        self.pdf.section_header("Member vs Non-Member Analysis")
        total = len(df)
        member_count = int(df["is_member"].sum())
        non_member_count = total - member_count
        match_pct = member_count / total * 100 if total else 0
        self.pdf.metric_row([
            ("Members", f"{member_count:,}"),
            ("Non-Members", f"{non_member_count:,}"),
            ("Email Match Rate", f"{match_pct:.1f}%"),
        ])
        self.pdf.body_text(
            "Members are customers whose email was matched against Musora user records. "
            "Non-Members contacted support without an associated Musora account."
        )
        self._add_two_charts(
            self.charts.create_member_status_chart(df, title="Member vs Non-Member"),
            self.charts.create_member_sentiment_chart(df, title="Sentiment by Member Status"),
        )
        self._add_chart(
            self.charts.create_member_topic_chart(df, title="Top Topics by Member Status"),
            img_h=500,
        )

    def _data_summary(self, df, filter_info):
        """Add the closing page: timestamp, row count and data-source note."""
        self.pdf.add_page()
        self.pdf.section_header("Data Summary")
        self.pdf.body_text(f"Report generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        self.pdf.body_text(f"Total conversations: {len(df):,}")
        self.pdf.callout_box(
            "Data source: SOCIAL_MEDIA_DB.ML_FEATURES.HELPSCOUT_CONVERSATION_FEATURES\n"
            "This report is confidential and intended for internal Musora team use only.",
            bg_color=(245, 245, 245),
        )

    @staticmethod
    def _avg_score(df) -> float:
        """Return the mean sentiment score of *df* on a -2..2 scale.

        Labels map very_positive=2, positive=1, neutral=0, negative=-1,
        very_negative=-2; unknown or missing labels count as 0. Returns 0.0
        when the column is missing or *df* is empty.
        """
        score_map = {"very_positive": 2, "positive": 1, "neutral": 0,
                     "negative": -1, "very_negative": -2}
        if "sentiment_polarity" not in df.columns or df.empty:
            return 0.0
        return float(df["sentiment_polarity"].map(score_map).fillna(0).mean())

    @staticmethod
    def _polarity_share(df, labels) -> float:
        """Return the percentage (0-100) of rows whose polarity is in *labels*.

        Returns 0.0 when *df* is empty or has no ``sentiment_polarity`` column.
        """
        total = len(df)
        if not total or "sentiment_polarity" not in df.columns:
            return 0.0
        return float(df["sentiment_polarity"].isin(labels).sum()) / total * 100

    @staticmethod
    def _mean_or_zero(df, column) -> float:
        """Return the mean of ``df[column]``, or 0.0 if it cannot be computed.

        0.0 is returned when the column is missing or its mean is NaN (empty
        or all-missing column).
        """
        if column not in df.columns:
            return 0.0
        avg = float(df[column].mean())
        # NaN is the only float that is not equal to itself.
        return 0.0 if avg != avg else avg
|
|
|
|
| |
| |
| |
|
|
class HelpScoutAnalysisPDF:
    """
    Generates a focused analysis PDF from the HelpScout Analysis page.
    Includes filter summary, distributions, and optionally the LLM summary report.

    Every ``generate_report`` call creates a new ``MusoraPDF`` document.
    Charts are rendered to temporary PNG files that are deleted when the call
    finishes.
    """

    def __init__(self):
        """Create the chart factory and load the topic taxonomy."""
        self.charts = HelpScoutCharts()
        self.taxonomy = load_topic_taxonomy()
        # Paths of the temporary chart images created by the current build.
        self._tmp: list = []

    def generate_report(self, df, filter_info: dict = None,
                        summary_result: dict = None) -> bytes:
        """
        Build and return the analysis PDF.

        Args:
            df: Filtered HelpScout analysis DataFrame.
            filter_info: Dict of filter descriptions for the cover.
            summary_result: Output from HelpScoutSummaryAgent.process() or None.

        Returns:
            The PDF document as bytes.
        """
        self.pdf = MusoraPDF()
        self._tmp = []
        try:
            self._cover(df, filter_info)
            self._filter_summary_section(filter_info, df)
            self._kpi_section(df)
            self._distributions_section(df)
            self._summary_section(summary_result)
            self._data_summary(df, filter_info)
            return bytes(self.pdf.output())
        finally:
            # Remove the rendered chart images whether or not the build worked.
            _cleanup(self._tmp)

    def _add_chart(self, fig, width=180, img_w=800, img_h=400):
        """Render *fig* and place it full width at the current position.

        Args:
            fig: Plotly figure to embed.
            width: Width of the placed image in PDF units.
            img_w: Pixel width used for rendering.
            img_h: Pixel height used for rendering.

        Any failure is logged and replaced by a placeholder line so that one
        broken chart does not abort the whole report.
        """
        try:
            p = _fig_to_tmp(fig, img_w, img_h)
            self._tmp.append(p)
            # Placed height follows from the rendered aspect ratio.
            h_mm = width * (img_h / img_w)
            self.pdf.check_page_break(h_mm + 5)
            self.pdf.image(p, x=10, w=width)
            self.pdf.ln(3)
        except Exception:
            logger.exception("Chart render failed")
            self.pdf.body_text("[Chart could not be rendered]")

    def _add_two_charts(self, fig1, fig2, width=92):
        """Render two figures and place them side by side on one row.

        Args:
            fig1: Figure placed on the left.
            fig2: Figure placed on the right, 4 units after the left one.
            width: Width of each placed image in PDF units.

        Any failure is logged and replaced by a placeholder line.
        """
        try:
            # Each path is registered right after rendering, so the first
            # image is still cleaned up if rendering the second one fails.
            p1 = _fig_to_tmp(fig1, 700, 450, is_side_by_side=True)
            self._tmp.append(p1)
            p2 = _fig_to_tmp(fig2, 700, 450, is_side_by_side=True)
            self._tmp.append(p2)
            h_mm = width * (450 / 700)
            self.pdf.check_page_break(h_mm + 5)
            # Both images share one explicit y so they line up; the cursor is
            # then moved below the row by hand.
            y = self.pdf.get_y()
            self.pdf.image(p1, x=10, y=y, w=width)
            self.pdf.image(p2, x=10 + width + 4, y=y, w=width)
            self.pdf.set_y(y + h_mm + 3)
        except Exception:
            logger.exception("Side-by-side render failed")
            self.pdf.body_text("[Charts could not be rendered]")

    def _cover(self, df, filter_info):
        """Add the cover page: title, timestamp, row count, applied filters."""
        self.pdf.add_page()
        self.pdf.ln(40)
        r, g, b = MusoraPDF.PRIMARY
        self.pdf.set_fill_color(r, g, b)
        self.pdf.rect(0, 60, 210, 4, style="F")
        self.pdf.ln(20)
        self.pdf.set_font("Helvetica", "B", 28)
        self.pdf.set_text_color(r, g, b)
        self.pdf.cell(0, 15, "Musora", align="C", new_x="LMARGIN", new_y="NEXT")
        self.pdf.set_font("Helvetica", "", 16)
        self.pdf.set_text_color(80, 80, 80)
        self.pdf.cell(0, 10, "HelpScout Analysis Report",
                      align="C", new_x="LMARGIN", new_y="NEXT")
        self.pdf.ln(10)
        self.pdf.set_font("Helvetica", "", 12)
        self.pdf.set_text_color(100, 100, 100)
        self.pdf.cell(0, 8, f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}",
                      align="C", new_x="LMARGIN", new_y="NEXT")
        self.pdf.ln(5)
        self.pdf.set_font("Helvetica", "", 10)
        self.pdf.cell(0, 7, f"Matched Conversations: {len(df):,}",
                      align="C", new_x="LMARGIN", new_y="NEXT")
        if filter_info:
            self.pdf.ln(8)
            self.pdf.set_font("Helvetica", "B", 9)
            self.pdf.set_text_color(80, 80, 80)
            self.pdf.cell(0, 6, "Applied Filters:", align="C", new_x="LMARGIN", new_y="NEXT")
            self.pdf.set_font("Helvetica", "", 9)
            # Filters with an empty/falsy value are not listed.
            for k, v in filter_info.items():
                if v:
                    self.pdf.cell(0, 5, MusoraPDF._sanitize(f"{k}: {v}"),
                                  align="C", new_x="LMARGIN", new_y="NEXT")
        self.pdf.ln(20)
        self.pdf.set_font("Helvetica", "I", 8)
        self.pdf.set_text_color(150, 150, 150)
        self.pdf.cell(0, 6, "Confidential - For Internal Use Only",
                      align="C", new_x="LMARGIN", new_y="NEXT")

    def _filter_summary_section(self, filter_info, df):
        """Add a page listing the active filters as a two-column table.

        Args:
            filter_info: Dict of filter descriptions, or None/empty.
            df: Unused; kept so the call signature stays unchanged.

        When there is no filter with a non-empty value, a "No filters
        applied" note is written instead of leaving the section blank.
        """
        self.pdf.add_page()
        self.pdf.section_header("Filter Set Summary")
        rows = [
            (MusoraPDF._sanitize(str(k)), MusoraPDF._sanitize(str(v)))
            for k, v in (filter_info or {}).items()
            if v
        ]
        if rows:
            self.pdf.add_table(["Filter", "Value"], rows, col_widths=[80, 110])
        else:
            # The literal contains an em dash, so it goes through the same
            # sanitizer as every other non-ASCII text in this report.
            # NOTE(review): assumes _sanitize is safe to apply even if
            # body_text sanitizes again internally -- confirm.
            self.pdf.body_text(MusoraPDF._sanitize(
                "No filters applied — report covers all available conversations."
            ))

    def _kpi_section(self, df):
        """Add the "Key Metrics" rows (and a member row when available)."""
        total = len(df)
        flags = boolean_flag_counts(df)
        # Both helpers fall back to 0.0 for a missing column or empty data,
        # so the tiles never crash the report or show "nan".
        neg_pct = self._polarity_share(df, ["negative", "very_negative"])
        pos_pct = self._polarity_share(df, ["positive", "very_positive"])
        avg_dur = self._mean_or_zero(df, "duration_hours")
        esc = int(df["is_escalation"].sum()) if "is_escalation" in df.columns else 0

        self.pdf.section_header("Key Metrics")
        self.pdf.metric_row([
            ("Conversations", f"{total:,}"),
            ("Positive %", f"{pos_pct:.1f}%"),
            ("Negative %", f"{neg_pct:.1f}%"),
            ("Avg Duration (h)", f"{avg_dur:.1f}"),
        ])
        self.pdf.metric_row([
            ("Escalations", f"{esc:,}"),
            ("Refund Requests", f"{flags['is_refund_request']:,}"),
            ("Cancellations", f"{flags['is_cancellation']:,}"),
            ("Membership Joins", f"{flags['is_membership']:,}"),
        ])
        if "is_member" in df.columns:
            member_count = int(df["is_member"].sum())
            non_member_count = total - member_count
            self.pdf.metric_row([
                ("Members", f"{member_count:,}"),
                ("Non-Members", f"{non_member_count:,}"),
                ("Email Match Rate", f"{member_count / total * 100:.1f}%" if total else "N/A"),
            ])

    def _distributions_section(self, df):
        """Add the distribution charts, plus a member page when available."""
        self.pdf.add_page()
        self.pdf.section_header("Distributions")
        pie = self.charts.create_sentiment_pie_chart(df, title="Sentiment Distribution")
        tbar = self.charts.create_topic_bar_chart(df, title="Topic Distribution")
        self._add_two_charts(pie, tbar)
        self._add_chart(self.charts.create_topic_sentiment_heatmap(df), img_h=500)
        if "is_member" in df.columns:
            self.pdf.add_page()
            self.pdf.section_header("Member vs Non-Member Breakdown")
            self._add_two_charts(
                self.charts.create_member_status_chart(df, title="Member vs Non-Member"),
                self.charts.create_member_sentiment_chart(df, title="Sentiment by Member Status"),
            )
            self._add_chart(
                self.charts.create_member_topic_chart(df, title="Top Topics by Member Status"),
                img_h=500,
            )

    def _summary_section(self, result: dict):
        """Add the "AI Summary Report" page.

        Args:
            result: Summary agent output, or None. The code reads the keys
                ``success``, ``summary`` and ``metadata`` from it.

        When *result* is None or not marked successful, a hint on how to
        generate the summary is shown instead. Missing or ``None`` sub-fields
        are skipped rather than raising.
        """
        self.pdf.add_page()
        self.pdf.section_header("AI Summary Report")

        if result is None or not result.get("success"):
            self.pdf.callout_box(
                "AI summary not generated. To include it, click 'Generate Summary Report' "
                "in the app before exporting the PDF.",
                bg_color=(255, 250, 230),
            )
            return

        # `or {}` / `or []` also cover keys that are present but None.
        summary = result.get("summary") or {}
        meta = result.get("metadata") or {}

        exec_summary = MusoraPDF._sanitize(str(summary.get("executive_summary") or ""))
        if exec_summary:
            self.pdf.subsection_header("Executive Summary")
            self.pdf.section_description(exec_summary)

        themes = summary.get("top_themes") or []
        if themes:
            self.pdf.subsection_header("Top Themes")
            for t in themes:
                if isinstance(t, dict):
                    raw = f"{t.get('theme', '')} — {t.get('prevalence', '')}: {t.get('description', '')}"
                else:
                    # Tolerate a plain string entry instead of a theme dict.
                    raw = str(t)
                self.pdf.body_text(f" * {MusoraPDF._sanitize(raw)}")

        complaints = summary.get("top_complaints") or []
        if complaints:
            self.pdf.subsection_header("Top Complaints")
            for c in complaints:
                self.pdf.body_text(f" * {MusoraPDF._sanitize(str(c))}")

        insights = summary.get("unexpected_insights") or []
        if insights:
            self.pdf.subsection_header("Unexpected Insights")
            for ins in insights:
                self.pdf.body_text(f" * {MusoraPDF._sanitize(str(ins))}")

        quotes = summary.get("notable_quotes") or []
        if quotes:
            self.pdf.subsection_header("Notable Quotes")
            for q in quotes:
                self.pdf.body_text(f' "{MusoraPDF._sanitize(str(q))}"')

        self.pdf.ln(4)
        footer = (
            f"Analysis based on {meta.get('total_conversations_analyzed', 0)} conversations "
            f"| Model: {meta.get('model_used', 'N/A')} "
            f"| Tokens: {self._format_count(meta.get('tokens_used', 0))}"
        )
        # Metadata values come from outside this module, so they are
        # sanitized like the rest of the summary text.
        self.pdf.callout_box(MusoraPDF._sanitize(footer), bg_color=(240, 248, 255))

    def _data_summary(self, df, filter_info):
        """Add the closing page: timestamp, row count and data-source note."""
        self.pdf.add_page()
        self.pdf.section_header("Data Summary")
        self.pdf.body_text(f"Report generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        self.pdf.body_text(f"Total conversations in report: {len(df):,}")
        self.pdf.callout_box(
            "Data source: SOCIAL_MEDIA_DB.ML_FEATURES.HELPSCOUT_CONVERSATION_FEATURES\n"
            "This report is confidential and intended for internal Musora team use only.",
            bg_color=(245, 245, 245),
        )

    @staticmethod
    def _polarity_share(df, labels) -> float:
        """Return the percentage (0-100) of rows whose polarity is in *labels*.

        Returns 0.0 when *df* is empty or has no ``sentiment_polarity`` column.
        """
        total = len(df)
        if not total or "sentiment_polarity" not in df.columns:
            return 0.0
        return float(df["sentiment_polarity"].isin(labels).sum()) / total * 100

    @staticmethod
    def _mean_or_zero(df, column) -> float:
        """Return the mean of ``df[column]``, or 0.0 if it cannot be computed.

        0.0 is returned when the column is missing or its mean is NaN (empty
        or all-missing column).
        """
        if column not in df.columns:
            return 0.0
        avg = float(df[column].mean())
        # NaN is the only float that is not equal to itself.
        return 0.0 if avg != avg else avg

    @staticmethod
    def _format_count(value) -> str:
        """Format *value* with thousands separators where possible.

        ``None`` becomes "N/A"; values that do not support the ``,`` format
        (for example strings) are returned via ``str``.
        """
        if value is None:
            return "N/A"
        try:
            return f"{value:,}"
        except (TypeError, ValueError):
            return str(value)