""" HelpScout PDF Exporters. Two classes sharing the MusoraPDF base from pdf_exporter.py: - HelpScoutDashboardPDF : full HelpScout dashboard report - HelpScoutAnalysisPDF : filtered analysis report + optional LLM summary """ import logging import os import sys import tempfile from datetime import datetime from pathlib import Path import plotly.io as pio _parent = Path(__file__).resolve().parent.parent if str(_parent) not in sys.path: sys.path.insert(0, str(_parent)) from utils.pdf_exporter import MusoraPDF # reuse base class from utils.helpscout_utils import boolean_flag_counts, topic_label, load_topic_taxonomy from visualizations.helpscout_charts import HelpScoutCharts logger = logging.getLogger(__name__) _RENDER_SCALE = 3 # --------------------------------------------------------------------------- # Shared rendering helpers (mixin-style functions) # --------------------------------------------------------------------------- def _prepare_fig(fig, is_side_by_side=False): base_fs = 13 if is_side_by_side else 14 fig.update_layout( paper_bgcolor="white", plot_bgcolor="white", font=dict(color="black", size=base_fs), title_font_size=base_fs + 4, margin=(dict(l=60, r=40, t=60, b=60) if is_side_by_side else dict(l=80, r=40, t=60, b=80)), ) fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) def _fig_to_tmp(fig, width=800, height=400, is_side_by_side=False) -> str: _prepare_fig(fig, is_side_by_side) img = pio.to_image(fig, format="png", width=width, height=height, scale=_RENDER_SCALE, engine="kaleido") tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False) tmp.write(img) tmp.close() return tmp.name def _cleanup(paths): for p in paths: try: os.unlink(p) except OSError: pass # --------------------------------------------------------------------------- # HelpScoutDashboardPDF # --------------------------------------------------------------------------- class HelpScoutDashboardPDF: """ Generates a comprehensive HelpScout dashboard PDF report. """ def __init__(self): self.charts = HelpScoutCharts() self.taxonomy = load_topic_taxonomy() self._tmp: list = [] def generate_report(self, df, filter_info: dict = None) -> bytes: """Build and return the full dashboard PDF.""" self.pdf = MusoraPDF() self._tmp = [] try: self._cover(df, filter_info) self._executive_summary(df) self._sentiment_section(df) self._topic_section(df) self._emotion_section(df) self._flags_section(df) self._status_source_section(df) self._timelines_section(df) self._depth_section(df) self._member_section(df) self._data_summary(df, filter_info) return bytes(self.pdf.output()) finally: _cleanup(self._tmp) # ── Rendering helpers ── def _add_chart(self, fig, width=180, img_w=800, img_h=400): try: p = _fig_to_tmp(fig, img_w, img_h) self._tmp.append(p) h_mm = width * (img_h / img_w) self.pdf.check_page_break(h_mm + 5) self.pdf.image(p, x=10, w=width) self.pdf.ln(3) except Exception: logger.exception("Chart render failed") self.pdf.body_text("[Chart could not be rendered]") def _add_two_charts(self, fig1, fig2, width=92): try: p1 = _fig_to_tmp(fig1, 700, 450, is_side_by_side=True); self._tmp.append(p1) p2 = _fig_to_tmp(fig2, 700, 450, is_side_by_side=True); self._tmp.append(p2) h_mm = width * (450 / 700) self.pdf.check_page_break(h_mm + 5) y = self.pdf.get_y() self.pdf.image(p1, x=10, y=y, w=width) self.pdf.image(p2, x=10 + width + 4, y=y, w=width) self.pdf.set_y(y + h_mm + 3) except Exception: logger.exception("Side-by-side render failed") self.pdf.body_text("[Charts could not be rendered]") # ── Sections ── def _cover(self, df, filter_info): self.pdf.add_page() self.pdf.ln(40) r, g, b = MusoraPDF.PRIMARY self.pdf.set_fill_color(r, g, b) self.pdf.rect(0, 60, 210, 4, style="F") self.pdf.ln(20) self.pdf.set_font("Helvetica", "B", 28) self.pdf.set_text_color(r, g, b) self.pdf.cell(0, 15, "Musora", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.set_font("Helvetica", "", 16) self.pdf.set_text_color(80, 80, 80) self.pdf.cell(0, 10, "HelpScout Support Dashboard Report", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.ln(10) self.pdf.set_font("Helvetica", "", 12) self.pdf.set_text_color(100, 100, 100) self.pdf.cell(0, 8, f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.ln(5) self.pdf.set_font("Helvetica", "", 10) self.pdf.cell(0, 7, f"Total Conversations: {len(df):,}", align="C", new_x="LMARGIN", new_y="NEXT") if "first_message_at" in df.columns and not df.empty: valid = df["first_message_at"].dropna() if not valid.empty: dr = f"{valid.min().strftime('%b %d, %Y')} to {valid.max().strftime('%b %d, %Y')}" self.pdf.ln(3) self.pdf.set_font("Helvetica", "I", 9) self.pdf.set_text_color(120, 120, 120) self.pdf.cell(0, 6, MusoraPDF._sanitize(f"Data period: {dr}"), align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.ln(20) self.pdf.set_font("Helvetica", "I", 8) self.pdf.set_text_color(150, 150, 150) self.pdf.cell(0, 6, "Confidential - For Internal Use Only", align="C", new_x="LMARGIN", new_y="NEXT") def _executive_summary(self, df): self.pdf.add_page() self.pdf.section_header("Executive Summary") total = len(df) flags = boolean_flag_counts(df) neg = df["sentiment_polarity"].isin(["negative", "very_negative"]).sum() pos = df["sentiment_polarity"].isin(["positive", "very_positive"]).sum() neg_pct = neg / total * 100 if total else 0 pos_pct = pos / total * 100 if total else 0 esc = int(df["is_escalation"].sum()) if "is_escalation" in df.columns else 0 avg_dur = float(df["duration_hours"].mean()) if "duration_hours" in df.columns else 0 self.pdf.metric_row([ ("Total Conversations", f"{total:,}"), ("Positive %", f"{pos_pct:.1f}%"), ("Negative %", f"{neg_pct:.1f}%"), ("Avg Duration (h)", f"{avg_dur:.1f}"), ]) self.pdf.metric_row([ ("Escalations", f"{esc:,}"), ("Refund Requests", f"{flags['is_refund_request']:,}"), ("Cancellations", f"{flags['is_cancellation']:,}"), ("Membership Joins", f"{flags['is_membership']:,}"), ]) def _sentiment_section(self, df): self.pdf.add_page() self.pdf.section_header("Sentiment Distribution") pie = self.charts.create_sentiment_pie_chart(df, title="Sentiment Distribution") gauge = self.charts.create_sentiment_score_gauge(self._avg_score(df)) self._add_two_charts(pie, gauge) def _topic_section(self, df): self.pdf.add_page() self.pdf.section_header("Topic Analysis") bar = self.charts.create_topic_bar_chart(df, title="Conversations by Topic") pie = self.charts.create_topic_pie_chart(df, title="Topic Share") self._add_two_charts(bar, pie) self._add_chart(self.charts.create_topic_sentiment_heatmap(df), img_h=500) def _emotion_section(self, df): if "emotions" not in df.columns or df["emotions"].dropna().empty: return self.pdf.add_page() self.pdf.section_header("Emotion Analysis") self._add_chart(self.charts.create_emotion_bar_chart(df, title="Emotion Distribution")) def _flags_section(self, df): self.pdf.add_page() self.pdf.section_header("Billing & Membership Flags") flags_chart = self.charts.create_boolean_flags_chart(df) esc_chart = self.charts.create_escalation_breakdown(df) self._add_two_charts(flags_chart, esc_chart) def _status_source_section(self, df): self.pdf.add_page() self.pdf.section_header("Status & Source Distribution") status_chart = self.charts.create_status_distribution(df) source_chart = self.charts.create_source_distribution(df) self._add_two_charts(status_chart, source_chart) def _timelines_section(self, df): self.pdf.add_page() self.pdf.section_header("Volume & Trends (Weekly)") self._add_chart(self.charts.create_volume_timeline(df, freq="W")) self._add_chart(self.charts.create_sentiment_timeline(df, freq="W")) self._add_chart(self.charts.create_refund_cancel_timeline(df, freq="W")) def _depth_section(self, df): self.pdf.add_page() self.pdf.section_header("Conversation Depth") dur = self.charts.create_duration_histogram(df) thd = self.charts.create_thread_count_histogram(df) self._add_two_charts(dur, thd) def _member_section(self, df): if "is_member" not in df.columns: return self.pdf.add_page() self.pdf.section_header("Member vs Non-Member Analysis") total = len(df) member_count = int(df["is_member"].sum()) non_member_count = total - member_count match_pct = member_count / total * 100 if total else 0 self.pdf.metric_row([ ("Members", f"{member_count:,}"), ("Non-Members", f"{non_member_count:,}"), ("Email Match Rate", f"{match_pct:.1f}%"), ]) self.pdf.body_text( "Members are customers whose email was matched against Musora user records. " "Non-Members contacted support without an associated Musora account." ) self._add_two_charts( self.charts.create_member_status_chart(df, title="Member vs Non-Member"), self.charts.create_member_sentiment_chart(df, title="Sentiment by Member Status"), ) self._add_chart( self.charts.create_member_topic_chart(df, title="Top Topics by Member Status"), img_h=500, ) def _data_summary(self, df, filter_info): self.pdf.add_page() self.pdf.section_header("Data Summary") self.pdf.body_text(f"Report generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") self.pdf.body_text(f"Total conversations: {len(df):,}") self.pdf.callout_box( "Data source: SOCIAL_MEDIA_DB.ML_FEATURES.HELPSCOUT_CONVERSATION_FEATURES\n" "This report is confidential and intended for internal Musora team use only.", bg_color=(245, 245, 245), ) @staticmethod def _avg_score(df) -> float: score_map = {"very_positive": 2, "positive": 1, "neutral": 0, "negative": -1, "very_negative": -2} if "sentiment_polarity" not in df.columns or df.empty: return 0.0 return float(df["sentiment_polarity"].map(score_map).fillna(0).mean()) # --------------------------------------------------------------------------- # HelpScoutAnalysisPDF # --------------------------------------------------------------------------- class HelpScoutAnalysisPDF: """ Generates a focused analysis PDF from the HelpScout Analysis page. Includes filter summary, distributions, and optionally the LLM summary report. """ def __init__(self): self.charts = HelpScoutCharts() self.taxonomy = load_topic_taxonomy() self._tmp: list = [] def generate_report(self, df, filter_info: dict = None, summary_result: dict = None) -> bytes: """ Build and return the analysis PDF. Args: df: Filtered HelpScout analysis DataFrame. filter_info: Dict of filter descriptions for the cover. summary_result: Output from HelpScoutSummaryAgent.process() or None. """ self.pdf = MusoraPDF() self._tmp = [] try: self._cover(df, filter_info) self._filter_summary_section(filter_info, df) self._kpi_section(df) self._distributions_section(df) self._summary_section(summary_result) self._data_summary(df, filter_info) return bytes(self.pdf.output()) finally: _cleanup(self._tmp) # ── Rendering helpers ── def _add_chart(self, fig, width=180, img_w=800, img_h=400): try: p = _fig_to_tmp(fig, img_w, img_h) self._tmp.append(p) h_mm = width * (img_h / img_w) self.pdf.check_page_break(h_mm + 5) self.pdf.image(p, x=10, w=width) self.pdf.ln(3) except Exception: logger.exception("Chart render failed") self.pdf.body_text("[Chart could not be rendered]") def _add_two_charts(self, fig1, fig2, width=92): try: p1 = _fig_to_tmp(fig1, 700, 450, is_side_by_side=True); self._tmp.append(p1) p2 = _fig_to_tmp(fig2, 700, 450, is_side_by_side=True); self._tmp.append(p2) h_mm = width * (450 / 700) self.pdf.check_page_break(h_mm + 5) y = self.pdf.get_y() self.pdf.image(p1, x=10, y=y, w=width) self.pdf.image(p2, x=10 + width + 4, y=y, w=width) self.pdf.set_y(y + h_mm + 3) except Exception: logger.exception("Side-by-side render failed") self.pdf.body_text("[Charts could not be rendered]") # ── Sections ── def _cover(self, df, filter_info): self.pdf.add_page() self.pdf.ln(40) r, g, b = MusoraPDF.PRIMARY self.pdf.set_fill_color(r, g, b) self.pdf.rect(0, 60, 210, 4, style="F") self.pdf.ln(20) self.pdf.set_font("Helvetica", "B", 28) self.pdf.set_text_color(r, g, b) self.pdf.cell(0, 15, "Musora", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.set_font("Helvetica", "", 16) self.pdf.set_text_color(80, 80, 80) self.pdf.cell(0, 10, "HelpScout Analysis Report", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.ln(10) self.pdf.set_font("Helvetica", "", 12) self.pdf.set_text_color(100, 100, 100) self.pdf.cell(0, 8, f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.ln(5) self.pdf.set_font("Helvetica", "", 10) self.pdf.cell(0, 7, f"Matched Conversations: {len(df):,}", align="C", new_x="LMARGIN", new_y="NEXT") if filter_info: self.pdf.ln(8) self.pdf.set_font("Helvetica", "B", 9) self.pdf.set_text_color(80, 80, 80) self.pdf.cell(0, 6, "Applied Filters:", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.set_font("Helvetica", "", 9) for k, v in filter_info.items(): if v: self.pdf.cell(0, 5, MusoraPDF._sanitize(f"{k}: {v}"), align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.ln(20) self.pdf.set_font("Helvetica", "I", 8) self.pdf.set_text_color(150, 150, 150) self.pdf.cell(0, 6, "Confidential - For Internal Use Only", align="C", new_x="LMARGIN", new_y="NEXT") def _filter_summary_section(self, filter_info, df): self.pdf.add_page() self.pdf.section_header("Filter Set Summary") if filter_info: rows = [(k, MusoraPDF._sanitize(str(v))) for k, v in filter_info.items() if v] if rows: self.pdf.add_table(["Filter", "Value"], rows, col_widths=[80, 110]) else: self.pdf.body_text("No filters applied — report covers all available conversations.") def _kpi_section(self, df): total = len(df) flags = boolean_flag_counts(df) neg_pct = df["sentiment_polarity"].isin(["negative", "very_negative"]).sum() / total * 100 if total else 0 pos_pct = df["sentiment_polarity"].isin(["positive", "very_positive"]).sum() / total * 100 if total else 0 avg_dur = float(df["duration_hours"].mean()) if "duration_hours" in df.columns else 0 esc = int(df["is_escalation"].sum()) if "is_escalation" in df.columns else 0 self.pdf.section_header("Key Metrics") self.pdf.metric_row([ ("Conversations", f"{total:,}"), ("Positive %", f"{pos_pct:.1f}%"), ("Negative %", f"{neg_pct:.1f}%"), ("Avg Duration (h)", f"{avg_dur:.1f}"), ]) self.pdf.metric_row([ ("Escalations", f"{esc:,}"), ("Refund Requests", f"{flags['is_refund_request']:,}"), ("Cancellations", f"{flags['is_cancellation']:,}"), ("Membership Joins", f"{flags['is_membership']:,}"), ]) if "is_member" in df.columns: member_count = int(df["is_member"].sum()) non_member_count = total - member_count self.pdf.metric_row([ ("Members", f"{member_count:,}"), ("Non-Members", f"{non_member_count:,}"), ("Email Match Rate", f"{member_count / total * 100:.1f}%" if total else "N/A"), ]) def _distributions_section(self, df): self.pdf.add_page() self.pdf.section_header("Distributions") pie = self.charts.create_sentiment_pie_chart(df, title="Sentiment Distribution") tbar = self.charts.create_topic_bar_chart(df, title="Topic Distribution") self._add_two_charts(pie, tbar) self._add_chart(self.charts.create_topic_sentiment_heatmap(df), img_h=500) if "is_member" in df.columns: self.pdf.add_page() self.pdf.section_header("Member vs Non-Member Breakdown") self._add_two_charts( self.charts.create_member_status_chart(df, title="Member vs Non-Member"), self.charts.create_member_sentiment_chart(df, title="Sentiment by Member Status"), ) self._add_chart( self.charts.create_member_topic_chart(df, title="Top Topics by Member Status"), img_h=500, ) def _summary_section(self, result: dict): self.pdf.add_page() self.pdf.section_header("AI Summary Report") if result is None or not result.get("success"): self.pdf.callout_box( "AI summary not generated. To include it, click 'Generate Summary Report' " "in the app before exporting the PDF.", bg_color=(255, 250, 230), ) return summary = result.get("summary", {}) meta = result.get("metadata", {}) exec_summary = MusoraPDF._sanitize(summary.get("executive_summary", "")) if exec_summary: self.pdf.subsection_header("Executive Summary") self.pdf.section_description(exec_summary) themes = summary.get("top_themes", []) if themes: self.pdf.subsection_header("Top Themes") for t in themes: theme_text = MusoraPDF._sanitize( f"{t.get('theme', '')} — {t.get('prevalence', '')}: {t.get('description', '')}" ) self.pdf.body_text(f" * {theme_text}") complaints = summary.get("top_complaints", []) if complaints: self.pdf.subsection_header("Top Complaints") for c in complaints: self.pdf.body_text(f" * {MusoraPDF._sanitize(c)}") insights = summary.get("unexpected_insights", []) if insights: self.pdf.subsection_header("Unexpected Insights") for ins in insights: self.pdf.body_text(f" * {MusoraPDF._sanitize(ins)}") quotes = summary.get("notable_quotes", []) if quotes: self.pdf.subsection_header("Notable Quotes") for q in quotes: self.pdf.body_text(f' "{MusoraPDF._sanitize(q)}"') self.pdf.ln(4) self.pdf.callout_box( f"Analysis based on {meta.get('total_conversations_analyzed', 0)} conversations " f"| Model: {meta.get('model_used', 'N/A')} " f"| Tokens: {meta.get('tokens_used', 0):,}", bg_color=(240, 248, 255), ) def _data_summary(self, df, filter_info): self.pdf.add_page() self.pdf.section_header("Data Summary") self.pdf.body_text(f"Report generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") self.pdf.body_text(f"Total conversations in report: {len(df):,}") self.pdf.callout_box( "Data source: SOCIAL_MEDIA_DB.ML_FEATURES.HELPSCOUT_CONVERSATION_FEATURES\n" "This report is confidential and intended for internal Musora team use only.", bg_color=(245, 245, 245), )