""" HelpScout-specific Plotly chart functions. All functions accept a HelpScout conversations DataFrame and return a plotly.graph_objects.Figure. """ import json import sys from pathlib import Path import pandas as pd import plotly.graph_objects as go # Ensure project root is on sys.path so visualization.* imports resolve _root = Path(__file__).resolve().parent.parent.parent if str(_root) not in sys.path: sys.path.insert(0, str(_root)) from visualization.utils.helpscout_utils import ( explode_topics, parse_topics, topic_label, load_topic_taxonomy ) class HelpScoutCharts: """Plotly chart factory for HelpScout conversation data.""" def __init__(self, config_path=None): if config_path is None: config_path = Path(__file__).parent.parent / "config" / "viz_config.json" with open(config_path, "r") as f: config = json.load(f) hs_colors = config.get("color_schemes_helpscout", {}) self.topic_colors = hs_colors.get("topics", {}) self.status_colors = hs_colors.get("status", {}) self.flag_colors = hs_colors.get("boolean_flags", {}) self.sentiment_colors = config.get("color_schemes", {}).get("sentiment_polarity", {}) self.sentiment_order = config.get("sentiment_order", []) self.chart_height = config.get("dashboard", {}).get("chart_height", 400) self.taxonomy = load_topic_taxonomy() # ───────────────────────────────────────────────────────────── # Sentiment charts # ───────────────────────────────────────────────────────────── def create_sentiment_pie_chart(self, df, title="Sentiment Distribution"): counts = df["sentiment_polarity"].value_counts() ordered = [s for s in self.sentiment_order if s in counts.index] counts = counts[ordered] colors = [self.sentiment_colors.get(s, "#CCCCCC") for s in counts.index] fig = go.Figure(go.Pie( labels=counts.index, values=counts.values, marker=dict(colors=colors), textinfo="label+percent", hovertemplate="%{label}
Count: %{value}
%{percent}", )) fig.update_layout(title=title, height=self.chart_height, legend=dict(orientation="v", yanchor="middle", y=0.5)) return fig def create_sentiment_score_gauge(self, avg_score, title="Sentiment Score"): normalized = ((avg_score + 2) / 4) * 100 fig = go.Figure(go.Indicator( mode="gauge+number", value=normalized, title={"text": title, "font": {"size": 18}}, number={"font": {"size": 36}}, gauge={ "axis": {"range": [0, 100]}, "bar": {"color": "darkblue"}, "steps": [ {"range": [0, 20], "color": "#D32F2F"}, {"range": [20, 40], "color": "#FF6F00"}, {"range": [40, 60], "color": "#FFB300"}, {"range": [60, 80], "color": "#7CB342"}, {"range": [80, 100],"color": "#00C851"}, ], }, )) fig.update_layout(height=300, margin=dict(l=20, r=20, t=60, b=20)) return fig def create_sentiment_timeline(self, df, title="Sentiment Over Time", freq="W"): if "first_message_at" not in df.columns: return self._empty_fig(title, "No timestamp data") df_t = df.copy() df_t["date"] = pd.to_datetime(df_t["first_message_at"]).dt.to_period(freq).dt.to_timestamp() agg = df_t.groupby(["date", "sentiment_polarity"]).size().reset_index(name="count") fig = go.Figure() for s in self.sentiment_order: d = agg[agg["sentiment_polarity"] == s] if not d.empty: fig.add_trace(go.Scatter( x=d["date"], y=d["count"], name=s, mode="lines+markers", line=dict(color=self.sentiment_colors.get(s, "#CCCCCC"), width=2), hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, xaxis_title="Date", yaxis_title="Conversations", height=self.chart_height, hovermode="x unified") return fig # ───────────────────────────────────────────────────────────── # Topic charts # ───────────────────────────────────────────────────────────── def create_topic_bar_chart(self, df, title="Topic Distribution", orientation="h", top_n=None): exploded = explode_topics(df) if exploded.empty: return self._empty_fig(title, "No topic data") counts = exploded["topic_id"].value_counts() if top_n: counts = counts.head(top_n) labels = [topic_label(t, self.taxonomy) for t in counts.index] colors = [self.topic_colors.get(t, "#607D8B") for t in counts.index] if orientation == "h": fig = go.Figure(go.Bar( y=labels, x=counts.values, orientation="h", marker=dict(color=colors), text=counts.values, textposition="auto", hovertemplate="%{y}
%{x} conversations", )) fig.update_layout(title=title, xaxis_title="Conversations", yaxis_title="Topic", height=self.chart_height, yaxis={"categoryorder": "total ascending"}) else: fig = go.Figure(go.Bar( x=labels, y=counts.values, marker=dict(color=colors), text=counts.values, textposition="auto", hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, xaxis_title="Topic", yaxis_title="Conversations", height=self.chart_height) return fig def create_topic_pie_chart(self, df, title="Topic Distribution"): exploded = explode_topics(df) if exploded.empty: return self._empty_fig(title, "No topic data") counts = exploded["topic_id"].value_counts() labels = [topic_label(t, self.taxonomy) for t in counts.index] colors = [self.topic_colors.get(t, "#607D8B") for t in counts.index] fig = go.Figure(go.Pie( labels=labels, values=counts.values, marker=dict(colors=colors), textinfo="label+percent", hovertemplate="%{label}
%{value}
%{percent}", )) fig.update_layout(title=title, height=self.chart_height) return fig def create_topic_sentiment_heatmap(self, df, title="Topic × Sentiment Heatmap"): exploded = explode_topics(df) if exploded.empty or "sentiment_polarity" not in exploded.columns: return self._empty_fig(title, "No data") pivot = pd.crosstab(exploded["topic_id"], exploded["sentiment_polarity"]) pivot.index = [topic_label(t, self.taxonomy) for t in pivot.index] ordered_cols = [s for s in self.sentiment_order if s in pivot.columns] pivot = pivot[ordered_cols] if ordered_cols else pivot fig = go.Figure(go.Heatmap( z=pivot.values, x=pivot.columns.tolist(), y=pivot.index.tolist(), colorscale="Blues", text=pivot.values, texttemplate="%{text}", hovertemplate="%{y} — %{x}
%{z}", colorbar=dict(title="Conversations"), )) fig.update_layout(title=title, xaxis_title="Sentiment", yaxis_title="Topic", height=self.chart_height + 100) return fig def get_all_topics_ranked(self, df): """Return all topic_ids sorted by total volume (descending).""" exploded = explode_topics(df) if exploded.empty: return [] return exploded["topic_id"].value_counts().index.tolist() def create_topic_timeline(self, df, title="Topic Volume Over Time", freq="W", top_n=5, selected_topics=None): if "first_message_at" not in df.columns: return self._empty_fig(title, "No timestamp data") exploded = explode_topics(df) if exploded.empty: return self._empty_fig(title, "No topic data") all_ranked = exploded["topic_id"].value_counts().index.tolist() if selected_topics is not None: topics = [t for t in all_ranked if t in selected_topics] else: topics = all_ranked[:top_n] if not topics: return self._empty_fig(title, "No topics selected") exploded = exploded[exploded["topic_id"].isin(topics)].copy() exploded["date"] = pd.to_datetime(exploded["first_message_at"]).dt.to_period(freq).dt.to_timestamp() agg = exploded.groupby(["date", "topic_id"]).size().reset_index(name="count") fig = go.Figure() for t in topics: d = agg[agg["topic_id"] == t] if not d.empty: fig.add_trace(go.Scatter( x=d["date"], y=d["count"], name=topic_label(t, self.taxonomy), mode="lines+markers", line=dict(color=self.topic_colors.get(t, "#607D8B"), width=2), hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, xaxis_title="Date", yaxis_title="Conversations", height=self.chart_height, hovermode="x unified") return fig # ───────────────────────────────────────────────────────────── # Volume & timelines # ───────────────────────────────────────────────────────────── def create_volume_timeline(self, df, title="Conversation Volume Over Time", freq="W"): if "first_message_at" not in df.columns: return self._empty_fig(title, "No timestamp data") df_t = df.copy() df_t["date"] = pd.to_datetime(df_t["first_message_at"]).dt.to_period(freq).dt.to_timestamp() agg = df_t.groupby("date").size().reset_index(name="count") fig = go.Figure(go.Bar( x=agg["date"], y=agg["count"], marker_color="#1982C4", hovertemplate="%{x}
%{y} conversations", )) fig.update_layout(title=title, xaxis_title="Date", yaxis_title="Conversations", height=self.chart_height) return fig def create_refund_cancel_timeline(self, df, title="Refund & Cancellation Over Time", freq="W"): if "first_message_at" not in df.columns: return self._empty_fig(title, "No timestamp data") df_t = df.copy() df_t["date"] = pd.to_datetime(df_t["first_message_at"]).dt.to_period(freq).dt.to_timestamp() fig = go.Figure() for col, label, color in [ ("is_refund_request", "Refund Requests", "#D32F2F"), ("is_cancellation", "Cancellations", "#FF6F00"), ("is_membership", "Membership Joins", "#00C851"), ]: if col in df_t.columns: agg = df_t[df_t[col] == True].groupby("date").size().reset_index(name="count") if not agg.empty: fig.add_trace(go.Scatter( x=agg["date"], y=agg["count"], name=label, mode="lines+markers", line=dict(color=color, width=2), hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, xaxis_title="Date", yaxis_title="Conversations", height=self.chart_height, hovermode="x unified") return fig # ───────────────────────────────────────────────────────────── # Status / source / flags # ───────────────────────────────────────────────────────────── def create_status_distribution(self, df, title="Conversations by Status"): if "status" not in df.columns: return self._empty_fig(title, "No status data") counts = df["status"].value_counts() colors = [self.status_colors.get(s, self.status_colors.get("default", "#607D8B")) for s in counts.index] fig = go.Figure(go.Bar( x=counts.index, y=counts.values, marker=dict(color=colors), text=counts.values, textposition="auto", hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, xaxis_title="Status", yaxis_title="Conversations", height=self.chart_height) return fig def create_source_distribution(self, df, title="Conversations by Source Type"): if "source_type" not in df.columns: return self._empty_fig(title, "No source data") counts = df["source_type"].value_counts() fig = go.Figure(go.Bar( x=counts.index, y=counts.values, marker_color="#1982C4", text=counts.values, textposition="auto", hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, xaxis_title="Source", yaxis_title="Conversations", height=self.chart_height) return fig def create_boolean_flags_chart(self, df, title="Key Billing & Membership Flags"): labels, values, colors = [], [], [] for col, label in [("is_refund_request", "Refund Requests"), ("is_cancellation", "Cancellations"), ("is_membership", "Membership Joins")]: if col in df.columns: labels.append(label) values.append(int(df[col].sum())) colors.append(self.flag_colors.get(col, "#607D8B")) if not values: return self._empty_fig(title, "No flag data") fig = go.Figure(go.Bar( x=labels, y=values, marker=dict(color=colors), text=values, textposition="auto", hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, xaxis_title="Flag", yaxis_title="Conversations", height=self.chart_height) return fig def create_escalation_breakdown(self, df, title="Escalation Queue by Topic"): if "is_escalation" not in df.columns: return self._empty_fig(title, "No escalation data") exploded = explode_topics(df) if exploded.empty: return self._empty_fig(title, "No topic data") pivot = pd.crosstab(exploded["topic_id"], exploded["is_escalation"]) pivot.index = [topic_label(t, self.taxonomy) for t in pivot.index] fig = go.Figure() for flag, label, color in [(False, "Normal", "#4CAF50"), (True, "Escalation", "#D32F2F")]: if flag in pivot.columns: fig.add_trace(go.Bar( name=label, y=pivot.index, x=pivot[flag], orientation="h", marker_color=color, hovertemplate="%{y}
%{x}", )) fig.update_layout(title=title, barmode="stack", xaxis_title="Conversations", yaxis_title="Topic", height=self.chart_height, yaxis={"categoryorder": "total ascending"}) return fig # ───────────────────────────────────────────────────────────── # Duration & thread count # ───────────────────────────────────────────────────────────── def create_duration_histogram(self, df, title="Conversation Duration Distribution"): if "duration_hours" not in df.columns: return self._empty_fig(title, "No duration data") d = df["duration_hours"].dropna() fig = go.Figure(go.Histogram( x=d, nbinsx=40, marker_color="#1982C4", hovertemplate="Duration: %{x:.1f}h
Count: %{y}", )) fig.update_layout(title=title, xaxis_title="Duration (hours)", yaxis_title="Conversations", height=self.chart_height) return fig def create_thread_count_histogram(self, df, title="Thread Count Distribution"): if "thread_count" not in df.columns: return self._empty_fig(title, "No thread data") t = df["thread_count"].dropna() fig = go.Figure(go.Histogram( x=t, nbinsx=30, marker_color="#9C27B0", hovertemplate="Threads: %{x}
Count: %{y}", )) fig.update_layout(title=title, xaxis_title="Number of Threads", yaxis_title="Conversations", height=self.chart_height) return fig # ───────────────────────────────────────────────────────────── # Emotion (same logic as DistributionCharts but with helpscout df) # ───────────────────────────────────────────────────────────── def create_emotion_bar_chart(self, df, title="Emotion Distribution", orientation="h"): if "emotions" not in df.columns or df["emotions"].isna().all(): return self._empty_fig(title, "No emotion data") emotion_colors = { "joy": "#FFD700", "excitement": "#FF6B35", "gratitude": "#4CAF50", "admiration": "#2196F3", "curiosity": "#00BCD4", "humor": "#9C27B0", "frustration": "#FF9800", "disappointment": "#795548", "sadness": "#607D8B", "anger": "#D32F2F", "neutral": "#9E9E9E", } df_e = df.copy() df_e["emotions"] = df_e["emotions"].str.split(",") df_e = df_e.explode("emotions") df_e["emotions"] = df_e["emotions"].str.strip().str.lower() counts = df_e["emotions"].dropna().value_counts() colors = [emotion_colors.get(e, "#CCCCCC") for e in counts.index] if orientation == "h": fig = go.Figure(go.Bar( y=counts.index, x=counts.values, orientation="h", marker=dict(color=colors), text=counts.values, textposition="auto", hovertemplate="%{y}
%{x}", )) fig.update_layout(title=title, xaxis_title="Conversations", yaxis_title="Emotion", height=self.chart_height, yaxis={"categoryorder": "total ascending"}) else: fig = go.Figure(go.Bar( x=counts.index, y=counts.values, marker=dict(color=colors), text=counts.values, textposition="auto", hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, xaxis_title="Emotion", yaxis_title="Conversations", height=self.chart_height) return fig # ───────────────────────────────────────────────────────────── # Member vs Non-Member charts # ───────────────────────────────────────────────────────────── def create_member_status_chart(self, df, title="Member vs Non-Member"): """Pie chart: proportion of conversations from Musora members vs non-members.""" if "is_member" not in df.columns: return self._empty_fig(title, "No member data available") label_map = {True: "Member", False: "Non-Member"} counts = df["is_member"].map(label_map).value_counts() color_map = {"Member": "#1982C4", "Non-Member": "#FF6B35"} colors = [color_map.get(l, "#CCCCCC") for l in counts.index] fig = go.Figure(go.Pie( labels=counts.index, values=counts.values, marker=dict(colors=colors), textinfo="label+percent", hovertemplate="%{label}
Count: %{value}
%{percent}", )) fig.update_layout(title=title, height=self.chart_height, legend=dict(orientation="v", yanchor="middle", y=0.5)) return fig def create_member_sentiment_chart(self, df, title="Sentiment by Member Status"): """Stacked bar: sentiment distribution split by member vs non-member.""" if "is_member" not in df.columns or "sentiment_polarity" not in df.columns: return self._empty_fig(title, "No member/sentiment data available") df_c = df.copy() df_c["member_status"] = df_c["is_member"].map({True: "Member", False: "Non-Member"}) pivot = pd.crosstab(df_c["member_status"], df_c["sentiment_polarity"]) ordered_cols = [s for s in self.sentiment_order if s in pivot.columns] pivot = pivot[ordered_cols] if ordered_cols else pivot fig = go.Figure() for s in (ordered_cols or pivot.columns.tolist()): fig.add_trace(go.Bar( name=s, x=pivot.index, y=pivot[s], marker_color=self.sentiment_colors.get(s, "#CCCCCC"), hovertemplate="%{x}
%{y}", )) fig.update_layout(title=title, barmode="stack", xaxis_title="Customer Type", yaxis_title="Conversations", height=self.chart_height) return fig def create_member_topic_chart(self, df, title="Top Topics by Member Status"): """Grouped bar: top-10 topics split by member vs non-member.""" if "is_member" not in df.columns: return self._empty_fig(title, "No member data available") exploded = explode_topics(df) if exploded.empty: return self._empty_fig(title, "No topic data") exploded["member_status"] = exploded["is_member"].map({True: "Member", False: "Non-Member"}) top_topics = exploded["topic_id"].value_counts().head(10).index.tolist() exploded = exploded[exploded["topic_id"].isin(top_topics)] pivot = pd.crosstab(exploded["topic_id"], exploded["member_status"]) pivot.index = [topic_label(t, self.taxonomy) for t in pivot.index] fig = go.Figure() color_map = {"Member": "#1982C4", "Non-Member": "#FF6B35"} for col in pivot.columns: fig.add_trace(go.Bar( name=col, y=pivot.index, x=pivot[col], orientation="h", marker_color=color_map.get(col, "#CCCCCC"), hovertemplate="%{y}
%{x}", )) fig.update_layout(title=title, barmode="group", xaxis_title="Conversations", yaxis_title="Topic", height=self.chart_height + 80, yaxis={"categoryorder": "total ascending"}) return fig # ───────────────────────────────────────────────────────────── # Helpers # ───────────────────────────────────────────────────────────── @staticmethod def _empty_fig(title, message): fig = go.Figure() fig.add_annotation(text=message, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14)) fig.update_layout(title=title, height=300) return fig