Danialebrat committed on
Commit
8ef893d
·
1 Parent(s): 6af8665

Fixing helpscout trend analysis

Browse files
visualization/app.py CHANGED
@@ -8,7 +8,6 @@ import streamlit as st
8
  import sys
9
  from pathlib import Path
10
  import json
11
- from datetime import date
12
 
13
  # Add parent directory to path
14
  parent_dir = Path(__file__).resolve().parent
@@ -131,13 +130,10 @@ def main():
131
  max_date = dashboard_df['comment_timestamp'].max().date()
132
 
133
  prev_range = prev.get('date_range')
134
- # default_range = (
135
- # (prev_range[0], prev_range[1]) if prev_range and len(prev_range) == 2
136
- # else (min_date, max_date)
137
- # )
138
-
139
- default_range= (date(2026, 1, 1), max_date) # year, month, day
140
-
141
  date_range = st.date_input(
142
  "Date Range",
143
  value=default_range,
 
8
  import sys
9
  from pathlib import Path
10
  import json
 
11
 
12
  # Add parent directory to path
13
  parent_dir = Path(__file__).resolve().parent
 
130
  max_date = dashboard_df['comment_timestamp'].max().date()
131
 
132
  prev_range = prev.get('date_range')
133
+ default_range = (
134
+ (prev_range[0], prev_range[1]) if prev_range and len(prev_range) == 2
135
+ else (min_date, max_date)
136
+ )
 
 
 
137
  date_range = st.date_input(
138
  "Date Range",
139
  value=default_range,
visualization/components/helpscout_dashboard.py CHANGED
@@ -164,7 +164,21 @@ def render_helpscout_dashboard(data_loader):
164
  index=1, key="hs_dash_freq")
165
  st.plotly_chart(charts.create_volume_timeline(hs_df, freq=freq), use_container_width=True)
166
  st.plotly_chart(charts.create_sentiment_timeline(hs_df, freq=freq), use_container_width=True)
167
- st.plotly_chart(charts.create_topic_timeline(hs_df, freq=freq), use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  st.plotly_chart(charts.create_refund_cancel_timeline(hs_df, freq=freq), use_container_width=True)
169
 
170
  # ── Duration & Thread Count ───────────────────────────────────────────────
 
164
  index=1, key="hs_dash_freq")
165
  st.plotly_chart(charts.create_volume_timeline(hs_df, freq=freq), use_container_width=True)
166
  st.plotly_chart(charts.create_sentiment_timeline(hs_df, freq=freq), use_container_width=True)
167
+
168
+ all_topics_ranked = charts.get_all_topics_ranked(hs_df)
169
+ topic_options = {t: topic_label(t, charts.taxonomy) for t in all_topics_ranked}
170
+ default_topics = all_topics_ranked[:5]
171
+ selected_topics = st.multiselect(
172
+ "Topics to display",
173
+ options=list(topic_options.keys()),
174
+ default=default_topics,
175
+ format_func=lambda t: topic_options[t],
176
+ key="hs_dash_topic_select",
177
+ )
178
+ st.plotly_chart(
179
+ charts.create_topic_timeline(hs_df, freq=freq, selected_topics=selected_topics or default_topics),
180
+ use_container_width=True,
181
+ )
182
  st.plotly_chart(charts.create_refund_cancel_timeline(hs_df, freq=freq), use_container_width=True)
183
 
184
  # ── Duration & Thread Count ───────────────────────────────────────────────
visualization/visualizations/helpscout_charts.py CHANGED
@@ -176,21 +176,36 @@ class HelpScoutCharts:
176
  yaxis_title="Topic", height=self.chart_height + 100)
177
  return fig
178
 
 
 
 
 
 
 
 
179
  def create_topic_timeline(self, df, title="Topic Volume Over Time",
180
- freq="W", top_n=5):
181
  if "first_message_at" not in df.columns:
182
  return self._empty_fig(title, "No timestamp data")
183
  exploded = explode_topics(df)
184
  if exploded.empty:
185
  return self._empty_fig(title, "No topic data")
186
 
187
- top_topics = exploded["topic_id"].value_counts().head(top_n).index.tolist()
188
- exploded = exploded[exploded["topic_id"].isin(top_topics)].copy()
 
 
 
 
 
 
 
 
189
  exploded["date"] = pd.to_datetime(exploded["first_message_at"]).dt.to_period(freq).dt.to_timestamp()
190
  agg = exploded.groupby(["date", "topic_id"]).size().reset_index(name="count")
191
 
192
  fig = go.Figure()
193
- for t in top_topics:
194
  d = agg[agg["topic_id"] == t]
195
  if not d.empty:
196
  fig.add_trace(go.Scatter(
 
176
  yaxis_title="Topic", height=self.chart_height + 100)
177
  return fig
178
 
179
+ def get_all_topics_ranked(self, df):
180
+ """Return all topic_ids sorted by total volume (descending)."""
181
+ exploded = explode_topics(df)
182
+ if exploded.empty:
183
+ return []
184
+ return exploded["topic_id"].value_counts().index.tolist()
185
+
186
  def create_topic_timeline(self, df, title="Topic Volume Over Time",
187
+ freq="W", top_n=5, selected_topics=None):
188
  if "first_message_at" not in df.columns:
189
  return self._empty_fig(title, "No timestamp data")
190
  exploded = explode_topics(df)
191
  if exploded.empty:
192
  return self._empty_fig(title, "No topic data")
193
 
194
+ all_ranked = exploded["topic_id"].value_counts().index.tolist()
195
+ if selected_topics is not None:
196
+ topics = [t for t in all_ranked if t in selected_topics]
197
+ else:
198
+ topics = all_ranked[:top_n]
199
+
200
+ if not topics:
201
+ return self._empty_fig(title, "No topics selected")
202
+
203
+ exploded = exploded[exploded["topic_id"].isin(topics)].copy()
204
  exploded["date"] = pd.to_datetime(exploded["first_message_at"]).dt.to_period(freq).dt.to_timestamp()
205
  agg = exploded.groupby(["date", "topic_id"]).size().reset_index(name="count")
206
 
207
  fig = go.Figure()
208
+ for t in topics:
209
  d = agg[agg["topic_id"] == t]
210
  if not d.empty:
211
  fig.add_trace(go.Scatter(