# Sentiment_analysis/visualization/visualizations/learning_paths_charts.py
# Danialebrat - commit 599973c:
#   "Adding Learning path page and improving HelpScout dashboard"
"""
Learning Paths — Plotly chart factory.
All methods accept a merged lesson-metrics DataFrame (output of
learning_paths_utils.merge_lesson_metrics / merge_method_wide) and return
a plotly Figure.
"""
import json
import sys
from pathlib import Path
from typing import Optional
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Put the directory three levels above this file on sys.path (once), presumably
# so that the absolute ``visualization.*`` import that follows resolves no
# matter which working directory the app is launched from.
_root = Path(__file__).resolve().parent.parent.parent
if str(_root) not in sys.path:
    sys.path.insert(0, str(_root))
from visualization.utils.learning_paths_utils import short_title, label_for_path
class LearningPathsCharts:
    """Plotly chart factory for learning-path lesson-level data.

    Every ``create_*`` method takes a DataFrame and returns a Plotly figure.
    When the frame is empty or a required metric column is missing, a
    placeholder figure carrying an explanatory message is returned instead.
    """

    SENTIMENT_SCORE_COLORS = {
        "avg_sentiment_score": "#1982C4",
    }

    # Colors handed out to learning paths by their position in the id list.
    _PATH_PALETTE = (
        "#1982C4", "#FF6B35", "#6A4C93", "#4CAF50",
        "#E91E63", "#00BCD4", "#FF9800", "#9C27B0",
    )
    # Color used when a path id cannot be found in the id list.
    _FALLBACK_PATH_COLOR = "#607D8B"
    # Column that identifies the learning path a lesson belongs to.
    _PATH_COL = "learning_path_id"

    def __init__(self, config_path=None):
        """Load chart settings from the visualization JSON config.

        Args:
            config_path: Path to the JSON config file. When omitted,
                ``../config/viz_config.json`` relative to this file's
                directory is used.
        """
        if config_path is None:
            config_path = Path(__file__).parent.parent / "config" / "viz_config.json"
        # Explicit encoding so the result does not depend on the platform default.
        with open(config_path, encoding="utf-8") as f:
            cfg = json.load(f)
        self.sentiment_colors = cfg.get("color_schemes", {}).get("sentiment_polarity", {})
        self.sentiment_order = cfg.get("sentiment_order", [])
        self.lp_config = cfg.get("learning_paths", {})
        self.brand_colors = self.lp_config.get("brand_colors", {})
        self.chart_height = cfg.get("dashboard", {}).get("chart_height", 400)

    # ─────────────────────────────────────────────────────────────────────────
    # 1. Sentiment Journey (avg_sentiment_score per lesson)
    # ─────────────────────────────────────────────────────────────────────────
    def create_sentiment_journey(self, df: pd.DataFrame,
                                 x_col: str = "lesson_order",
                                 group_col: str = "learning_path_id",
                                 title: str = "Sentiment Journey Along the Path") -> "go.Figure":
        """Line chart: avg_sentiment_score per lesson, one line per learning path.

        Args:
            df: Lesson metrics; must contain ``x_col`` and ``avg_sentiment_score``.
                ``content_title`` and ``total_comments`` are used in the hover
                text when present.
            x_col: Column plotted on the x axis.
            group_col: Column that splits the data into one line per path. If it
                is absent, all rows are drawn as a single "All Lessons" line.
            title: Figure title.

        Returns:
            The line chart, or a placeholder figure when no score is available.
        """
        if (df.empty or "avg_sentiment_score" not in df.columns
                or df["avg_sentiment_score"].isna().all()):
            return self._empty("No comment data available for sentiment scoring")

        fig = go.Figure()
        for _pid, sub, label, color in self._path_groups(df, x_col, group_col):
            n_rows = len(sub)
            titles = sub["content_title"] if "content_title" in sub.columns else [None] * n_rows
            counts = sub["total_comments"] if "total_comments" in sub.columns else [0] * n_rows
            # Iterate column-wise rather than with iterrows(): iterrows() does not
            # preserve dtypes across a row, so integer lesson numbers could be
            # rendered as "3.0". NaN comment counts are shown as 0 instead of
            # raising in int().
            hover_texts = [
                f"<b>Lesson {self._format_lesson(x)}</b><br>{short_title(lesson_title)}<br>"
                f"Score: {score:.2f}<br>Comments: {self._safe_int(count):,}"
                for x, lesson_title, score, count
                in zip(sub[x_col], titles, sub["avg_sentiment_score"], counts)
            ]
            fig.add_trace(go.Scatter(
                x=sub[x_col], y=sub["avg_sentiment_score"], name=label,
                mode="lines+markers",
                line=dict(color=color, width=2),
                marker=dict(size=6),
                hovertext=hover_texts, hoverinfo="text",
            ))

        fig.add_hline(y=0, line_dash="dot", line_color="gray", opacity=0.5)
        fig.update_layout(
            title=title, xaxis_title="Lesson Number", yaxis_title="Avg Sentiment Score",
            yaxis=dict(range=[-2.2, 2.2], tickvals=[-2, -1, 0, 1, 2],
                       ticktext=["Very Negative", "Negative", "Neutral",
                                 "Positive", "Very Positive"]),
            height=self.chart_height, hovermode="x unified",
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 2. Sentiment Stacked Bar (counts per sentiment per lesson)
    # ─────────────────────────────────────────────────────────────────────────
    def create_sentiment_stacked_bar(self, df: pd.DataFrame,
                                     x_col: str = "lesson_order",
                                     path_id: Optional[int] = None,
                                     title: str = "Sentiment Breakdown per Lesson") -> "go.Figure":
        """Stacked bar: sentiment category counts per lesson (one path at a time).

        Args:
            df: Lesson metrics with one column per sentiment category listed in
                the configured ``sentiment_order``.
            x_col: Column plotted on the x axis.
            path_id: Restrict the chart to this learning path; ``None`` keeps
                every row.
            title: Figure title.

        Returns:
            The stacked bar chart, or a placeholder figure when there are no
            rows or no sentiment columns.
        """
        # Emptiness is checked before sorting so an empty, column-less frame
        # yields the placeholder instead of a KeyError.
        sub = self._select_path(df, path_id, x_col)
        if sub.empty:
            return self._empty("No data")
        sentiment_cols = self._sentiment_count_columns(sub)
        if not sentiment_cols:
            return self._empty("No sentiment columns found")

        fig = go.Figure()
        for s in sentiment_cols:
            fig.add_trace(go.Bar(
                x=sub[x_col], y=sub[s], name=s,
                marker_color=self.sentiment_colors.get(s, "#CCCCCC"),
                hovertemplate=f"<b>{s}</b><br>Lesson %{{x}}<br>Count: %{{y}}<extra></extra>",
            ))
        fig.update_layout(
            title=title, barmode="stack",
            xaxis_title="Lesson Number", yaxis_title="Comment Count",
            height=self.chart_height,
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 3. Completion Funnel
    # ─────────────────────────────────────────────────────────────────────────
    def create_completion_funnel(self, df: pd.DataFrame,
                                 x_col: str = "lesson_number",
                                 title: str = "Completion Rate Along the Path") -> "go.Figure":
        """Area chart: completion_rate per lesson, one line per learning path.

        ``completion_rate`` is multiplied by 100 for display. Missing rates are
        left as gaps rather than drawn as 0%.

        Args:
            df: Lesson metrics containing ``x_col`` and ``completion_rate``.
            x_col: Column plotted on the x axis.
            title: Figure title.

        Returns:
            The area chart, or a placeholder figure when the data is missing.
        """
        # NOTE(review): the default x_col here is "lesson_number" while the
        # sibling charts default to "lesson_order" — confirm this is intentional.
        if df.empty or "completion_rate" not in df.columns:
            return self._empty("completion_rate not available")

        fig = go.Figure()
        for _pid, sub, label, color in self._path_groups(df, x_col):
            fig.add_trace(go.Scatter(
                x=sub[x_col],
                y=(sub["completion_rate"] * 100).round(1),
                name=label, mode="lines+markers",
                line=dict(color=color, width=2), marker=dict(size=5),
                fill="tozeroy",
                fillcolor=self._hex_to_rgba(color, 0.12),
                hovertemplate=(
                    f"<b>{label}</b><br>"
                    "Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>"
                ),
            ))
        fig.update_layout(
            title=title, xaxis_title="Lesson Number",
            yaxis_title="Completion Rate (%)", yaxis=dict(range=[0, 105]),
            height=self.chart_height, hovermode="x unified",
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 4. Video Engagement
    # ─────────────────────────────────────────────────────────────────────────
    def create_video_engagement(self, df: pd.DataFrame,
                                x_col: str = "lesson_order",
                                title: str = "Video Completion Rate per Lesson") -> "go.Figure":
        """Grouped bar chart: video_completion_rate per lesson, one trace per path.

        Args:
            df: Lesson metrics containing ``x_col`` and ``video_completion_rate``.
                ``total_starts`` and ``total_completions`` are added to the hover
                text only when both columns are present.
            x_col: Column plotted on the x axis.
            title: Figure title.

        Returns:
            The bar chart, or a placeholder figure when the data is missing.
        """
        if df.empty or "video_completion_rate" not in df.columns:
            return self._empty("video_completion_rate not available")

        # The start/completion counts are optional; without them the chart is
        # still drawn, just with a shorter hover text.
        count_cols = ["total_starts", "total_completions"]
        has_counts = all(col in df.columns for col in count_cols)

        fig = go.Figure()
        for _pid, sub, label, color in self._path_groups(df, x_col):
            hover = (
                f"<b>{label}</b> — Lesson %{{x}}<br>"
                "Video Completion: %{y:.1f}%<br>"
            )
            extra = {}
            if has_counts:
                hover += "Starts: %{customdata[0]:,} | Completions: %{customdata[1]:,}"
                extra["customdata"] = sub[count_cols].values
            hover += "<extra></extra>"
            fig.add_trace(go.Bar(
                x=sub[x_col],
                y=(sub["video_completion_rate"].fillna(0) * 100).round(1),
                name=label, marker_color=color,
                hovertemplate=hover,
                **extra,
            ))
        fig.update_layout(
            title=title, barmode="group",
            xaxis_title="Lesson Number", yaxis_title="Video Completion Rate (%)",
            yaxis=dict(range=[0, 105]),
            height=self.chart_height,
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 5. Dual-Axis Engagement (headline chart)
    # ─────────────────────────────────────────────────────────────────────────
    def create_dual_axis_engagement(self, df: pd.DataFrame,
                                    x_col: str = "lesson_order",
                                    path_id: Optional[int] = None,
                                    title: str = "Completion Rate vs Sentiment Score") -> "go.Figure":
        """Dual-axis: completion_rate (bars, left) + avg_sentiment_score (line, right).

        Each series is drawn only if its column exists and has at least one
        non-null value. When no sentiment score is available a short note is
        placed on the chart.

        Args:
            df: Lesson metrics containing ``x_col``.
            x_col: Column plotted on the x axis.
            path_id: Restrict the chart to this learning path; ``None`` keeps
                every row.
            title: Figure title.

        Returns:
            The dual-axis figure, or a placeholder figure when there are no rows.
        """
        sub = self._select_path(df, path_id, x_col)
        if sub.empty:
            return self._empty("No data")

        fig = make_subplots(specs=[[{"secondary_y": True}]])
        has_completion = ("completion_rate" in sub.columns
                          and sub["completion_rate"].notna().any())
        has_sentiment = ("avg_sentiment_score" in sub.columns
                         and sub["avg_sentiment_score"].notna().any())

        if has_completion:
            fig.add_trace(go.Bar(
                x=sub[x_col],
                y=(sub["completion_rate"].fillna(0) * 100).round(1),
                name="Completion Rate (%)",
                marker_color="#1982C4", opacity=0.7,
                hovertemplate="Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>",
            ), secondary_y=False)

        if has_sentiment:
            fig.add_trace(go.Scatter(
                x=sub[x_col], y=sub["avg_sentiment_score"].round(2),
                name="Avg Sentiment Score",
                mode="lines+markers",
                line=dict(color="#FF6B35", width=2), marker=dict(size=6),
                hovertemplate="Lesson %{x}<br>Sentiment: %{y:.2f}<extra></extra>",
            ), secondary_y=True)
            # Neutral-sentiment reference line on the right-hand axis.
            fig.add_hline(y=0, line_dash="dot", line_color="gray",
                          opacity=0.5, secondary_y=True)
        else:
            fig.add_annotation(
                text="No comment data for sentiment scoring",
                xref="paper", yref="paper", x=0.5, y=0.95,
                showarrow=False, font=dict(size=11, color="gray"),
            )

        fig.update_layout(
            title=title, xaxis_title="Lesson Number", height=self.chart_height,
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
            hovermode="x unified",
        )
        fig.update_yaxes(title_text="Completion Rate (%)", range=[0, 105],
                         secondary_y=False)
        fig.update_yaxes(title_text="Avg Sentiment Score",
                         range=[-2.2, 2.2], secondary_y=True)
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 6. Comment Volume per Lesson
    # ─────────────────────────────────────────────────────────────────────────
    def create_comment_volume_chart(self, df: pd.DataFrame,
                                    x_col: str = "lesson_order",
                                    title: str = "Comment Volume per Lesson") -> "go.Figure":
        """Bar chart: total_comments per lesson, one trace per learning path.

        Args:
            df: Lesson metrics containing ``x_col`` and ``total_comments``.
            x_col: Column plotted on the x axis.
            title: Figure title.

        Returns:
            The bar chart, or a placeholder figure when the data is missing.
        """
        if df.empty or "total_comments" not in df.columns:
            return self._empty("No comment volume data available")

        fig = go.Figure()
        for _pid, sub, label, color in self._path_groups(df, x_col):
            fig.add_trace(go.Bar(
                x=sub[x_col],
                y=sub["total_comments"],
                name=label,
                marker_color=color,
                hovertemplate=(
                    f"<b>{label}</b> — Lesson %{{x}}<br>"
                    "Comments: %{y:,}<extra></extra>"
                ),
            ))
        fig.update_layout(
            title=title, barmode="group",
            xaxis_title="Lesson Number", yaxis_title="Total Comments",
            height=self.chart_height,
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 7. Drop-off callout chart
    # ─────────────────────────────────────────────────────────────────────────
    def create_dropoff_chart(self, dropoffs: pd.DataFrame,
                             title: str = "Top Drop-off Points") -> "go.Figure":
        """Horizontal bar: largest lesson-to-lesson completion drops.

        Args:
            dropoffs: One row per drop-off with ``learning_path_id`` and
                ``dropoff`` (multiplied by 100 for display). ``lesson_order``
                (or, failing that, ``lesson_number``) and ``content_title`` are
                used for the bar labels when present.
            title: Figure title.

        Returns:
            The bar chart, or a placeholder figure when ``dropoffs`` is empty.
        """
        if dropoffs.empty:
            return self._empty("No significant drop-offs detected")

        # Records keep each column's own dtype, unlike iterrows(), so integer
        # lesson numbers and path ids are not turned into floats.
        labels = []
        for row in dropoffs.to_dict("records"):
            lesson = row.get("lesson_order", row.get("lesson_number", "?"))
            labels.append(
                f"{label_for_path(row['learning_path_id'], self.lp_config)} "
                f"L{self._format_lesson(lesson)}: "
                f"{short_title(row.get('content_title'), 28)}"
            )
        values = (dropoffs["dropoff"] * 100).round(1)

        fig = go.Figure(go.Bar(
            y=labels, x=values, orientation="h",
            marker=dict(color="#D32F2F", opacity=0.85),
            text=[f"-{v:.1f}%" for v in values], textposition="auto",
            hovertemplate="<b>%{y}</b><br>Drop: -%{x:.1f}%<extra></extra>",
        ))
        fig.update_layout(
            title=title, xaxis_title="Completion Rate Drop (%)",
            # Grow the figure with the number of bars, never below 300 px.
            height=max(300, len(dropoffs) * 60 + 80),
            yaxis={"categoryorder": "total ascending"},
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 8. Lesson × Sentiment Heatmap
    # ─────────────────────────────────────────────────────────────────────────
    def create_lesson_sentiment_heatmap(self, df: pd.DataFrame,
                                        path_id: Optional[int] = None,
                                        title: str = "Sentiment Heatmap per Lesson") -> "go.Figure":
        """Heatmap: lesson_order × sentiment_category (count).

        Args:
            df: Lesson metrics containing ``lesson_order`` and one column per
                sentiment category listed in the configured ``sentiment_order``.
            path_id: Restrict the chart to this learning path; ``None`` keeps
                every row.
            title: Figure title.

        Returns:
            The heatmap, or a placeholder figure when there are no rows or no
            sentiment columns.
        """
        sub = self._select_path(df, path_id, "lesson_order")
        if sub.empty:
            return self._empty("No data")
        # Same column selection as the stacked bar, so a score column listed in
        # sentiment_order is never plotted as a count row.
        sent_cols = self._sentiment_count_columns(sub)
        if not sent_cols:
            return self._empty("No sentiment columns")

        # Rows of z are sentiment categories, columns are lessons.
        z_data = sub[sent_cols].fillna(0).values.T
        x_labels = [f"L{self._format_lesson(r)}" for r in sub["lesson_order"]]

        fig = go.Figure(go.Heatmap(
            z=z_data, x=x_labels, y=sent_cols,
            colorscale="Blues",
            text=z_data, texttemplate="%{text:.0f}",
            hovertemplate="<b>%{y}</b> — Lesson %{x}<br>Count: %{z}<extra></extra>",
            colorbar=dict(title="Comments"),
        ))
        fig.update_layout(
            title=title, xaxis_title="Lesson", yaxis_title="Sentiment",
            height=self.chart_height,
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 9. Method-wide completion (single continuous funnel)
    # ─────────────────────────────────────────────────────────────────────────
    def create_method_funnel(self, df: pd.DataFrame,
                             title: str = "Method-Wide Completion Funnel") -> "go.Figure":
        """Area line across method_lesson_number with path-boundary markers.

        The x axis is ``method_lesson_number`` when that column exists and
        ``lesson_order`` otherwise. A dashed vertical line and a label mark the
        first lesson of every learning path except the first one.

        Args:
            df: Lesson metrics containing ``completion_rate`` and the x column.
            title: Figure title.

        Returns:
            The funnel chart, or a placeholder figure when the data is missing.
        """
        if df.empty or "completion_rate" not in df.columns:
            return self._empty("method-wide data not available")
        x_col = "method_lesson_number" if "method_lesson_number" in df.columns else "lesson_order"

        fig = go.Figure()
        # One shaded area per learning path for orientation.
        for _pid, sub, label, color in self._path_groups(df, x_col):
            fig.add_trace(go.Scatter(
                x=sub[x_col],
                y=(sub["completion_rate"].fillna(0) * 100).round(1),
                name=label,
                mode="lines+markers", line=dict(color=color, width=2),
                marker=dict(size=5), fill="tozeroy",
                fillcolor=self._hex_to_rgba(color, 0.13),
                hovertemplate="Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>",
            ))

        # Vertical lines at path boundaries (only possible with a path column).
        if self._PATH_COL in df.columns:
            boundaries = df.groupby(self._PATH_COL)[x_col].min().sort_values()
            # The first path starts at the left edge, so it gets no marker.
            for pid, boundary in boundaries.iloc[1:].items():
                fig.add_vline(x=boundary - 0.5, line_dash="dash",
                              line_color="gray", opacity=0.5)
                fig.add_annotation(
                    x=boundary - 0.5, y=105,
                    text=label_for_path(pid, self.lp_config),
                    showarrow=False, textangle=-90,
                    font=dict(size=9, color="gray"),
                )

        fig.update_layout(
            title=title, xaxis_title="Method Lesson Number",
            # Extra headroom above 100% leaves space for the boundary labels.
            yaxis_title="Completion Rate (%)", yaxis=dict(range=[0, 115]),
            height=self.chart_height, hovermode="x unified",
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 10. Sentiment journey — method-wide (continuous x)
    # ─────────────────────────────────────────────────────────────────────────
    def create_method_sentiment_journey(self, df: pd.DataFrame,
                                        title: str = "Sentiment Journey — Full Method") -> "go.Figure":
        """Line chart: avg_sentiment_score across the whole method, one line per path.

        The x axis is ``method_lesson_number`` when that column exists and
        ``lesson_order`` otherwise.

        Args:
            df: Lesson metrics containing ``avg_sentiment_score`` and the x column.
            title: Figure title.

        Returns:
            The line chart, or a placeholder figure when no score is available.
        """
        if (df.empty or "avg_sentiment_score" not in df.columns
                or df["avg_sentiment_score"].isna().all()):
            return self._empty("avg_sentiment_score not available")
        x_col = "method_lesson_number" if "method_lesson_number" in df.columns else "lesson_order"

        fig = go.Figure()
        for _pid, sub, label, color in self._path_groups(df, x_col):
            fig.add_trace(go.Scatter(
                x=sub[x_col], y=sub["avg_sentiment_score"].round(2),
                name=label,
                mode="lines+markers", line=dict(color=color, width=2),
                marker=dict(size=5),
                hovertemplate="Lesson %{x}<br>Score: %{y:.2f}<extra></extra>",
            ))
        fig.add_hline(y=0, line_dash="dot", line_color="gray", opacity=0.5)
        fig.update_layout(
            title=title, xaxis_title="Method Lesson Number",
            yaxis_title="Avg Sentiment Score",
            yaxis=dict(range=[-2.2, 2.2]),
            height=self.chart_height, hovermode="x unified",
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # Helpers
    # ─────────────────────────────────────────────────────────────────────────
    def _path_groups(self, df: pd.DataFrame, x_col: str,
                     group_col: str = "learning_path_id") -> list:
        """Split ``df`` into one ``x_col``-sorted frame per learning path.

        Returns:
            A list of ``(path_id, sub_frame, label, color)`` tuples in ascending
            path-id order; paths without rows are skipped. When ``group_col`` is
            not a column of ``df`` the whole frame is returned as a single group
            with path id ``None`` and the label "All Lessons".
        """
        if group_col not in df.columns:
            whole = df.sort_values(x_col)
            if whole.empty:
                return []
            return [(None, whole, "All Lessons", self._path_color(None, [None]))]

        path_ids = sorted(df[group_col].unique())
        groups = []
        for pid in path_ids:
            sub = df[df[group_col] == pid].sort_values(x_col)
            if sub.empty:
                continue
            groups.append((pid, sub, label_for_path(pid, self.lp_config),
                           self._path_color(pid, path_ids)))
        return groups

    @staticmethod
    def _select_path(df: pd.DataFrame, path_id, sort_col: str) -> pd.DataFrame:
        """Return the rows of one learning path sorted by ``sort_col``.

        ``path_id=None`` keeps every row. An empty frame is returned untouched
        (no filtering or sorting is attempted), and a frame without a
        ``learning_path_id`` column yields no rows for a specific ``path_id``.
        """
        if df.empty:
            return df
        if path_id is not None:
            if "learning_path_id" not in df.columns:
                return df.iloc[0:0]
            df = df[df["learning_path_id"] == path_id]
            if df.empty:
                return df
        return df.sort_values(sort_col)

    def _sentiment_count_columns(self, df: pd.DataFrame) -> list:
        """Return the configured sentiment categories that are columns of ``df``.

        The configured order is kept; ``avg_sentiment_score`` is excluded
        because it is a score, not a count.
        """
        return [s for s in self.sentiment_order
                if s in df.columns and s != "avg_sentiment_score"]

    @staticmethod
    def _safe_int(value, default: int = 0) -> int:
        """Convert ``value`` to int, returning ``default`` for missing or bad values."""
        try:
            if pd.isna(value):
                return default
            return int(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _format_lesson(value) -> str:
        """Render a lesson number for display.

        Whole-number floats lose the trailing ``.0``, missing values become
        ``"?"``, and anything else is passed through ``str``.
        """
        if value is None:
            return "?"
        try:
            if pd.isna(value):
                return "?"
        except (TypeError, ValueError):
            # Non-scalar input: fall through to the plain string form.
            pass
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    @staticmethod
    def _hex_to_rgba(hex_color: str, alpha: float = 0.15) -> str:
        """Convert a #RRGGBB (or #RGB) hex string to an rgba(...) string.

        Anything that is not a valid hex color, including named colors such as
        "purple", yields a neutral gray with the requested alpha rather than
        raising.
        """
        fallback = f"rgba(150,150,150,{alpha})"
        if not isinstance(hex_color, str):
            return fallback
        h = hex_color.strip().lstrip("#")
        if len(h) == 3:
            # Expand shorthand: "abc" -> "aabbcc".
            h = "".join(ch * 2 for ch in h)
        if len(h) != 6:
            return fallback
        try:
            r, g, b = (int(h[i:i + 2], 16) for i in (0, 2, 4))
        except ValueError:
            return fallback
        return f"rgba({r},{g},{b},{alpha})"

    def _path_color(self, path_id, all_path_ids: list) -> str:
        """Pick a palette color from the position of ``path_id`` in ``all_path_ids``.

        The palette wraps around when there are more paths than colors. A path
        id that is not in the list gets the fallback color.
        """
        try:
            idx = list(all_path_ids).index(path_id)
        except (ValueError, TypeError):
            return self._FALLBACK_PATH_COLOR
        return self._PATH_PALETTE[idx % len(self._PATH_PALETTE)]

    @staticmethod
    def _empty(message: str, height: int = 300) -> "go.Figure":
        """Return a trace-less figure showing ``message`` centered in the plot area."""
        fig = go.Figure()
        fig.add_annotation(text=message, xref="paper", yref="paper",
                           x=0.5, y=0.5, showarrow=False, font=dict(size=14))
        fig.update_layout(height=height)
        return fig