Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

App Files Files Community

Sentiment_analysis / visualization /visualizations /learning_paths_charts.py

Danialebrat

Adding Learning path page and improving HelpScout dashboard

599973c 13 days ago

raw

history blame contribute delete

25 kB

	"""
	Learning Paths — Plotly chart factory.
	All methods accept a merged lesson-metrics DataFrame (output of
	learning_paths_utils.merge_lesson_metrics / merge_method_wide) and return
	a plotly Figure.
	"""
	import json
	import sys
	from pathlib import Path
	from typing import Optional

	import pandas as pd
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots

	# Put the directory three levels above this file on sys.path (only once) so
	# that the absolute `visualization.*` import that follows can be resolved.
	_root = Path(__file__).resolve().parent.parent.parent
	if str(_root) not in sys.path:
	sys.path.insert(0, str(_root))

	from visualization.utils.learning_paths_utils import short_title, label_for_path


	class LearningPathsCharts:
	"""Plotly chart factory for learning-path lesson-level data."""

	SENTIMENT_SCORE_COLORS = {
	"avg_sentiment_score": "#1982C4",
	}

	def __init__(self, config_path=None):
	if config_path is None:
	config_path = Path(__file__).parent.parent / "config" / "viz_config.json"
	with open(config_path) as f:
	cfg = json.load(f)

	self.sentiment_colors = cfg.get("color_schemes", {}).get("sentiment_polarity", {})
	self.sentiment_order = cfg.get("sentiment_order", [])
	self.lp_config = cfg.get("learning_paths", {})
	self.brand_colors = self.lp_config.get("brand_colors", {})
	self.chart_height = cfg.get("dashboard", {}).get("chart_height", 400)

	# ─────────────────────────────────────────────────────────────────────────
	# 1. Sentiment Journey (avg_sentiment_score per lesson)
	# ─────────────────────────────────────────────────────────────────────────

	def create_sentiment_journey(self, df: pd.DataFrame,
	                             x_col: str = "lesson_order",
	                             group_col: str = "learning_path_id",
	                             title: str = "Sentiment Journey Along the Path") -> go.Figure:
	    """Line chart: avg_sentiment_score per lesson, one line per learning path.

	    Args:
	        df: Lesson-level metrics. Must contain ``x_col``; uses
	            ``avg_sentiment_score`` and, when present, ``content_title``
	            and ``total_comments`` for the hover text.
	        x_col: Column plotted on the x axis.
	        group_col: Column whose distinct values each get their own line.
	            When the column is absent, a single "All Lessons" line is drawn.
	        title: Figure title.

	    Returns:
	        The line chart, or a placeholder figure when ``df`` is empty or has
	        no non-null ``avg_sentiment_score`` values.
	    """
	    if df.empty or "avg_sentiment_score" not in df.columns or df["avg_sentiment_score"].isna().all():
	        return self._empty("No comment data available for sentiment scoring")

	    fig = go.Figure()
	    path_ids = sorted(df[group_col].unique()) if group_col in df.columns else [None]

	    for pid in path_ids:
	        sub = df if pid is None else df[df[group_col] == pid]
	        # Sort in both branches: the trace connects points in row order, so
	        # an unsorted frame would draw a zig-zag line.
	        sub = sub.sort_values(x_col)
	        if sub.empty:
	            continue
	        label = label_for_path(pid, self.lp_config) if pid is not None else "All Lessons"
	        color = self._path_color(pid, path_ids)

	        hover_texts = []
	        for _, row in sub.iterrows():
	            comment_count = row.get("total_comments", 0)
	            # A missing count (NaN/None) would make int() raise; show it as 0.
	            comment_count = 0 if pd.isna(comment_count) else int(comment_count)
	            hover_texts.append(
	                f"<b>Lesson {row[x_col]}</b><br>{short_title(row.get('content_title'))}<br>"
	                f"Score: {row['avg_sentiment_score']:.2f}<br>Comments: {comment_count:,}"
	            )

	        fig.add_trace(go.Scatter(
	            x=sub[x_col], y=sub["avg_sentiment_score"], name=label,
	            mode="lines+markers",
	            line=dict(color=color, width=2),
	            marker=dict(size=6),
	            hovertext=hover_texts, hoverinfo="text",
	        ))

	    # Dotted reference line at the neutral score.
	    fig.add_hline(y=0, line_dash="dot", line_color="gray", opacity=0.5)
	    fig.update_layout(
	        title=title, xaxis_title="Lesson Number", yaxis_title="Avg Sentiment Score",
	        yaxis=dict(range=[-2.2, 2.2], tickvals=[-2, -1, 0, 1, 2],
	                   ticktext=["Very Negative", "Negative", "Neutral",
	                             "Positive", "Very Positive"]),
	        height=self.chart_height, hovermode="x unified",
	        legend=dict(orientation="h", yanchor="bottom", y=1.02),
	    )
	    return fig

	# ─────────────────────────────────────────────────────────────────────────
	# 2. Sentiment Stacked Bar (counts per sentiment per lesson)
	# ─────────────────────────────────────────────────────────────────────────

	def create_sentiment_stacked_bar(self, df: pd.DataFrame,
	                                 x_col: str = "lesson_order",
	                                 path_id: Optional[int] = None,
	                                 title: str = "Sentiment Breakdown per Lesson") -> go.Figure:
	    """Stacked bar: sentiment category counts per lesson (one path at a time).

	    Args:
	        df: Lesson-level metrics with one column per sentiment category
	            named in ``self.sentiment_order``.
	        x_col: Column plotted on the x axis.
	        path_id: When given, only rows whose ``learning_path_id`` equals
	            this value are plotted; otherwise all rows are used.
	        title: Figure title.

	    Returns:
	        The stacked bar chart, or a placeholder figure when there are no
	        rows or no sentiment columns to plot.
	    """
	    # Check emptiness before touching any column: an empty frame may have
	    # no columns at all, and filtering/sorting it would raise KeyError.
	    if df.empty:
	        return self._empty("No data")
	    sub = df[df["learning_path_id"] == path_id] if path_id is not None else df
	    if sub.empty:
	        return self._empty("No data")
	    sub = sub.sort_values(x_col)

	    # Keep the configured category order; the numeric score is not a count.
	    sentiment_cols = [s for s in self.sentiment_order
	                      if s in sub.columns and s != "avg_sentiment_score"]
	    if not sentiment_cols:
	        return self._empty("No sentiment columns found")

	    fig = go.Figure()
	    for s in sentiment_cols:
	        fig.add_trace(go.Bar(
	            x=sub[x_col], y=sub[s], name=s,
	            marker_color=self.sentiment_colors.get(s, "#CCCCCC"),
	            hovertemplate=f"<b>{s}</b><br>Lesson %{{x}}<br>Count: %{{y}}<extra></extra>",
	        ))
	    fig.update_layout(
	        title=title, barmode="stack",
	        xaxis_title="Lesson Number", yaxis_title="Comment Count",
	        height=self.chart_height,
	        legend=dict(orientation="h", yanchor="bottom", y=1.02),
	    )
	    return fig

	# ─────────────────────────────────────────────────────────────────────────
	# 3. Completion Funnel
	# ─────────────────────────────────────────────────────────────────────────

	def create_completion_funnel(self, df: pd.DataFrame,
	                             x_col: str = "lesson_number",
	                             title: str = "Completion Rate Along the Path") -> go.Figure:
	    """Area chart: completion_rate per lesson, one line per learning path.

	    Args:
	        df: Lesson-level metrics containing ``learning_path_id`` and
	            ``completion_rate``.
	        x_col: Column plotted on the x axis. If it is absent from ``df``
	            but ``lesson_order`` is present, ``lesson_order`` is used.
	        title: Figure title.

	    Returns:
	        The area chart, or a placeholder figure when ``df`` is empty or has
	        no ``completion_rate`` column.
	    """
	    if df.empty or "completion_rate" not in df.columns:
	        return self._empty("completion_rate not available")

	    # The sibling charts default to "lesson_order"; fall back to it rather
	    # than raising KeyError when the frame only carries that column.
	    if x_col not in df.columns and "lesson_order" in df.columns:
	        x_col = "lesson_order"

	    fig = go.Figure()
	    path_ids = sorted(df["learning_path_id"].unique())

	    for pid in path_ids:
	        sub = df[df["learning_path_id"] == pid].sort_values(x_col)
	        if sub.empty:
	            continue
	        label = label_for_path(pid, self.lp_config)
	        color = self._path_color(pid, path_ids)
	        fig.add_trace(go.Scatter(
	            x=sub[x_col],
	            # Scaled by 100 for display as a percentage
	            # (assumes completion_rate is a 0-1 fraction — TODO confirm).
	            y=(sub["completion_rate"] * 100).round(1),
	            name=label, mode="lines+markers",
	            line=dict(color=color, width=2), marker=dict(size=5),
	            fill="tozeroy",
	            fillcolor=self._hex_to_rgba(color, 0.12),
	            hovertemplate=(
	                f"<b>{label}</b><br>"
	                "Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>"
	            ),
	        ))

	    fig.update_layout(
	        title=title, xaxis_title="Lesson Number",
	        yaxis_title="Completion Rate (%)", yaxis=dict(range=[0, 105]),
	        height=self.chart_height, hovermode="x unified",
	        legend=dict(orientation="h", yanchor="bottom", y=1.02),
	    )
	    return fig

	# ─────────────────────────────────────────────────────────────────────────
	# 4. Video Engagement
	# ─────────────────────────────────────────────────────────────────────────

	def create_video_engagement(self, df: pd.DataFrame,
	                            x_col: str = "lesson_order",
	                            title: str = "Video Completion Rate per Lesson") -> go.Figure:
	    """Bar chart: video_completion_rate per lesson, one trace per learning path.

	    Args:
	        df: Lesson-level metrics containing ``learning_path_id`` and
	            ``video_completion_rate``. When both ``total_starts`` and
	            ``total_completions`` are present they are shown in the hover.
	        x_col: Column plotted on the x axis.
	        title: Figure title.

	    Returns:
	        The grouped bar chart, or a placeholder figure when ``df`` is empty
	        or has no ``video_completion_rate`` column.
	    """
	    if df.empty or "video_completion_rate" not in df.columns:
	        return self._empty("video_completion_rate not available")

	    # The start/completion counts only enrich the hover; draw the chart
	    # without them rather than failing with KeyError when they are missing.
	    count_cols = ["total_starts", "total_completions"]
	    has_counts = all(col in df.columns for col in count_cols)

	    fig = go.Figure()
	    path_ids = sorted(df["learning_path_id"].unique())

	    for pid in path_ids:
	        sub = df[df["learning_path_id"] == pid].sort_values(x_col)
	        if sub.empty:
	            continue
	        label = label_for_path(pid, self.lp_config)
	        color = self._path_color(pid, path_ids)

	        hover = (
	            f"<b>{label}</b> — Lesson %{{x}}<br>"
	            "Video Completion: %{y:.1f}%<br>"
	        )
	        if has_counts:
	            # Plain "|" separator: the former "\|" was an invalid escape
	            # that rendered a literal backslash in the tooltip.
	            hover += "Starts: %{customdata[0]:,} | Completions: %{customdata[1]:,}"
	        hover += "<extra></extra>"

	        fig.add_trace(go.Bar(
	            x=sub[x_col],
	            y=(sub["video_completion_rate"].fillna(0) * 100).round(1),
	            name=label, marker_color=color,
	            hovertemplate=hover,
	            customdata=sub[count_cols].values if has_counts else None,
	        ))

	    fig.update_layout(
	        title=title, barmode="group",
	        xaxis_title="Lesson Number", yaxis_title="Video Completion Rate (%)",
	        yaxis=dict(range=[0, 105]),
	        height=self.chart_height,
	        legend=dict(orientation="h", yanchor="bottom", y=1.02),
	    )
	    return fig

	# ─────────────────────────────────────────────────────────────────────────
	# 5. Dual-Axis Engagement (headline chart)
	# ─────────────────────────────────────────────────────────────────────────

	def create_dual_axis_engagement(self, df: pd.DataFrame,
	                                x_col: str = "lesson_order",
	                                path_id: Optional[int] = None,
	                                title: str = "Completion Rate vs Sentiment Score") -> go.Figure:
	    """Dual-axis: completion_rate (bars, left) + avg_sentiment_score (line, right).

	    Args:
	        df: Lesson-level metrics. ``completion_rate`` and
	            ``avg_sentiment_score`` are each plotted only when the column
	            exists and has at least one non-null value.
	        x_col: Column plotted on the x axis.
	        path_id: When given, only rows whose ``learning_path_id`` equals
	            this value are plotted; otherwise all rows are used.
	        title: Figure title.

	    Returns:
	        The dual-axis figure, or a placeholder figure when there are no
	        rows. A note is added to the chart when no sentiment data exists.
	    """
	    # Check emptiness before touching any column: an empty frame may have
	    # no columns at all, and filtering/sorting it would raise KeyError.
	    if df.empty:
	        return self._empty("No data")
	    sub = df[df["learning_path_id"] == path_id] if path_id is not None else df
	    if sub.empty:
	        return self._empty("No data")
	    sub = sub.sort_values(x_col)

	    fig = make_subplots(specs=[[{"secondary_y": True}]])

	    has_completion = "completion_rate" in sub.columns and sub["completion_rate"].notna().any()
	    has_sentiment = ("avg_sentiment_score" in sub.columns
	                     and sub["avg_sentiment_score"].notna().any())

	    if has_completion:
	        fig.add_trace(go.Bar(
	            x=sub[x_col],
	            y=(sub["completion_rate"].fillna(0) * 100).round(1),
	            name="Completion Rate (%)",
	            marker_color="#1982C4", opacity=0.7,
	            hovertemplate="Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>",
	        ), secondary_y=False)

	    if has_sentiment:
	        fig.add_trace(go.Scatter(
	            x=sub[x_col], y=sub["avg_sentiment_score"].round(2),
	            name="Avg Sentiment Score",
	            mode="lines+markers",
	            line=dict(color="#FF6B35", width=2), marker=dict(size=6),
	            hovertemplate="Lesson %{x}<br>Sentiment: %{y:.2f}<extra></extra>",
	        ), secondary_y=True)
	        # Neutral-score reference line, drawn against the right-hand axis.
	        fig.add_hline(y=0, line_dash="dot", line_color="gray",
	                      opacity=0.5, secondary_y=True)
	    else:
	        fig.add_annotation(
	            text="No comment data for sentiment scoring",
	            xref="paper", yref="paper", x=0.5, y=0.95,
	            showarrow=False, font=dict(size=11, color="gray"),
	        )

	    fig.update_layout(
	        title=title, xaxis_title="Lesson Number", height=self.chart_height,
	        legend=dict(orientation="h", yanchor="bottom", y=1.02),
	        hovermode="x unified",
	    )
	    fig.update_yaxes(title_text="Completion Rate (%)", range=[0, 105],
	                     secondary_y=False)
	    fig.update_yaxes(title_text="Avg Sentiment Score",
	                     range=[-2.2, 2.2], secondary_y=True)
	    return fig

	# ─────────────────────────────────────────────────────────────────────────
	# 6. Comment Volume per Lesson
	# ─────────────────────────────────────────────────────────────────────────

	def create_comment_volume_chart(self, df: pd.DataFrame,
	                                x_col: str = "lesson_order",
	                                title: str = "Comment Volume per Lesson") -> go.Figure:
	    """Grouped bars of ``total_comments`` per lesson.

	    One bar trace is drawn per distinct ``learning_path_id``; if that
	    column is missing, all rows go into a single "All Lessons" trace.
	    Returns a placeholder figure when ``df`` is empty or lacks
	    ``total_comments``.
	    """
	    if df.empty or "total_comments" not in df.columns:
	        return self._empty("No comment volume data available")

	    if "learning_path_id" in df.columns:
	        path_ids = sorted(df["learning_path_id"].unique())
	    else:
	        path_ids = [None]

	    chart = go.Figure()
	    for pid in path_ids:
	        if pid is None:
	            lesson_rows = df
	        else:
	            lesson_rows = df[df["learning_path_id"] == pid].sort_values(x_col)
	        if lesson_rows.empty:
	            continue

	        trace_name = "All Lessons" if pid is None else label_for_path(pid, self.lp_config)
	        hover = (
	            f"<b>{trace_name}</b> — Lesson %{{x}}<br>"
	            "Comments: %{y:,}<extra></extra>"
	        )
	        bars = go.Bar(
	            x=lesson_rows[x_col],
	            y=lesson_rows["total_comments"],
	            name=trace_name,
	            marker_color=self._path_color(pid, path_ids),
	            hovertemplate=hover,
	        )
	        chart.add_trace(bars)

	    legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.02)
	    chart.update_layout(
	        title=title,
	        barmode="group",
	        xaxis_title="Lesson Number",
	        yaxis_title="Total Comments",
	        height=self.chart_height,
	        legend=legend_above_plot,
	    )
	    return chart

	# ─────────────────────────────────────────────────────────────────────────
	# 7. Drop-off callout chart
	# ─────────────────────────────────────────────────────────────────────────

	def create_dropoff_chart(self, dropoffs: pd.DataFrame,
	                         title: str = "Top Drop-off Points") -> go.Figure:
	    """Horizontal bars for the lesson-to-lesson completion drops in ``dropoffs``.

	    Each row becomes one bar labelled "<path> L<lesson>: <short title>";
	    the bar length is the row's ``dropoff`` value times 100, rounded to one
	    decimal. Returns a placeholder figure when ``dropoffs`` is empty.
	    """
	    if dropoffs.empty:
	        return self._empty("No significant drop-offs detected")

	    bar_labels = []
	    for _, row in dropoffs.iterrows():
	        path_label = label_for_path(row['learning_path_id'], self.lp_config)
	        # Prefer lesson_order, then lesson_number, then a "?" marker.
	        lesson_no = row.get('lesson_order', row.get('lesson_number', '?'))
	        lesson_title = short_title(row.get('content_title'), 28)
	        bar_labels.append(f"{path_label} L{lesson_no}: {lesson_title}")

	    drop_pct = (dropoffs["dropoff"] * 100).round(1)
	    bar_text = [f"-{pct:.1f}%" for pct in drop_pct]

	    bars = go.Bar(
	        y=bar_labels,
	        x=drop_pct,
	        orientation="h",
	        marker=dict(color="#D32F2F", opacity=0.85),
	        text=bar_text,
	        textposition="auto",
	        hovertemplate="<b>%{y}</b><br>Drop: -%{x:.1f}%<extra></extra>",
	    )
	    chart = go.Figure(bars)

	    # Height grows with the number of bars, with a floor of 300 px.
	    chart_height = max(300, len(dropoffs) * 60 + 80)
	    chart.update_layout(
	        title=title,
	        xaxis_title="Completion Rate Drop (%)",
	        height=chart_height,
	        yaxis={"categoryorder": "total ascending"},
	    )
	    return chart

	# ─────────────────────────────────────────────────────────────────────────
	# 8. Lesson × Sentiment Heatmap
	# ─────────────────────────────────────────────────────────────────────────

	def create_lesson_sentiment_heatmap(self, df: pd.DataFrame,
	                                    path_id: Optional[int] = None,
	                                    title: str = "Sentiment Heatmap per Lesson") -> go.Figure:
	    """Heatmap: lesson_order × sentiment_category (count).

	    Args:
	        df: Lesson-level metrics with ``lesson_order`` and one column per
	            sentiment category named in ``self.sentiment_order``.
	        path_id: When given, only rows whose ``learning_path_id`` equals
	            this value are plotted; otherwise all rows are used.
	        title: Figure title.

	    Returns:
	        The heatmap, or a placeholder figure when there are no rows or no
	        sentiment columns to plot.
	    """
	    # Check emptiness before touching any column: an empty frame may have
	    # no columns at all, and filtering/sorting it would raise KeyError.
	    if df.empty:
	        return self._empty("No data")
	    sub = df[df["learning_path_id"] == path_id] if path_id is not None else df
	    if sub.empty:
	        return self._empty("No data")
	    sub = sub.sort_values("lesson_order")

	    sent_cols = [s for s in self.sentiment_order if s in sub.columns]
	    if not sent_cols:
	        return self._empty("No sentiment columns")

	    # Transpose so that rows are sentiment categories and columns are lessons.
	    z_data = sub[sent_cols].fillna(0).values.T
	    x_labels = [f"L{r}" for r in sub["lesson_order"]]
	    y_labels = sent_cols

	    fig = go.Figure(go.Heatmap(
	        z=z_data, x=x_labels, y=y_labels,
	        colorscale="Blues",
	        text=z_data, texttemplate="%{text:.0f}",
	        hovertemplate="<b>%{y}</b> — Lesson %{x}<br>Count: %{z}<extra></extra>",
	        colorbar=dict(title="Comments"),
	    ))
	    fig.update_layout(
	        title=title, xaxis_title="Lesson", yaxis_title="Sentiment",
	        height=self.chart_height,
	    )
	    return fig

	# ─────────────────────────────────────────────────────────────────────────
	# 9. Method-wide completion (single continuous funnel)
	# ─────────────────────────────────────────────────────────────────────────

	def create_method_funnel(self, df: pd.DataFrame,
	                         title: str = "Method-Wide Completion Funnel") -> go.Figure:
	    """Filled completion-rate lines over the whole method, with path dividers.

	    The x axis is ``method_lesson_number`` when that column exists,
	    otherwise ``lesson_order``. Each learning path gets its own filled
	    trace, and a dashed, labelled vertical line marks where every path
	    after the first begins. Returns a placeholder figure when ``df`` is
	    empty or lacks ``completion_rate``.
	    """
	    if df.empty or "completion_rate" not in df.columns:
	        return self._empty("method-wide data not available")

	    if "method_lesson_number" in df.columns:
	        x_col = "method_lesson_number"
	    else:
	        x_col = "lesson_order"
	    ordered = df.sort_values(x_col)

	    chart = go.Figure()

	    # One filled trace per learning path, coloured by its sorted position.
	    ordered_ids = sorted(ordered["learning_path_id"].unique())
	    for pid in ordered_ids:
	        path_rows = ordered[ordered["learning_path_id"] == pid]
	        if path_rows.empty:
	            continue
	        color = self._path_color(pid, ordered_ids)
	        completion_pct = (path_rows["completion_rate"].fillna(0) * 100).round(1)
	        chart.add_trace(go.Scatter(
	            x=path_rows[x_col],
	            y=completion_pct,
	            name=label_for_path(pid, self.lp_config),
	            mode="lines+markers",
	            line=dict(color=color, width=2),
	            marker=dict(size=5),
	            fill="tozeroy",
	            fillcolor=self._hex_to_rgba(color, 0.13),
	            hovertemplate="Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>",
	        ))

	    # Divider half a step before the first lesson of every path except the
	    # earliest one.
	    first_lessons = ordered.groupby("learning_path_id")[x_col].min().sort_values()
	    for pid, first_lesson in first_lessons.iloc[1:].items():
	        divider_x = first_lesson - 0.5
	        chart.add_vline(x=divider_x, line_dash="dash",
	                        line_color="gray", opacity=0.5)
	        chart.add_annotation(
	            x=divider_x, y=105,
	            text=label_for_path(pid, self.lp_config),
	            showarrow=False, textangle=-90,
	            font=dict(size=9, color="gray"),
	        )

	    chart.update_layout(
	        title=title,
	        xaxis_title="Method Lesson Number",
	        yaxis_title="Completion Rate (%)",
	        yaxis=dict(range=[0, 115]),
	        height=self.chart_height,
	        hovermode="x unified",
	        legend=dict(orientation="h", yanchor="bottom", y=1.02),
	    )
	    return chart

	# ─────────────────────────────────────────────────────────────────────────
	# 10. Sentiment journey — method-wide (continuous x)
	# ─────────────────────────────────────────────────────────────────────────

	def create_method_sentiment_journey(self, df: pd.DataFrame,
	                                    title: str = "Sentiment Journey — Full Method") -> go.Figure:
	    """Line chart of ``avg_sentiment_score`` over the whole method.

	    The x axis is ``method_lesson_number`` when that column exists,
	    otherwise ``lesson_order``; each learning path is drawn as its own
	    line. Returns a placeholder figure when ``df`` is empty or lacks
	    ``avg_sentiment_score``.
	    """
	    if df.empty or "avg_sentiment_score" not in df.columns:
	        return self._empty("avg_sentiment_score not available")

	    if "method_lesson_number" in df.columns:
	        x_col = "method_lesson_number"
	    else:
	        x_col = "lesson_order"
	    ordered = df.sort_values(x_col)
	    path_ids = sorted(ordered["learning_path_id"].unique())

	    chart = go.Figure()
	    for pid in path_ids:
	        path_rows = ordered[ordered["learning_path_id"] == pid]
	        if path_rows.empty:
	            continue
	        line_color = self._path_color(pid, path_ids)
	        chart.add_trace(go.Scatter(
	            x=path_rows[x_col],
	            y=path_rows["avg_sentiment_score"].round(2),
	            name=label_for_path(pid, self.lp_config),
	            mode="lines+markers",
	            line=dict(color=line_color, width=2),
	            marker=dict(size=5),
	            hovertemplate="Lesson %{x}<br>Score: %{y:.2f}<extra></extra>",
	        ))

	    # Dotted reference line at the neutral score.
	    chart.add_hline(y=0, line_dash="dot", line_color="gray", opacity=0.5)
	    chart.update_layout(
	        title=title,
	        xaxis_title="Method Lesson Number",
	        yaxis_title="Avg Sentiment Score",
	        yaxis=dict(range=[-2.2, 2.2]),
	        height=self.chart_height,
	        hovermode="x unified",
	        legend=dict(orientation="h", yanchor="bottom", y=1.02),
	    )
	    return chart

	# ─────────────────────────────────────────────────────────────────────────
	# Helpers
	# ─────────────────────────────────────────────────────────────────────────

	@staticmethod
	def _hex_to_rgba(hex_color: str, alpha: float = 0.15) -> str:
	"""Convert a #RRGGBB hex string to an rgba(...) string safe for Plotly fillcolor."""
	h = hex_color.lstrip("#")
	if len(h) == 6:
	r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
	return f"rgba({r},{g},{b},{alpha})"
	# Fall back to a neutral transparent fill if the color is unexpected
	return f"rgba(150,150,150,{alpha})"

	def _path_color(self, path_id, all_path_ids: list) -> str:
	palette = [
	"#1982C4", "#FF6B35", "#6A4C93", "#4CAF50",
	"#E91E63", "#00BCD4", "#FF9800", "#9C27B0",
	]
	try:
	idx = list(all_path_ids).index(path_id)
	return palette[idx % len(palette)]
	except (ValueError, TypeError):
	return "#607D8B"

	@staticmethod
	def _empty(message: str, height: int = 300) -> go.Figure:
	    """Return a blank figure of ``height`` px showing ``message`` in its centre."""
	    centered_note = dict(
	        text=message,
	        xref="paper", yref="paper",
	        x=0.5, y=0.5,
	        showarrow=False,
	        font=dict(size=14),
	    )
	    placeholder = go.Figure()
	    placeholder.add_annotation(**centered_note)
	    placeholder.update_layout(height=height)
	    return placeholder