# Spaces: MusoraProductDepartment / Sentiment_analysis (Sleeping)
# File size: 25,005 Bytes
# 599973c

"""
Learning Paths — Plotly chart factory.
All methods accept a merged lesson-metrics DataFrame (output of
learning_paths_utils.merge_lesson_metrics / merge_method_wide) and return
a plotly Figure.
"""
import json
import sys
from pathlib import Path
from typing import Optional

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

_root = Path(__file__).resolve().parent.parent.parent
if str(_root) not in sys.path:
    sys.path.insert(0, str(_root))

from visualization.utils.learning_paths_utils import short_title, label_for_path


class LearningPathsCharts:
    """Plotly chart factory for learning-path lesson-level data."""

    SENTIMENT_SCORE_COLORS = {
        "avg_sentiment_score": "#1982C4",
    }

    def __init__(self, config_path=None):
        if config_path is None:
            config_path = Path(__file__).parent.parent / "config" / "viz_config.json"
        with open(config_path) as f:
            cfg = json.load(f)

        self.sentiment_colors = cfg.get("color_schemes", {}).get("sentiment_polarity", {})
        self.sentiment_order  = cfg.get("sentiment_order", [])
        self.lp_config        = cfg.get("learning_paths", {})
        self.brand_colors     = self.lp_config.get("brand_colors", {})
        self.chart_height     = cfg.get("dashboard", {}).get("chart_height", 400)

    # ─────────────────────────────────────────────────────────────────────────
    # 1. Sentiment Journey (avg_sentiment_score per lesson)
    # ─────────────────────────────────────────────────────────────────────────

    def create_sentiment_journey(self, df: pd.DataFrame,
                                 x_col: str = "lesson_order",
                                 group_col: str = "learning_path_id",
                                 title: str = "Sentiment Journey Along the Path") -> go.Figure:
        """Line chart: avg_sentiment_score per lesson, one line per learning path.

        Args:
            df: Lesson-level metrics. Must contain ``x_col`` and
                ``avg_sentiment_score``; ``content_title`` and
                ``total_comments`` are read for the hover text when present.
            x_col: Column plotted on the x axis.
            group_col: Column identifying the learning path. When it is not a
                column of ``df``, all rows are drawn as one "All Lessons" line.
            title: Figure title.

        Returns:
            The line chart, or a placeholder figure when ``df`` is empty or
            holds no non-null ``avg_sentiment_score`` value.
        """
        if df.empty or "avg_sentiment_score" not in df.columns or df["avg_sentiment_score"].isna().all():
            return self._empty("No comment data available for sentiment scoring")

        fig = go.Figure()
        path_ids = sorted(df[group_col].unique()) if group_col in df.columns else [None]

        for pid in path_ids:
            rows = df[df[group_col] == pid] if pid is not None else df
            # Sort in both cases so the line is always drawn in x order,
            # including the ungrouped "All Lessons" fallback.
            sub = rows.sort_values(x_col)
            if sub.empty:
                continue
            label = label_for_path(pid, self.lp_config) if pid is not None else "All Lessons"
            color = self._path_color(pid, path_ids)

            hover_texts = []
            for _, row in sub.iterrows():
                comments = row.get("total_comments", 0)
                # A missing count (NaN/None) would make int() raise; show 0 instead.
                comments = 0 if pd.isna(comments) else int(comments)
                hover_texts.append(
                    f"<b>Lesson {row[x_col]}</b><br>{short_title(row.get('content_title'))}<br>"
                    f"Score: {row['avg_sentiment_score']:.2f}<br>Comments: {comments:,}"
                )

            fig.add_trace(go.Scatter(
                x=sub[x_col], y=sub["avg_sentiment_score"], name=label,
                mode="lines+markers",
                line=dict(color=color, width=2),
                marker=dict(size=6),
                hovertext=hover_texts, hoverinfo="text",
            ))

        # Dotted reference line at the neutral score.
        fig.add_hline(y=0, line_dash="dot", line_color="gray", opacity=0.5)
        fig.update_layout(
            title=title, xaxis_title="Lesson Number", yaxis_title="Avg Sentiment Score",
            yaxis=dict(range=[-2.2, 2.2], tickvals=[-2, -1, 0, 1, 2],
                       ticktext=["Very Negative", "Negative", "Neutral",
                                 "Positive", "Very Positive"]),
            height=self.chart_height, hovermode="x unified",
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 2. Sentiment Stacked Bar (counts per sentiment per lesson)
    # ─────────────────────────────────────────────────────────────────────────

    def create_sentiment_stacked_bar(self, df: pd.DataFrame,
                                     x_col: str = "lesson_order",
                                     path_id: Optional[int] = None,
                                     title: str = "Sentiment Breakdown per Lesson") -> go.Figure:
        """Stacked bar: sentiment category counts per lesson (one path at a time).

        Args:
            df: Lesson-level metrics with one column per sentiment category
                named in ``self.sentiment_order``.
            x_col: Column plotted on the x axis.
            path_id: When given, only rows whose ``learning_path_id`` equals
                this value are plotted; otherwise all rows are used.
            title: Figure title.

        Returns:
            The stacked bar chart, or a placeholder figure when there are no
            rows or none of the configured sentiment columns is present.
        """
        # Check emptiness before touching any column: an empty frame may have
        # no columns at all, and filtering or sorting it would raise KeyError.
        if df.empty:
            return self._empty("No data")
        sub = df[df["learning_path_id"] == path_id] if path_id is not None else df
        if sub.empty:
            return self._empty("No data")
        sub = sub.sort_values(x_col)

        # Keep the configured category order; the score column is not a count.
        sentiment_cols = [s for s in self.sentiment_order
                          if s in sub.columns and s != "avg_sentiment_score"]
        if not sentiment_cols:
            return self._empty("No sentiment columns found")

        fig = go.Figure()
        for s in sentiment_cols:
            fig.add_trace(go.Bar(
                x=sub[x_col], y=sub[s], name=s,
                marker_color=self.sentiment_colors.get(s, "#CCCCCC"),
                hovertemplate=f"<b>{s}</b><br>Lesson %{{x}}<br>Count: %{{y}}<extra></extra>",
            ))
        fig.update_layout(
            title=title, barmode="stack",
            xaxis_title="Lesson Number", yaxis_title="Comment Count",
            height=self.chart_height,
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 3. Completion Funnel
    # ─────────────────────────────────────────────────────────────────────────

    def create_completion_funnel(self, df: pd.DataFrame,
                                 x_col: str = "lesson_number",
                                 title: str = "Completion Rate Along the Path") -> go.Figure:
        """Filled line chart of completion_rate (as a percentage) per lesson.

        One trace is drawn per distinct ``learning_path_id``. A placeholder
        figure is returned when ``df`` is empty or lacks ``completion_rate``.
        """
        if df.empty or not ("completion_rate" in df.columns):
            return self._empty("completion_rate not available")

        figure = go.Figure()
        ordered_paths = sorted(df["learning_path_id"].unique())

        for path in ordered_paths:
            lessons = df.loc[df["learning_path_id"] == path].sort_values(x_col)
            if lessons.empty:
                continue

            path_label = label_for_path(path, self.lp_config)
            line_color = self._path_color(path, ordered_paths)
            # completion_rate is multiplied by 100 to plot percent.
            percent = (lessons["completion_rate"] * 100).round(1)
            hover = (
                f"<b>{path_label}</b><br>"
                + "Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>"
            )

            trace = go.Scatter(
                x=lessons[x_col],
                y=percent,
                name=path_label,
                mode="lines+markers",
                line={"color": line_color, "width": 2},
                marker={"size": 5},
                fill="tozeroy",
                fillcolor=self._hex_to_rgba(line_color, 0.12),
                hovertemplate=hover,
            )
            figure.add_trace(trace)

        figure.update_layout(
            title=title,
            xaxis_title="Lesson Number",
            yaxis_title="Completion Rate (%)",
            yaxis={"range": [0, 105]},
            height=self.chart_height,
            hovermode="x unified",
            legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
        )
        return figure

    # ─────────────────────────────────────────────────────────────────────────
    # 4. Video Engagement
    # ─────────────────────────────────────────────────────────────────────────

    def create_video_engagement(self, df: pd.DataFrame,
                                x_col: str = "lesson_order",
                                title: str = "Video Completion Rate per Lesson") -> go.Figure:
        """Grouped bar chart: video_completion_rate (as a percentage) per lesson.

        Args:
            df: Lesson-level metrics with ``learning_path_id``, ``x_col`` and
                ``video_completion_rate``. ``total_starts`` and
                ``total_completions`` are optional and only feed the hover text.
            x_col: Column plotted on the x axis.
            title: Figure title.

        Returns:
            The bar chart (one trace per learning path), or a placeholder
            figure when ``df`` is empty or lacks ``video_completion_rate``.
        """
        if df.empty or "video_completion_rate" not in df.columns:
            return self._empty("video_completion_rate not available")

        # The start/completion counts are hover-only extras; if either column
        # is missing, drop that hover line instead of failing with a KeyError.
        count_cols = ["total_starts", "total_completions"]
        has_counts = all(col in df.columns for col in count_cols)

        fig = go.Figure()
        path_ids = sorted(df["learning_path_id"].unique())

        for pid in path_ids:
            sub = df[df["learning_path_id"] == pid].sort_values(x_col)
            if sub.empty:
                continue
            label = label_for_path(pid, self.lp_config)
            color = self._path_color(pid, path_ids)

            hover = (
                f"<b>{label}</b> — Lesson %{{x}}<br>"
                "Video Completion: %{y:.1f}%<br>"
            )
            extra_kwargs = {}
            if has_counts:
                hover += "Starts: %{customdata[0]:,} | Completions: %{customdata[1]:,}"
                extra_kwargs["customdata"] = sub[count_cols].values
            hover += "<extra></extra>"

            fig.add_trace(go.Bar(
                x=sub[x_col],
                # Missing rates are drawn as 0 % bars.
                y=(sub["video_completion_rate"].fillna(0) * 100).round(1),
                name=label, marker_color=color,
                hovertemplate=hover,
                **extra_kwargs,
            ))

        fig.update_layout(
            title=title, barmode="group",
            xaxis_title="Lesson Number", yaxis_title="Video Completion Rate (%)",
            yaxis=dict(range=[0, 105]),
            height=self.chart_height,
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 5. Dual-Axis Engagement (headline chart)
    # ─────────────────────────────────────────────────────────────────────────

    def create_dual_axis_engagement(self, df: pd.DataFrame,
                                    x_col: str = "lesson_order",
                                    path_id: Optional[int] = None,
                                    title: str = "Completion Rate vs Sentiment Score") -> go.Figure:
        """Completion rate bars (primary y axis) with a sentiment line (secondary y axis).

        Rows are restricted to ``path_id`` when one is given. Each series is
        drawn only if its column exists and has at least one non-null value;
        when the sentiment series is unavailable a note is shown instead.
        """
        selection = df if path_id is None else df[df["learning_path_id"] == path_id]
        lessons = selection.sort_values(x_col)
        if lessons.empty:
            return self._empty("No data")

        figure = make_subplots(specs=[[{"secondary_y": True}]])

        def has_values(column: str) -> bool:
            # True when the column is present and not entirely null.
            return column in lessons.columns and lessons[column].notna().any()

        show_completion = has_values("completion_rate")
        show_sentiment = has_values("avg_sentiment_score")

        if show_completion:
            completion_pct = (lessons["completion_rate"].fillna(0) * 100).round(1)
            bars = go.Bar(
                x=lessons[x_col],
                y=completion_pct,
                name="Completion Rate (%)",
                marker_color="#1982C4",
                opacity=0.7,
                hovertemplate="Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>",
            )
            figure.add_trace(bars, secondary_y=False)

        if show_sentiment:
            score_line = go.Scatter(
                x=lessons[x_col],
                y=lessons["avg_sentiment_score"].round(2),
                name="Avg Sentiment Score",
                mode="lines+markers",
                line={"color": "#FF6B35", "width": 2},
                marker={"size": 6},
                hovertemplate="Lesson %{x}<br>Sentiment: %{y:.2f}<extra></extra>",
            )
            figure.add_trace(score_line, secondary_y=True)
            # Neutral-score reference line on the sentiment axis.
            figure.add_hline(y=0, line_dash="dot", line_color="gray",
                             opacity=0.5, secondary_y=True)
        else:
            figure.add_annotation(
                text="No comment data for sentiment scoring",
                xref="paper", yref="paper", x=0.5, y=0.95,
                showarrow=False, font={"size": 11, "color": "gray"},
            )

        figure.update_layout(
            title=title,
            xaxis_title="Lesson Number",
            height=self.chart_height,
            legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
            hovermode="x unified",
        )
        figure.update_yaxes(title_text="Completion Rate (%)", range=[0, 105],
                            secondary_y=False)
        figure.update_yaxes(title_text="Avg Sentiment Score",
                            range=[-2.2, 2.2], secondary_y=True)
        return figure

    # ─────────────────────────────────────────────────────────────────────────
    # 6. Comment Volume per Lesson
    # ─────────────────────────────────────────────────────────────────────────

    def create_comment_volume_chart(self, df: pd.DataFrame,
                                    x_col: str = "lesson_order",
                                    title: str = "Comment Volume per Lesson") -> go.Figure:
        """Grouped bar chart of total_comments per lesson.

        One trace is drawn per ``learning_path_id``; if that column is absent
        a single "All Lessons" trace is drawn from ``df`` as given. A
        placeholder figure is returned when ``df`` is empty or lacks
        ``total_comments``.
        """
        if df.empty or not ("total_comments" in df.columns):
            return self._empty("No comment volume data available")

        figure = go.Figure()
        if "learning_path_id" in df.columns:
            path_ids = sorted(df["learning_path_id"].unique())
        else:
            path_ids = [None]

        for pid in path_ids:
            ungrouped = pid is None
            if ungrouped:
                lessons = df
            else:
                lessons = df[df["learning_path_id"] == pid].sort_values(x_col)
            if lessons.empty:
                continue

            legend_name = "All Lessons" if ungrouped else label_for_path(pid, self.lp_config)
            bar_color = self._path_color(pid, path_ids)
            hover = (
                f"<b>{legend_name}</b> — Lesson %{{x}}<br>"
                + "Comments: %{y:,}<extra></extra>"
            )
            figure.add_trace(go.Bar(
                x=lessons[x_col],
                y=lessons["total_comments"],
                name=legend_name,
                marker_color=bar_color,
                hovertemplate=hover,
            ))

        figure.update_layout(
            title=title,
            barmode="group",
            xaxis_title="Lesson Number",
            yaxis_title="Total Comments",
            height=self.chart_height,
            legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
        )
        return figure

    # ─────────────────────────────────────────────────────────────────────────
    # 7. Drop-off callout chart
    # ─────────────────────────────────────────────────────────────────────────

    def create_dropoff_chart(self, dropoffs: pd.DataFrame,
                             title: str = "Top Drop-off Points") -> go.Figure:
        """Horizontal bar chart of the ``dropoff`` column, shown as percentages.

        Each bar is labelled with the path label, the lesson number
        (``lesson_order``, else ``lesson_number``, else "?") and a shortened
        content title. Returns a placeholder figure when ``dropoffs`` is empty.
        """
        if dropoffs.empty:
            return self._empty("No significant drop-offs detected")

        bar_labels = []
        for _, record in dropoffs.iterrows():
            path_name = label_for_path(record['learning_path_id'], self.lp_config)
            lesson_no = record.get('lesson_order', record.get('lesson_number', '?'))
            lesson_title = short_title(record.get('content_title'), 28)
            bar_labels.append(f"{path_name} L{lesson_no}: {lesson_title}")

        drop_pct = (dropoffs["dropoff"] * 100).round(1)
        bar_text = [f"-{pct:.1f}%" for pct in drop_pct]

        bars = go.Bar(
            y=bar_labels,
            x=drop_pct,
            orientation="h",
            marker={"color": "#D32F2F", "opacity": 0.85},
            text=bar_text,
            textposition="auto",
            hovertemplate="<b>%{y}</b><br>Drop: -%{x:.1f}%<extra></extra>",
        )
        figure = go.Figure(bars)
        # Height grows with the number of rows, with a 300 px minimum.
        figure.update_layout(
            title=title,
            xaxis_title="Completion Rate Drop (%)",
            height=max(300, len(dropoffs) * 60 + 80),
            yaxis={"categoryorder": "total ascending"},
        )
        return figure

    # ─────────────────────────────────────────────────────────────────────────
    # 8. Lesson × Sentiment Heatmap
    # ─────────────────────────────────────────────────────────────────────────

    def create_lesson_sentiment_heatmap(self, df: pd.DataFrame,
                                        path_id: Optional[int] = None,
                                        title: str = "Sentiment Heatmap per Lesson") -> go.Figure:
        """Heatmap: lesson_order × sentiment_category (count).

        Args:
            df: Lesson-level metrics with ``lesson_order`` and one column per
                sentiment category named in ``self.sentiment_order``.
            path_id: When given, only rows whose ``learning_path_id`` equals
                this value are plotted; otherwise all rows are used.
            title: Figure title.

        Returns:
            The heatmap, or a placeholder figure when there are no rows or no
            configured sentiment column is present.
        """
        # Check emptiness before touching any column: an empty frame may have
        # no columns at all, and filtering or sorting it would raise KeyError.
        if df.empty:
            return self._empty("No data")
        sub = df[df["learning_path_id"] == path_id] if path_id is not None else df
        if sub.empty:
            return self._empty("No data")
        sub = sub.sort_values("lesson_order")

        # Cells are comment counts, so the score column is excluded — same
        # filter as the stacked bar chart.
        sent_cols = [s for s in self.sentiment_order
                     if s in sub.columns and s != "avg_sentiment_score"]
        if not sent_cols:
            return self._empty("No sentiment columns")

        # Transpose so rows are sentiment categories and columns are lessons.
        z_data    = sub[sent_cols].fillna(0).values.T
        x_labels  = [f"L{r}" for r in sub["lesson_order"]]
        y_labels  = sent_cols

        fig = go.Figure(go.Heatmap(
            z=z_data, x=x_labels, y=y_labels,
            colorscale="Blues",
            text=z_data, texttemplate="%{text:.0f}",
            hovertemplate="<b>%{y}</b> — Lesson %{x}<br>Count: %{z}<extra></extra>",
            colorbar=dict(title="Comments"),
        ))
        fig.update_layout(
            title=title, xaxis_title="Lesson", yaxis_title="Sentiment",
            height=self.chart_height,
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # 9. Method-wide completion (single continuous funnel)
    # ─────────────────────────────────────────────────────────────────────────

    def create_method_funnel(self, df: pd.DataFrame,
                             title: str = "Method-Wide Completion Funnel") -> go.Figure:
        """Filled completion-rate lines over the whole method, with path dividers.

        The x axis is ``method_lesson_number`` when that column exists,
        otherwise ``lesson_order``. A placeholder figure is returned when
        ``df`` is empty or lacks ``completion_rate``.
        """
        if df.empty or not ("completion_rate" in df.columns):
            return self._empty("method-wide data not available")

        if "method_lesson_number" in df.columns:
            x_col = "method_lesson_number"
        else:
            x_col = "lesson_order"
        ordered = df.sort_values(x_col)

        figure = go.Figure()

        # One filled trace per learning path.
        sorted_paths = sorted(ordered["learning_path_id"].unique())
        for path in sorted_paths:
            lessons = ordered[ordered["learning_path_id"] == path]
            if lessons.empty:
                continue
            line_color = self._path_color(path, sorted_paths)
            completion_pct = (lessons["completion_rate"].fillna(0) * 100).round(1)
            figure.add_trace(go.Scatter(
                x=lessons[x_col],
                y=completion_pct,
                name=label_for_path(path, self.lp_config),
                mode="lines+markers",
                line={"color": line_color, "width": 2},
                marker={"size": 5},
                fill="tozeroy",
                fillcolor=self._hex_to_rgba(line_color, 0.13),
                hovertemplate="Lesson %{x}<br>Completion: %{y:.1f}%<extra></extra>",
            ))

        # Dashed divider half a step before the first lesson of every path
        # except the earliest one, annotated with that path's label.
        first_lessons = ordered.groupby("learning_path_id")[x_col].min().sort_values()
        for path, first_x in first_lessons.iloc[1:].items():
            divider_x = first_x - 0.5
            figure.add_vline(x=divider_x, line_dash="dash",
                             line_color="gray", opacity=0.5)
            figure.add_annotation(
                x=divider_x, y=105,
                text=label_for_path(path, self.lp_config),
                showarrow=False, textangle=-90,
                font={"size": 9, "color": "gray"},
            )

        figure.update_layout(
            title=title,
            xaxis_title="Method Lesson Number",
            yaxis_title="Completion Rate (%)",
            yaxis={"range": [0, 115]},
            height=self.chart_height,
            hovermode="x unified",
            legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
        )
        return figure

    # ─────────────────────────────────────────────────────────────────────────
    # 10. Sentiment journey — method-wide (continuous x)
    # ─────────────────────────────────────────────────────────────────────────

    def create_method_sentiment_journey(self, df: pd.DataFrame,
                                        title: str = "Sentiment Journey — Full Method") -> go.Figure:
        """Line chart: avg_sentiment_score across the whole method, one line per path.

        Args:
            df: Lesson-level metrics with ``learning_path_id``,
                ``avg_sentiment_score`` and either ``method_lesson_number``
                (preferred x axis) or ``lesson_order``.
            title: Figure title.

        Returns:
            The line chart, or a placeholder figure when ``df`` is empty,
            lacks ``avg_sentiment_score``, or that column is entirely null.
        """
        # The all-null check matches create_sentiment_journey: with no scores
        # there is nothing to draw, so show the placeholder rather than
        # empty axes.
        if (df.empty or "avg_sentiment_score" not in df.columns
                or df["avg_sentiment_score"].isna().all()):
            return self._empty("avg_sentiment_score not available")

        x_col = "method_lesson_number" if "method_lesson_number" in df.columns else "lesson_order"
        df_s = df.sort_values(x_col)

        path_ids = sorted(df_s["learning_path_id"].unique())
        fig = go.Figure()
        for pid in path_ids:
            sub = df_s[df_s["learning_path_id"] == pid]
            if sub.empty:
                continue
            color = self._path_color(pid, path_ids)
            fig.add_trace(go.Scatter(
                x=sub[x_col], y=sub["avg_sentiment_score"].round(2),
                name=label_for_path(pid, self.lp_config),
                mode="lines+markers", line=dict(color=color, width=2),
                marker=dict(size=5),
                hovertemplate="Lesson %{x}<br>Score: %{y:.2f}<extra></extra>",
            ))

        # Dotted reference line at the neutral score.
        fig.add_hline(y=0, line_dash="dot", line_color="gray", opacity=0.5)
        fig.update_layout(
            title=title, xaxis_title="Method Lesson Number",
            yaxis_title="Avg Sentiment Score",
            yaxis=dict(range=[-2.2, 2.2]),
            height=self.chart_height, hovermode="x unified",
            legend=dict(orientation="h", yanchor="bottom", y=1.02),
        )
        return fig

    # ─────────────────────────────────────────────────────────────────────────
    # Helpers
    # ─────────────────────────────────────────────────────────────────────────

    @staticmethod
    def _hex_to_rgba(hex_color: str, alpha: float = 0.15) -> str:
        """Convert a #RRGGBB hex string to an rgba(...) string safe for Plotly fillcolor."""
        h = hex_color.lstrip("#")
        if len(h) == 6:
            r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
            return f"rgba({r},{g},{b},{alpha})"
        # Fall back to a neutral transparent fill if the color is unexpected
        return f"rgba(150,150,150,{alpha})"

    def _path_color(self, path_id, all_path_ids: list) -> str:
        palette = [
            "#1982C4", "#FF6B35", "#6A4C93", "#4CAF50",
            "#E91E63", "#00BCD4", "#FF9800", "#9C27B0",
        ]
        try:
            idx = list(all_path_ids).index(path_id)
            return palette[idx % len(palette)]
        except (ValueError, TypeError):
            return "#607D8B"

    @staticmethod
    def _empty(message: str, height: int = 300) -> go.Figure:
        """Return a trace-less figure showing ``message`` centered in the plot area."""
        placeholder = go.Figure()
        # Paper coordinates (0.5, 0.5) center the text regardless of axis ranges.
        note = {
            "text": message,
            "xref": "paper",
            "yref": "paper",
            "x": 0.5,
            "y": 0.5,
            "showarrow": False,
            "font": {"size": 14},
        }
        placeholder.add_annotation(**note)
        placeholder.update_layout(height=height)
        return placeholder