Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

File size: 37,405 Bytes

599973c

"""
Learning Paths Page
Analyzes ordered sequences of lessons within Musora app brands.
Shows how engagement and sentiment evolve as students progress through a path.

Architecture:
  - Filter panel → Fetch button → session state → charts + cards
  - Per-Path view: one funnel per learning path, denominator resets per path
  - Method-Wide view: continuous funnel across all paths with one shared denominator
  - All data is `lp_`-prefixed in session state to avoid collision with other pages
"""
import sys
from pathlib import Path
from typing import Optional

import pandas as pd
import streamlit as st

# Put the directory one level above this file's folder on sys.path so the
# sibling packages imported below (data, utils, visualizations, agents) resolve.
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))

from data.learning_paths_data_loader import LearningPathsDataLoader
from utils.learning_paths_utils import (
    merge_lesson_metrics, merge_method_wide, find_top_dropoffs,
    get_overview_kpis, filter_by_paths, label_for_path, short_title,
    load_lp_config,
)
from visualizations.learning_paths_charts import LearningPathsCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.demographic_charts import DemographicCharts
from agents.learning_paths_summary_agent import LearningPathsSummaryAgent

# Options offered by the "View Mode" radio. The page compares the selected
# value against these exact strings, so renaming one requires updating those
# comparisons as well.
_VIEWS = ["Per-Path", "Method-Wide"]


def render_learning_paths(data_loader: LearningPathsDataLoader):
    """Main entry point for the Learning Paths page.

    Renders, top to bottom: the filter panel, the fetch button, overview KPIs,
    engagement / completion / video / volume / sentiment charts, intent and
    emotion distributions, drop-off analysis, demographics, the AI summary,
    paginated per-lesson cards and a CSV export.

    All query results are read from ``lp_``-prefixed session-state entries that
    ``_fetch_all`` writes; nothing below the fetch button renders until data for
    the current ``(brand, view_mode)`` pair has been fetched.

    Args:
        data_loader: Loader used to run the learning-path queries and, later,
            to fetch sample comments for individual lesson cards.
    """
    st.title("📚 Learning Paths")
    st.markdown(
        "Analyze ordered lesson sequences — see how student engagement and sentiment "
        "evolve as they progress through each learning path."
    )
    st.markdown("---")

    cfg = load_lp_config()
    charts = LearningPathsCharts()
    brands = cfg.get("brands", [])

    if not brands:
        st.error("No brands configured for Learning Paths. Check `config/viz_config.json`.")
        return

    # ── Filter panel ─────────────────────────────────────────────────────────
    st.markdown("### 🎯 Filters")

    filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 2])

    with filter_col1:
        brand = st.selectbox(
            "Brand", options=brands,
            index=0, key="lp_brand",
        )

    with filter_col2:
        view_mode = st.radio(
            "View Mode",
            options=_VIEWS, index=0, horizontal=True,
            key="lp_view_mode",
            help=(
                "**Per-Path**: each path's funnel resets to its own first-lesson count.\n\n"
                "**Method-Wide**: one continuous funnel using a single denominator "
                "(students who started Learning Path 1) — shows true end-to-end attrition."
            ),
        )

    with filter_col3:
        # The path selector can only be populated from a lesson map that was
        # fetched for the brand currently selected.
        prev_brand_key = st.session_state.get("lp_fetch_key", (None,))[0]
        prev_lesson_map = st.session_state.get("lp_lesson_map", pd.DataFrame())
        if not prev_lesson_map.empty and prev_brand_key == brand:
            available_paths = sorted(prev_lesson_map["learning_path_id"].unique().tolist())
            path_labels_opts = {pid: label_for_path(pid, cfg) for pid in available_paths}
            selected_paths = st.multiselect(
                "Learning Paths (leave empty = all)",
                options=available_paths,
                default=[],
                format_func=lambda pid: path_labels_opts[pid],
                key="lp_selected_paths",
            )
        else:
            selected_paths = []
            st.info("Fetch data to populate path selector.")

    st.markdown("---")

    # ── Fetch key & stale check ───────────────────────────────────────────────
    # Data counts as loaded only if it was fetched for this exact brand + view.
    fetch_key = (brand, view_mode)
    has_data = (
        st.session_state.get("lp_fetch_key") == fetch_key
        and "lp_lesson_map" in st.session_state
        and not st.session_state.get("lp_lesson_map", pd.DataFrame()).empty
    )

    fetch_col, info_col = st.columns([1, 3])
    with fetch_col:
        fetch_clicked = st.button("🚀 Fetch Data", type="primary",
                                  use_container_width=True, key="lp_fetch_btn")
    with info_col:
        if has_data:
            n_lessons = len(st.session_state.get("lp_lesson_map", pd.DataFrame()))
            st.success(f"✅ Loaded **{n_lessons:,}** lessons for **{brand}**")
        elif not fetch_clicked:
            st.info("👆 Select a brand and click **Fetch Data** to load learning path metrics.")

    if fetch_clicked:
        _fetch_all(data_loader, brand, fetch_key)
        st.rerun()

    if not has_data and not fetch_clicked:
        return

    # ── Load merged frames ───────────────────────────────────────────────────
    lesson_map = st.session_state.get("lp_lesson_map", pd.DataFrame())
    per_path_df = st.session_state.get("lp_per_path", pd.DataFrame())
    method_df = st.session_state.get("lp_method_wide", pd.DataFrame())
    video_df = st.session_state.get("lp_video", pd.DataFrame())
    sentiment_df = st.session_state.get("lp_sentiment", pd.DataFrame())

    per_path_view = view_mode == "Per-Path"
    if per_path_view:
        merged = merge_lesson_metrics(lesson_map, per_path_df, video_df, sentiment_df)
    else:
        merged = merge_method_wide(method_df, video_df, sentiment_df, cfg)

    if merged.empty:
        st.warning("No data returned. Check your Snowflake connection.")
        return

    # Apply path filter (Per-Path only)
    if per_path_view and selected_paths:
        merged = filter_by_paths(merged, selected_paths)
        if merged.empty:
            # Without this guard the "first path" lookups below would raise
            # IndexError on an empty frame.
            st.warning("No lessons match the selected learning paths.")
            return

    # Add path labels. `assign` returns a new frame, so a frame handed back by
    # the merge/filter helpers is never mutated in place.
    has_path_col = "learning_path_id" in merged.columns
    if has_path_col:
        merged = merged.assign(
            path_label=merged["learning_path_id"].apply(
                lambda pid: label_for_path(pid, cfg)
            )
        )

    # Values shared by several sections below.
    path_ids = sorted(merged["learning_path_id"].unique()) if has_path_col else []
    focus_pid = path_ids[0] if path_ids else None
    # X-axis column for charts that follow the lesson sequence.
    seq_col = (
        "method_lesson_number"
        if view_mode == "Method-Wide" and "method_lesson_number" in merged.columns
        else "lesson_order"
    )

    # ── Overview KPIs ─────────────────────────────────────────────────────────
    st.markdown("### 📊 Overview")
    kpis = get_overview_kpis(merged)

    k1, k2, k3, k4, k5, k6 = st.columns(6)
    k1.metric("Method Starters", f"{kpis.get('total_students', 0):,}")
    k2.metric("Avg Completion", f"{kpis.get('avg_completion_pct', 0):.1f}%")
    k3.metric("Avg Sentiment", f"{kpis.get('avg_sentiment_score', 0):.2f}",
              help="Scale: −2 (very negative) to +2 (very positive)")
    k4.metric("Total Comments", f"{kpis.get('total_comments', 0):,}")
    k5.metric("Learning Paths", f"{kpis.get('n_paths', 0)}")
    k6.metric("Total Lessons", f"{kpis.get('n_lessons', 0)}")

    st.markdown("---")

    # ── Headline: Dual-Axis Engagement ───────────────────────────────────────
    st.markdown("### 🎯 Engagement Journey")

    if per_path_view:
        if len(path_ids) > 1:
            # One tab per learning path.
            tab_labels = [label_for_path(pid, cfg) for pid in path_ids]
            tabs = st.tabs(tab_labels)
            for tab, pid in zip(tabs, path_ids):
                with tab:
                    st.plotly_chart(
                        charts.create_dual_axis_engagement(
                            merged, path_id=pid,
                            title=f"Completion vs Sentiment — {label_for_path(pid, cfg)}"),
                        use_container_width=True, key=f"lp_dual_{pid}",
                    )
        elif path_ids:
            st.plotly_chart(
                charts.create_dual_axis_engagement(merged, path_id=path_ids[0]),
                use_container_width=True, key="lp_dual_single",
            )
        else:
            st.plotly_chart(
                charts.create_dual_axis_engagement(merged),
                use_container_width=True, key="lp_dual_all",
            )
    else:
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(charts.create_method_funnel(merged),
                            use_container_width=True, key="lp_method_funnel")
        with col2:
            st.plotly_chart(charts.create_method_sentiment_journey(merged),
                            use_container_width=True, key="lp_method_sent")

    st.markdown("---")

    # ── Completion Funnel ─────────────────────────────────────────────────────
    st.markdown("### 📉 Completion Funnel")
    if per_path_view:
        funnel_x = "lesson_number" if "lesson_number" in merged.columns else "lesson_order"
        st.plotly_chart(charts.create_completion_funnel(merged, x_col=funnel_x),
                        use_container_width=True, key="lp_completion_funnel")
    else:
        st.info("Method-Wide funnel shown in the Engagement Journey section above.")

    st.markdown("---")

    # ── Video Engagement ──────────────────────────────────────────────────────
    st.markdown("### 🎬 Video Completion Rate")
    st.caption(
        "Of students who *started* a lesson video, what percentage finished it? "
        "This isolates whether the content itself holds attention."
    )
    st.plotly_chart(charts.create_video_engagement(merged, x_col=seq_col),
                    use_container_width=True, key="lp_video_chart")

    st.markdown("---")

    # ── Volume Analysis ───────────────────────────────────────────────────────
    st.markdown("### 📊 Volume Analysis")
    st.caption("Total comments per lesson — shows where students are most engaged.")
    st.plotly_chart(
        charts.create_comment_volume_chart(merged, x_col=seq_col),
        use_container_width=True, key="lp_volume_chart",
    )

    st.markdown("---")

    # ── Sentiment Journey ─────────────────────────────────────────────────────
    st.markdown("### 💬 Sentiment Journey")
    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(charts.create_sentiment_journey(merged, x_col=seq_col),
                        use_container_width=True, key="lp_sent_journey")
    with col2:
        # Stacked bar for the first path present in the frame. The check is
        # `is not None` so that a path id of 0 still gets its own label.
        stacked_title = (
            f"Sentiment Breakdown — {label_for_path(focus_pid, cfg)}"
            if focus_pid is not None else "Sentiment Breakdown"
        )
        st.plotly_chart(
            charts.create_sentiment_stacked_bar(
                merged, x_col=seq_col, path_id=focus_pid, title=stacked_title),
            use_container_width=True, key="lp_sent_stacked",
        )

    with st.expander("📊 Sentiment Heatmap", expanded=False):
        st.plotly_chart(
            charts.create_lesson_sentiment_heatmap(merged, path_id=focus_pid),
            use_container_width=True, key="lp_heatmap",
        )

    st.markdown("---")

    # ── Intent / Emotion source frame ────────────────────────────────────────
    # Filtered once and shared by both sections. The column check prevents a
    # KeyError when "lp_metadata" is missing and the column-less default frame
    # is used.
    metadata_df = st.session_state.get("lp_metadata", pd.DataFrame())
    if (per_path_view and selected_paths
            and "learning_path_id" in metadata_df.columns):
        metadata_df = metadata_df[metadata_df["learning_path_id"].isin(selected_paths)]

    # ── Intent Analysis ───────────────────────────────────────────────────────
    st.markdown("### 🎭 Intent Analysis")
    if metadata_df.empty or "intent" not in metadata_df.columns:
        st.info("No intent data available. Load data first.")
    else:
        _render_intent_emotion_tabs(metadata_df, "intent", cfg, "lp_intent")

    st.markdown("---")

    # ── Emotion Analysis ──────────────────────────────────────────────────────
    st.markdown("### 💭 Emotion Analysis")
    has_emotions = (
        not metadata_df.empty
        and "emotions" in metadata_df.columns
        and metadata_df["emotions"].notna().any()
    )
    if not has_emotions:
        st.info("No emotion data available. Emotions are extracted for newly processed comments.")
    else:
        _render_intent_emotion_tabs(metadata_df, "emotion", cfg, "lp_emotion")

    st.markdown("---")

    # ── Drop-off Analysis ─────────────────────────────────────────────────────
    st.markdown("### ⚠️ Top Drop-off Points")
    dropoffs = find_top_dropoffs(merged, n=7, rate_col="completion_rate", order_col=seq_col)
    if not dropoffs.empty:
        st.plotly_chart(charts.create_dropoff_chart(dropoffs),
                        use_container_width=True, key="lp_dropoffs")
    else:
        st.info("No significant lesson-to-lesson drop-offs detected.")

    st.markdown("---")

    # ── Demographics ──────────────────────────────────────────────────────────
    st.markdown("### 👥 Demographics")

    commenter_demo = st.session_state.get("lp_commenter_demo", pd.DataFrame())
    student_demo = st.session_state.get("lp_student_demo", pd.DataFrame())
    # Demographics deliberately use the unfiltered metadata, as before.
    meta_for_demo = st.session_state.get("lp_metadata", pd.DataFrame())

    demo_tab1, demo_tab2 = st.tabs(["💬 Commenters", "🎓 All Students"])

    with demo_tab1:
        if commenter_demo.empty:
            st.info("No commenter demographic data available.")
        else:
            _render_demographics(commenter_demo, meta_for_demo, "commenter")

    with demo_tab2:
        if student_demo.empty:
            st.info("No student demographic data available.")
        else:
            _render_demographics(student_demo, pd.DataFrame(), "student")

    st.markdown("---")

    # ── AI Summary ────────────────────────────────────────────────────────────
    st.markdown("### 🤖 AI Learning Journey Summary")
    st.markdown(
        "Generate an LLM-powered narrative describing the student experience "
        "through this learning path — sentiment arcs, retention patterns, and "
        "actionable recommendations for content designers."
    )

    # A stored summary is shown only while brand, view and path selection match
    # the ones it was generated for.
    summary_key = (brand, view_mode, tuple(sorted(selected_paths)))
    summary_available = (
        "lp_summary" in st.session_state
        and st.session_state.get("lp_summary_key") == summary_key
        and st.session_state["lp_summary"] is not None
    )

    gen_col, _ = st.columns([1, 3])
    with gen_col:
        gen_clicked = st.button("🧠 Generate AI Summary", type="primary",
                                use_container_width=True, key="lp_gen_summary")

    if gen_clicked:
        # NOTE(review): nothing in this file writes "lp_comments"; unless another
        # module populates it, the agent always receives an empty frame here.
        comments_df = st.session_state.get("lp_comments", pd.DataFrame())
        with st.spinner("Analysing learning path data with AI… this may take 20–40 seconds…"):
            agent = LearningPathsSummaryAgent()
            # Only the Per-Path view summarises a single path.
            summary_pid = focus_pid if per_path_view else None
            result = agent.process({
                "metrics": merged,
                "comments": comments_df,
                "brand": brand,
                "path_id": summary_pid,
                "path_label": (label_for_path(summary_pid, cfg)
                               if summary_pid is not None else "Full Method"),
            })
        st.session_state["lp_summary"] = result
        st.session_state["lp_summary_key"] = summary_key
        st.rerun()

    if summary_available:
        _render_summary(st.session_state["lp_summary"])

    st.markdown("---")

    # ── Per-Lesson Drill-down ─────────────────────────────────────────────────
    st.markdown("### 📖 Per-Lesson Detail")
    st.caption("Expand any lesson to see the sentiment breakdown and sample comments.")
    _render_lesson_cards(merged, data_loader, brand, cfg)

    st.markdown("---")

    # ── Export CSV ────────────────────────────────────────────────────────────
    st.markdown("### 💾 Export Data")
    # Export only the known columns that the merged frame actually contains.
    export_cols = [c for c in [
        "brand", "learning_path_id", "path_label", "lesson_order", "lesson_content_id",
        "content_title", "lesson_number", "students_completed", "denominator_students",
        "completion_rate", "total_starts", "total_completions", "video_completion_rate",
        "total_comments", "very_positive", "positive", "neutral",
        "negative", "very_negative", "avg_sentiment_score",
    ] if c in merged.columns]
    csv = merged[export_cols].to_csv(index=False)
    st.download_button(
        label="📥 Download as CSV",
        data=csv,
        file_name=f"learning_paths_{brand}.csv",
        mime="text/csv",
        key="lp_csv_download",
    )


# ─────────────────────────────────────────────────────────────────────────────
# Private helpers
# ─────────────────────────────────────────────────────────────────────────────

def _fetch_all(loader: LearningPathsDataLoader, brand: str, fetch_key: tuple):
    """Execute every learning-path query for ``brand`` and cache the results.

    Each result is written to its ``lp_``-prefixed session-state slot. After the
    queries finish, ``fetch_key`` is recorded, any stored AI summary is dropped
    and the lesson-card pager is reset to page 1.
    """
    # (session-state key, loader method name), in execution order.
    query_plan = (
        ("lp_lesson_map", "load_lesson_map"),
        ("lp_per_path", "load_per_path_completion"),
        ("lp_method_wide", "load_method_wide_completion"),
        ("lp_video", "load_video_engagement"),
        ("lp_sentiment", "load_lesson_sentiment"),
        ("lp_metadata", "load_lesson_metadata"),
        ("lp_commenter_demo", "load_lp_commenter_demographics"),
        ("lp_student_demo", "load_lp_student_demographics"),
    )
    with st.spinner(f"Fetching learning path data for {brand}…"):
        for state_key, method_name in query_plan:
            st.session_state[state_key] = getattr(loader, method_name)(brand)
        st.session_state["lp_fetch_key"] = fetch_key
        # A summary generated for the previous brand/mode no longer applies.
        for stale_key in ("lp_summary", "lp_summary_key"):
            st.session_state.pop(stale_key, None)
        st.session_state["lp_drill_page"] = 1


def _render_lesson_pager(slot: str, total_pages: int, caption: str) -> None:
    """Draw one Previous / caption / Next row bound to ``lp_drill_page``.

    ``slot`` ("top" or "bot") is appended to the widget keys so that both pager
    rows can appear on the same page without key collisions.
    """
    current = st.session_state["lp_drill_page"]
    prev_col, label_col, next_col = st.columns([1, 2, 1])
    with prev_col:
        if st.button("⬅️ Previous", key=f"lp_prev_{slot}", disabled=current <= 1):
            st.session_state["lp_drill_page"] = current - 1
            st.rerun()
    with label_col:
        st.markdown(
            f"<div style='text-align:center;padding-top:8px;'>{caption}</div>",
            unsafe_allow_html=True,
        )
    with next_col:
        if st.button("Next ➡️", key=f"lp_next_{slot}", disabled=current >= total_pages):
            st.session_state["lp_drill_page"] = current + 1
            st.rerun()


def _render_lesson_cards(merged: pd.DataFrame, loader: LearningPathsDataLoader,
                          brand: str, cfg: dict):
    """Render paginated lesson cards, 10 per page, with pagers above and below.

    Args:
        merged: One row per lesson; rows are shown in frame order.
        loader: Passed through to each card for on-demand comment loading.
        brand: Brand the lessons belong to.
        cfg: Learning-paths config used to label paths.
    """
    if merged.empty:
        st.info("No lesson data available.")
        return

    per_page = 10
    total = len(merged)
    total_pages = max(1, (total + per_page - 1) // per_page)

    # Clamp the stored page into range. The lesson count can shrink between
    # reruns (for example when the path filter changes) while the stored page
    # index stays put, which would otherwise show an empty page.
    page = min(max(st.session_state.get("lp_drill_page", 1), 1), total_pages)
    st.session_state["lp_drill_page"] = page

    paginated = total > per_page
    if paginated:
        _render_lesson_pager(
            "top", total_pages, f"Page {page} / {total_pages} — {total:,} lessons"
        )

    start = (page - 1) * per_page
    for _, row in merged.iloc[start: start + per_page].iterrows():
        _render_single_lesson_card(row, loader, brand, cfg)

    if paginated:
        _render_lesson_pager("bot", total_pages, f"Page {page} / {total_pages}")


def _lp_int_or_zero(value) -> int:
    """Coerce ``value`` to ``int``; ``None``, NaN and non-numeric input give 0."""
    if value is None:
        return 0
    try:
        if pd.isna(value):
            return 0
        return int(value)
    except (TypeError, ValueError):
        return 0


def _render_single_lesson_card(row: pd.Series, loader: LearningPathsDataLoader,
                                brand: str, cfg: dict):
    """Render one lesson expander card with metrics and on-demand comments.

    Args:
        row: One lesson row. Count and id fields are coerced with
            ``_lp_int_or_zero`` so that missing or NaN values render as 0
            instead of raising ``ValueError``.
        loader: Used to fetch up to 20 sample comments when the button is clicked.
        brand: Brand passed to the comment query.
        cfg: Learning-paths config used to label the path.
    """
    path_label = label_for_path(row.get("learning_path_id"), cfg)
    order = _lp_int_or_zero(row.get("lesson_order", 0))
    title = short_title(row.get("content_title"), 60)
    comp = row.get("completion_rate")
    sent = row.get("avg_sentiment_score")
    vcr = row.get("video_completion_rate")
    comments_n = _lp_int_or_zero(row.get("total_comments", 0))
    content_id = _lp_int_or_zero(row.get("lesson_content_id", 0))

    # Traffic-light marker derived from the average sentiment score.
    sent_emoji = "⚪"
    if pd.notna(sent):
        if sent >= 1.0:
            sent_emoji = "🟢"
        elif sent >= 0.0:
            sent_emoji = "🟡"
        elif sent >= -1.0:
            sent_emoji = "🟠"
        else:
            sent_emoji = "🔴"

    header = f"{sent_emoji} {path_label} › L{order:02d}: {title}"
    if pd.notna(comp):
        header += f" | Completion: {comp*100:.1f}%"

    with st.expander(header, expanded=False):
        m1, m2, m3, m4 = st.columns(4)
        m1.metric("Completion", f"{comp*100:.1f}%" if pd.notna(comp) else "—")
        m2.metric("Sentiment Score", f"{sent:.2f}" if pd.notna(sent) else "—")
        m3.metric("Video Completion", f"{vcr*100:.1f}%" if pd.notna(vcr) else "—")
        m4.metric("Comments", f"{comments_n:,}")

        # Sentiment mini-bar
        sent_cols = ["very_positive", "positive", "neutral", "negative", "very_negative"]
        totals = {s: _lp_int_or_zero(row.get(s, 0)) for s in sent_cols}
        total_all = sum(totals.values())
        if total_all > 0:
            bar_parts = " | ".join(
                f"{s.replace('_', ' ').title()}: {totals[s]:,} "
                f"({totals[s]/total_all*100:.1f}%)"
                for s in sent_cols if totals[s] > 0
            )
            st.caption(f"Sentiment distribution: {bar_parts}")

        # On-demand sample comments
        if comments_n > 0:
            cache_key = f"lp_comments_{content_id}"
            if st.button("💬 Load Sample Comments", key=f"lp_load_comments_{content_id}"):
                with st.spinner("Loading comments…"):
                    # Query once per lesson; later clicks reuse the cached frame.
                    if cache_key not in st.session_state:
                        st.session_state[cache_key] = loader.load_lesson_comments(
                            brand, [content_id],
                            max_per_lesson=20,
                        )

            if cache_key in st.session_state:
                cdf = st.session_state[cache_key]
                if not cdf.empty and "display_text" in cdf.columns:
                    polarity_emoji = {
                        "very_positive": "🟢", "positive": "🟩",
                        "neutral": "🟡", "negative": "🟠",
                        "very_negative": "🔴",
                    }
                    for _, crow in cdf.iterrows():
                        sent_pol = crow.get("sentiment_polarity", "neutral")
                        emoji = polarity_emoji.get(sent_pol, "⚪")
                        txt = str(crow.get("display_text", "")).strip()
                        if txt:
                            st.markdown(f"{emoji} {txt}")


def _render_intent_emotion_tabs(
    metadata_df: pd.DataFrame,
    analysis_type: str,
    cfg: dict,
    key_prefix: str,
):
    """Show an "Overall" tab plus one tab per learning path, each with a bar and a pie chart.

    ``analysis_type == "intent"`` selects the intent charts; any other value
    selects the emotion charts. ``key_prefix`` namespaces the chart widget keys.
    """
    dist_charts = DistributionCharts()

    # Pick the chart builders and the title prefix once, up front.
    if analysis_type == "intent":
        make_bar = dist_charts.create_intent_bar_chart
        make_pie = dist_charts.create_intent_pie_chart
        heading = "Intent"
    else:
        make_bar = dist_charts.create_emotion_bar_chart
        make_pie = dist_charts.create_emotion_pie_chart
        heading = "Emotion"

    if "learning_path_id" in metadata_df.columns:
        path_ids = sorted(metadata_df["learning_path_id"].unique())
    else:
        path_ids = []

    # The same labels serve as tab captions and chart-title suffixes.
    labels = ["Overall"]
    frames = [metadata_df]
    for pid in path_ids:
        labels.append(label_for_path(pid, cfg))
        frames.append(metadata_df[metadata_df["learning_path_id"] == pid])

    tabs = st.tabs(labels)
    for idx, (tab, frame, label) in enumerate(zip(tabs, frames, labels)):
        chart_title = f"{heading} — {label}"
        with tab:
            left, right = st.columns(2)
            with left:
                st.plotly_chart(
                    make_bar(frame, chart_title),
                    use_container_width=True, key=f"{key_prefix}_bar_{idx}",
                )
            with right:
                st.plotly_chart(
                    make_pie(frame, chart_title),
                    use_container_width=True, key=f"{key_prefix}_pie_{idx}",
                )


def _demo_known_mask(series: pd.Series) -> pd.Series:
    """Boolean mask of rows whose value is present and not the "Unknown" placeholder."""
    return series.notna() & (series != "Unknown")


def _demo_distribution(series: pd.Series, column: str) -> pd.DataFrame:
    """Count the values of ``series`` into ``[column, "count", "percentage"]`` rows."""
    dist = series.value_counts().reset_index()
    dist.columns = [column, "count"]
    dist["percentage"] = (dist["count"] / dist["count"].sum() * 100).round(2)
    return dist


def _render_demographics(
    demo_df: pd.DataFrame,
    metadata_df: pd.DataFrame,
    demo_type: str,
):
    """Render age and experience distribution charts for commenters or students.

    Args:
        demo_df: One row per person; may carry ``user_id``, ``age_group``,
            ``experience_group`` and ``experience_level``. Each column is
            optional and its section degrades to an info message when absent.
        metadata_df: Comment metadata used for the sentiment cross-tabs. It is
            only consulted when ``demo_type == "commenter"`` and must then carry
            ``author_id`` and ``sentiment_polarity``.
        demo_type: ``"commenter"`` enables the sentiment cross-tabs; any other
            value is labelled "Students".
    """
    demo_charts = DemographicCharts()
    has_sentiment = demo_type == "commenter" and not metadata_df.empty

    # Join metadata to demographics for the sentiment cross-tabs (commenters
    # only). Only demographic columns that actually exist are merged, so a
    # loader omitting one of them no longer triggers a KeyError.
    demo_cols = [c for c in ("age_group", "experience_group") if c in demo_df.columns]
    merged_for_sent = pd.DataFrame()
    if (
        has_sentiment
        and demo_cols
        and "author_id" in metadata_df.columns
        and "sentiment_polarity" in metadata_df.columns
        and "user_id" in demo_df.columns
    ):
        meta = metadata_df.copy()
        meta["_uid"] = meta["author_id"].astype(str)
        dem = demo_df[["user_id"] + demo_cols].copy()
        # Both ids are compared as strings so differing dtypes still match.
        dem["_uid"] = dem["user_id"].astype(str)
        merged_for_sent = meta.merge(dem[["_uid"] + demo_cols], on="_uid", how="left")

    def _can_cross_tab(column: str) -> bool:
        return has_sentiment and not merged_for_sent.empty and column in merged_for_sent.columns

    # ── Summary metrics ───────────────────────────────────────────
    label = "Commenters" if demo_type == "commenter" else "Students"
    total = len(demo_df)
    # NaN counts as unknown here, matching _compute_demo_by_sentiment.
    with_age = int(_demo_known_mask(demo_df["age_group"]).sum()) \
        if "age_group" in demo_df.columns else 0
    with_exp = int(_demo_known_mask(demo_df["experience_group"]).sum()) \
        if "experience_group" in demo_df.columns else 0

    m1, m2, m3 = st.columns(3)
    m1.metric(f"Total {label}", f"{total:,}")
    m2.metric("With Age Data", f"{with_age:,} ({with_age/total*100:.0f}%)" if total else "0")
    m3.metric("With Experience Data", f"{with_exp:,} ({with_exp/total*100:.0f}%)" if total else "0")

    st.markdown("---")

    # ── Age Distribution ──────────────────────────────────────────
    st.markdown("#### 🎂 Age Distribution")
    if "age_group" in demo_df.columns:
        age_valid = demo_df[_demo_known_mask(demo_df["age_group"])]
        if not age_valid.empty:
            age_dist = _demo_distribution(age_valid["age_group"], "age_group")
            age_title = f"Age Distribution — {label}"

            if _can_cross_tab("age_group"):
                age_sent = _compute_demo_by_sentiment(merged_for_sent, "age_group")
                col1, col2 = st.columns(2)
                with col1:
                    st.plotly_chart(
                        demo_charts.create_age_distribution_chart(age_dist, age_title),
                        use_container_width=True, key=f"lp_{demo_type}_age_dist",
                    )
                with col2:
                    if not age_sent.empty:
                        st.plotly_chart(
                            demo_charts.create_age_sentiment_chart(
                                age_sent, f"Sentiment by Age — {label}"),
                            use_container_width=True, key=f"lp_{demo_type}_age_sent",
                        )
            else:
                st.plotly_chart(
                    demo_charts.create_age_distribution_chart(age_dist, age_title),
                    use_container_width=True, key=f"lp_{demo_type}_age_dist",
                )
        else:
            st.info("No age data available.")
    else:
        st.info("Age data not loaded.")

    st.markdown("---")

    # ── Experience Level Distribution ─────────────────────────────
    st.markdown("#### 🎯 Experience Level Distribution")
    if "experience_group" in demo_df.columns:
        exp_valid = demo_df[_demo_known_mask(demo_df["experience_group"])]
        if not exp_valid.empty:
            exp_grouped = _demo_distribution(exp_valid["experience_group"], "experience_group")

            # The detailed view draws on every row with a level, not just the
            # rows that also have a known experience group.
            exp_detailed = pd.DataFrame()
            if "experience_level" in demo_df.columns:
                exp_det_valid = demo_df[demo_df["experience_level"].notna()]
                if not exp_det_valid.empty:
                    exp_detailed = _demo_distribution(
                        exp_det_valid["experience_level"], "experience_level"
                    )

            tab_det, tab_grp = st.tabs(["📊 Detailed (0–10)", "📊 Grouped"])

            with tab_det:
                if not exp_detailed.empty:
                    st.plotly_chart(
                        demo_charts.create_experience_distribution_chart(
                            exp_detailed, f"Experience (0–10) — {label}", use_groups=False
                        ),
                        use_container_width=True, key=f"lp_{demo_type}_exp_det",
                    )
                else:
                    st.info("No detailed experience data available.")

            with tab_grp:
                if _can_cross_tab("experience_group"):
                    exp_sent = _compute_demo_by_sentiment(merged_for_sent, "experience_group")
                    col1, col2 = st.columns(2)
                    with col1:
                        st.plotly_chart(
                            demo_charts.create_experience_distribution_chart(
                                exp_grouped, f"Experience Groups — {label}", use_groups=True
                            ),
                            use_container_width=True, key=f"lp_{demo_type}_exp_grp",
                        )
                    with col2:
                        if not exp_sent.empty:
                            st.plotly_chart(
                                demo_charts.create_experience_sentiment_chart(
                                    exp_sent, f"Sentiment by Experience — {label}", use_groups=True
                                ),
                                use_container_width=True, key=f"lp_{demo_type}_exp_sent",
                            )
                else:
                    st.plotly_chart(
                        demo_charts.create_experience_distribution_chart(
                            exp_grouped, f"Experience Groups — {label}", use_groups=True
                        ),
                        use_container_width=True, key=f"lp_{demo_type}_exp_grp_only",
                    )
        else:
            st.info("No experience data available.")
    else:
        st.info("Experience data not loaded.")


def _compute_demo_by_sentiment(merged_df: pd.DataFrame, field: str) -> pd.DataFrame:
    """Return sentiment distribution per demographic group for a merged metadata+demo frame."""
    valid = merged_df[
        merged_df[field].notna() & (merged_df[field] != "Unknown")
        & merged_df["sentiment_polarity"].notna()
    ]
    if valid.empty:
        return pd.DataFrame()
    grp = valid.groupby([field, "sentiment_polarity"], as_index=False).size().rename(columns={"size": "count"})
    grp["percentage"] = grp.groupby(field)["count"].transform(
        lambda x: (x / x.sum() * 100).round(2)
    )
    return grp


def _render_summary(result: dict):
    """Display the result dict produced by LearningPathsSummaryAgent.

    A result without a truthy ``"success"`` flag renders an error and nothing
    else. Otherwise the executive summary, the two-column insight sections and
    a collapsible metadata panel are drawn from ``result["summary"]`` and
    ``result["metadata"]``.
    """
    if not result.get("success"):
        st.error(f"AI analysis failed: {result.get('error', 'Unknown error')}")
        return

    summary = result.get("summary", {})
    metadata = result.get("metadata", {})

    def _bullet_section(heading: str, field: str) -> None:
        # Heading plus one markdown bullet per entry; skipped when the list is empty.
        entries = summary.get(field, [])
        if not entries:
            return
        st.markdown(heading)
        for entry in entries:
            st.markdown(f"- {entry}")

    st.markdown("---")
    st.markdown("#### 📋 Executive Summary")
    st.info(summary.get("executive_summary", ""))

    left, right = st.columns(2)

    with left:
        phases = summary.get("journey_arc", [])
        if phases:
            st.markdown("#### 🗺️ Journey Arc")
            for phase in phases:
                phase_name = phase.get("phase", "")
                phase_text = phase.get("description", "")
                st.markdown(f"**{phase_name}**  \n{phase_text}")
                st.markdown("")

        _bullet_section("#### 💬 Sentiment Insights", "sentiment_insights")
        _bullet_section("#### ✨ Content Highlights", "content_highlights")

    with right:
        _bullet_section("#### 📉 Retention Insights", "retention_insights")
        _bullet_section("#### 🎯 Recommendations", "recommendations")

    with st.expander("ℹ️ Analysis Metadata"):
        metric_specs = (
            ("Lessons Analysed", "lessons_analyzed", 0),
            ("Model Used", "model_used", "N/A"),
            ("Tokens Used", "tokens_used", 0),
        )
        for column, (caption, field, fallback) in zip(st.columns(3), metric_specs):
            column.metric(caption, metadata.get(field, fallback))