""" Learning Paths Page Analyzes ordered sequences of lessons within Musora app brands. Shows how engagement and sentiment evolve as students progress through a path. Architecture: - Filter panel β Fetch button β session state β charts + cards - Per-Path view: one funnel per learning path, denominator resets per path - Method-Wide view: continuous funnel across all paths with one shared denominator - All data is `lp_`-prefixed in session state to avoid collision with other pages """ import sys from pathlib import Path from typing import Optional import pandas as pd import streamlit as st parent_dir = Path(__file__).resolve().parent.parent sys.path.append(str(parent_dir)) from data.learning_paths_data_loader import LearningPathsDataLoader from utils.learning_paths_utils import ( merge_lesson_metrics, merge_method_wide, find_top_dropoffs, get_overview_kpis, filter_by_paths, label_for_path, short_title, load_lp_config, ) from visualizations.learning_paths_charts import LearningPathsCharts from visualizations.distribution_charts import DistributionCharts from visualizations.demographic_charts import DemographicCharts from agents.learning_paths_summary_agent import LearningPathsSummaryAgent _VIEWS = ["Per-Path", "Method-Wide"] def render_learning_paths(data_loader: LearningPathsDataLoader): """Main entry point for the Learning Paths page.""" st.title("π Learning Paths") st.markdown( "Analyze ordered lesson sequences β see how student engagement and sentiment " "evolve as they progress through each learning path." ) st.markdown("---") cfg = load_lp_config() charts = LearningPathsCharts() brands = cfg.get("brands", []) if not brands: st.error("No brands configured for Learning Paths. 
Check `config/viz_config.json`.") return # ββ Filter panel βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π― Filters") filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 2]) with filter_col1: brand = st.selectbox( "Brand", options=brands, index=0, key="lp_brand", ) with filter_col2: view_mode = st.radio( "View Mode", options=_VIEWS, index=0, horizontal=True, key="lp_view_mode", help=( "**Per-Path**: each path's funnel resets to its own first-lesson count.\n\n" "**Method-Wide**: one continuous funnel using a single denominator " "(students who started Learning Path 1) β shows true end-to-end attrition." ), ) with filter_col3: # Path selector β populated after a brand is chosen prev_brand_key = st.session_state.get("lp_fetch_key", (None,))[0] prev_lesson_map = st.session_state.get("lp_lesson_map", pd.DataFrame()) if not prev_lesson_map.empty and prev_brand_key == brand: available_paths = sorted(prev_lesson_map["learning_path_id"].unique().tolist()) path_labels_opts = {pid: label_for_path(pid, cfg) for pid in available_paths} selected_paths = st.multiselect( "Learning Paths (leave empty = all)", options=available_paths, default=[], format_func=lambda pid: path_labels_opts[pid], key="lp_selected_paths", ) else: selected_paths = [] st.info("Fetch data to populate path selector.") st.markdown("---") # ββ Fetch key & stale check βββββββββββββββββββββββββββββββββββββββββββββββ fetch_key = (brand, view_mode) has_data = ( st.session_state.get("lp_fetch_key") == fetch_key and "lp_lesson_map" in st.session_state and not st.session_state.get("lp_lesson_map", pd.DataFrame()).empty ) fetch_col, info_col = st.columns([1, 3]) with fetch_col: fetch_clicked = st.button("π Fetch Data", type="primary", use_container_width=True, key="lp_fetch_btn") with info_col: if has_data: n_lessons = len(st.session_state.get("lp_lesson_map", pd.DataFrame())) st.success(f"β Loaded **{n_lessons:,}** lessons for **{brand}**") elif not fetch_clicked: st.info("π Select 
a brand and click **Fetch Data** to load learning path metrics.") if fetch_clicked: _fetch_all(data_loader, brand, fetch_key) st.rerun() if not has_data and not fetch_clicked: return # ββ Load merged frames βββββββββββββββββββββββββββββββββββββββββββββββββββ lesson_map = st.session_state.get("lp_lesson_map", pd.DataFrame()) per_path_df = st.session_state.get("lp_per_path", pd.DataFrame()) method_df = st.session_state.get("lp_method_wide", pd.DataFrame()) video_df = st.session_state.get("lp_video", pd.DataFrame()) sentiment_df = st.session_state.get("lp_sentiment", pd.DataFrame()) if view_mode == "Per-Path": merged = merge_lesson_metrics(lesson_map, per_path_df, video_df, sentiment_df) else: merged = merge_method_wide(method_df, video_df, sentiment_df, cfg) if merged.empty: st.warning("No data returned. Check your Snowflake connection.") return # Apply path filter (Per-Path only) if view_mode == "Per-Path" and selected_paths: merged = filter_by_paths(merged, selected_paths) # Add path labels if "learning_path_id" in merged.columns: merged["path_label"] = merged["learning_path_id"].apply( lambda pid: label_for_path(pid, cfg) ) # ββ Overview KPIs βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π Overview") kpis = get_overview_kpis(merged) k1, k2, k3, k4, k5, k6 = st.columns(6) k1.metric("Method Starters", f"{kpis.get('total_students', 0):,}") k2.metric("Avg Completion", f"{kpis.get('avg_completion_pct', 0):.1f}%") k3.metric("Avg Sentiment", f"{kpis.get('avg_sentiment_score', 0):.2f}", help="Scale: β2 (very negative) to +2 (very positive)") k4.metric("Total Comments", f"{kpis.get('total_comments', 0):,}") k5.metric("Learning Paths", f"{kpis.get('n_paths', 0)}") k6.metric("Total Lessons", f"{kpis.get('n_lessons', 0)}") st.markdown("---") # ββ Headline: Dual-Axis Engagement βββββββββββββββββββββββββββββββββββββββ st.markdown("### π― Engagement Journey") if view_mode == "Per-Path": path_ids = sorted(merged["learning_path_id"].unique()) \ if 
"learning_path_id" in merged.columns else [] if len(path_ids) > 1: tab_labels = [label_for_path(pid, cfg) for pid in path_ids] tabs = st.tabs(tab_labels) for tab, pid in zip(tabs, path_ids): with tab: st.plotly_chart( charts.create_dual_axis_engagement(merged, path_id=pid, title=f"Completion vs Sentiment β {label_for_path(pid, cfg)}"), use_container_width=True, key=f"lp_dual_{pid}", ) elif path_ids: st.plotly_chart( charts.create_dual_axis_engagement(merged, path_id=path_ids[0]), use_container_width=True, key="lp_dual_single", ) else: st.plotly_chart( charts.create_dual_axis_engagement(merged), use_container_width=True, key="lp_dual_all", ) else: col1, col2 = st.columns(2) with col1: st.plotly_chart(charts.create_method_funnel(merged), use_container_width=True, key="lp_method_funnel") with col2: st.plotly_chart(charts.create_method_sentiment_journey(merged), use_container_width=True, key="lp_method_sent") st.markdown("---") # ββ Completion Funnel βββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π Completion Funnel") if view_mode == "Per-Path": rate_col = "completion_rate" x_col = "lesson_number" if "lesson_number" in merged.columns else "lesson_order" st.plotly_chart(charts.create_completion_funnel(merged, x_col=x_col), use_container_width=True, key="lp_completion_funnel") else: st.info("Method-Wide funnel shown in the Engagement Journey section above.") st.markdown("---") # ββ Video Engagement ββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π¬ Video Completion Rate") st.caption( "Of students who *started* a lesson video, what percentage finished it? " "This isolates whether the content itself holds attention." 
) x_col = "method_lesson_number" if (view_mode == "Method-Wide" and "method_lesson_number" in merged.columns) else "lesson_order" st.plotly_chart(charts.create_video_engagement(merged, x_col=x_col), use_container_width=True, key="lp_video_chart") st.markdown("---") # ββ Volume Analysis βββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π Volume Analysis") st.caption("Total comments per lesson β shows where students are most engaged.") x_col_vol = "method_lesson_number" if (view_mode == "Method-Wide" and "method_lesson_number" in merged.columns) else "lesson_order" st.plotly_chart( charts.create_comment_volume_chart(merged, x_col=x_col_vol), use_container_width=True, key="lp_volume_chart", ) st.markdown("---") # ββ Sentiment Journey βββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π¬ Sentiment Journey") x_col = "method_lesson_number" if (view_mode == "Method-Wide" and "method_lesson_number" in merged.columns) else "lesson_order" col1, col2 = st.columns(2) with col1: st.plotly_chart(charts.create_sentiment_journey(merged, x_col=x_col), use_container_width=True, key="lp_sent_journey") with col2: # Show stacked bar for first path (or single combined if method-wide) focus_pid = sorted(merged["learning_path_id"].unique())[0] \ if "learning_path_id" in merged.columns else None st.plotly_chart( charts.create_sentiment_stacked_bar( merged, x_col=x_col, path_id=focus_pid, title=f"Sentiment Breakdown β {label_for_path(focus_pid, cfg)}" if focus_pid else "Sentiment Breakdown"), use_container_width=True, key="lp_sent_stacked", ) with st.expander("π Sentiment Heatmap", expanded=False): focus_pid = sorted(merged["learning_path_id"].unique())[0] \ if "learning_path_id" in merged.columns else None st.plotly_chart( charts.create_lesson_sentiment_heatmap(merged, path_id=focus_pid), use_container_width=True, key="lp_heatmap", ) st.markdown("---") # ββ Intent Analysis βββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π 
Intent Analysis") metadata_df = st.session_state.get("lp_metadata", pd.DataFrame()) if view_mode == "Per-Path" and selected_paths: metadata_df = metadata_df[metadata_df["learning_path_id"].isin(selected_paths)] if metadata_df.empty or "intent" not in metadata_df.columns: st.info("No intent data available. Load data first.") else: _render_intent_emotion_tabs(metadata_df, "intent", cfg, "lp_intent") st.markdown("---") # ββ Emotion Analysis ββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π Emotion Analysis") metadata_df_emo = st.session_state.get("lp_metadata", pd.DataFrame()) if view_mode == "Per-Path" and selected_paths: metadata_df_emo = metadata_df_emo[metadata_df_emo["learning_path_id"].isin(selected_paths)] has_emotions = ( not metadata_df_emo.empty and "emotions" in metadata_df_emo.columns and metadata_df_emo["emotions"].notna().any() ) if not has_emotions: st.info("No emotion data available. Emotions are extracted for newly processed comments.") else: _render_intent_emotion_tabs(metadata_df_emo, "emotion", cfg, "lp_emotion") st.markdown("---") # ββ Drop-off Analysis βββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### β οΈ Top Drop-off Points") rate_col = "completion_rate" order_col = "method_lesson_number" if (view_mode == "Method-Wide" and "method_lesson_number" in merged.columns) else "lesson_order" dropoffs = find_top_dropoffs(merged, n=7, rate_col=rate_col, order_col=order_col) if not dropoffs.empty: st.plotly_chart(charts.create_dropoff_chart(dropoffs), use_container_width=True, key="lp_dropoffs") else: st.info("No significant lesson-to-lesson drop-offs detected.") st.markdown("---") # ββ Demographics ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π₯ Demographics") commenter_demo = st.session_state.get("lp_commenter_demo", pd.DataFrame()) student_demo = st.session_state.get("lp_student_demo", pd.DataFrame()) meta_for_demo = st.session_state.get("lp_metadata", pd.DataFrame()) 
demo_tab1, demo_tab2 = st.tabs(["π¬ Commenters", "π All Students"]) with demo_tab1: if commenter_demo.empty: st.info("No commenter demographic data available.") else: _render_demographics(commenter_demo, meta_for_demo, "commenter") with demo_tab2: if student_demo.empty: st.info("No student demographic data available.") else: _render_demographics(student_demo, pd.DataFrame(), "student") st.markdown("---") # ββ AI Summary ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π€ AI Learning Journey Summary") st.markdown( "Generate an LLM-powered narrative describing the student experience " "through this learning path β sentiment arcs, retention patterns, and " "actionable recommendations for content designers." ) summary_key = (brand, view_mode, tuple(sorted(selected_paths))) summary_available = ( "lp_summary" in st.session_state and st.session_state.get("lp_summary_key") == summary_key and st.session_state["lp_summary"] is not None ) gen_col, _ = st.columns([1, 3]) with gen_col: gen_clicked = st.button("π§ Generate AI Summary", type="primary", use_container_width=True, key="lp_gen_summary") if gen_clicked: comments_df = st.session_state.get("lp_comments", pd.DataFrame()) with st.spinner("Analysing learning path data with AIβ¦ this may take 20β40 secondsβ¦"): agent = LearningPathsSummaryAgent() focus_pid = sorted(merged["learning_path_id"].unique())[0] \ if "learning_path_id" in merged.columns and view_mode == "Per-Path" \ else None result = agent.process({ "metrics": merged, "comments": comments_df, "brand": brand, "path_id": focus_pid, "path_label": label_for_path(focus_pid, cfg) if focus_pid else "Full Method", }) st.session_state["lp_summary"] = result st.session_state["lp_summary_key"] = summary_key st.rerun() if summary_available: _render_summary(st.session_state["lp_summary"]) st.markdown("---") # ββ Per-Lesson Drill-down βββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("### π Per-Lesson Detail") st.caption("Expand any 
def _fetch_all(loader: LearningPathsDataLoader, brand: str, fetch_key: tuple) -> None:
    """Run all learning-path queries for *brand* and store them in session state.

    All queries run first and the results are committed afterwards, so an
    exception in any loader call leaves the previously stored frames and
    ``lp_fetch_key`` untouched instead of mixing data from two fetches.

    Args:
        loader: Data loader providing the ``load_*`` query methods.
        brand: Brand passed to every query.
        fetch_key: Value stored under ``lp_fetch_key`` to mark which selection
            the stored frames belong to.

    Returns:
        None. Also clears any cached AI summary and resets the per-lesson
        pagination to page 1.
    """
    with st.spinner(f"Fetching learning path data for {brand}β¦"):
        # Dict literals evaluate in order, so queries run in the same sequence
        # as before.
        # NOTE(review): the page also reads "lp_comments", which is never
        # populated here - confirm whether a comments query is missing.
        frames = {
            "lp_lesson_map": loader.load_lesson_map(brand),
            "lp_per_path": loader.load_per_path_completion(brand),
            "lp_method_wide": loader.load_method_wide_completion(brand),
            "lp_video": loader.load_video_engagement(brand),
            "lp_sentiment": loader.load_lesson_sentiment(brand),
            "lp_metadata": loader.load_lesson_metadata(brand),
            "lp_commenter_demo": loader.load_lp_commenter_demographics(brand),
            "lp_student_demo": loader.load_lp_student_demographics(brand),
        }

    # Commit only after every query succeeded.
    for state_key, frame in frames.items():
        st.session_state[state_key] = frame
    st.session_state["lp_fetch_key"] = fetch_key

    # Invalidate prior summary when brand/mode changes.
    st.session_state.pop("lp_summary", None)
    st.session_state.pop("lp_summary_key", None)
    st.session_state["lp_drill_page"] = 1