File size: 8,101 Bytes
599973c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | """
Learning Paths utility helpers — pure functions, no Streamlit dependency.
"""
import json
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import pandas as pd
# ── Config helpers ────────────────────────────────────────────────────────────
def load_lp_config(config_path: Optional[str] = None) -> dict:
    """
    Load the ``learning_paths`` section of the visualisation config.

    Args:
        config_path: Path to a JSON config file. When ``None``, defaults to
            ``config/viz_config.json`` one directory above the directory
            that contains this module.

    Returns:
        The value stored under the top-level ``"learning_paths"`` key, or an
        empty dict when that key is absent.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    if config_path is None:
        config_path = (
            Path(__file__).resolve().parent.parent / "config" / "viz_config.json"
        )
    # Read as UTF-8 explicitly so non-ASCII labels do not depend on the
    # platform's locale encoding.
    with open(config_path, encoding="utf-8") as f:
        return json.load(f).get("learning_paths", {})
def get_brands(config: dict) -> List[str]:
    """Return the value stored under ``"brands"`` in *config*, or ``[]`` if absent."""
    brands = config.get("brands", [])
    return brands
def get_brand_color(brand: str, config: dict) -> str:
    """
    Look up the colour configured for *brand*.

    Reads the ``"brand_colors"`` mapping from *config* and falls back to
    ``"#607D8B"`` when the mapping or the brand entry is missing.
    """
    palette = config.get("brand_colors", {})
    colour = palette.get(brand, "#607D8B")
    return colour
def label_for_path(path_id, config: dict) -> str:
    """
    Return the display label for a learning path.

    The ``"path_labels"`` mapping in *config* is keyed by the string form of
    *path_id*; when no entry exists the label defaults to ``"Path <path_id>"``.
    """
    labels = config.get("path_labels", {})
    fallback = f"Path {path_id}"
    return labels.get(str(path_id), fallback)
# ── DataFrame merge helpers ───────────────────────────────────────────────────
def merge_lesson_metrics(
    lesson_map: pd.DataFrame,
    per_path_df: pd.DataFrame,
    video_df: pd.DataFrame,
    sentiment_df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Join all lesson-level metric DataFrames into one tidy frame.

    ``lesson_map`` is the left side of every join, so each of its rows is kept
    even when a metric frame has no matching row. The result is sorted by
    ``(learning_path_id, lesson_order)`` and carries a fresh 0..n-1 index.

    Args:
        lesson_map: Must contain ``brand``, ``learning_path_id``,
            ``first_lesson_content_id``, ``lesson_order``,
            ``lesson_content_id`` and ``content_title``.
        per_path_df: Completion metrics; joined on
            ``(learning_path_id, content_id)``.
        video_df: Video metrics; joined on ``(learning_path_id, content_id)``.
        sentiment_df: Sentiment metrics; joined on
            ``(learning_path_id, lesson_order)``.

    Returns:
        The merged frame, or an empty frame if ``lesson_map`` is empty.
        A metric frame that is empty or lacks one of its join keys is skipped.
        Count columns have missing values replaced by 0 and are cast to int;
        rate and score columns are not filled, so unmatched lessons keep NaN.

    Raises:
        KeyError: If ``lesson_map`` lacks one of the required columns.
    """
    if lesson_map.empty:
        return pd.DataFrame()

    base = lesson_map[[
        "brand", "learning_path_id", "first_lesson_content_id",
        "lesson_order", "lesson_content_id", "content_title",
    ]].copy()

    join_key = ["learning_path_id", "lesson_content_id"]

    if not per_path_df.empty and "content_id" in per_path_df.columns:
        pp = per_path_df.rename(columns={"content_id": "lesson_content_id"})
        # merge() raises KeyError when the right frame lacks a join key,
        # so skip the frame instead of crashing.
        if all(key in pp.columns for key in join_key):
            cols = ["lesson_content_id", "learning_path_id", "lesson_number",
                    "students_completed", "denominator_students",
                    "completion_rate"]
            cols = [c for c in cols if c in pp.columns]
            base = base.merge(pp[cols], on=join_key, how="left")

    if not video_df.empty and "content_id" in video_df.columns:
        vd = video_df.rename(columns={"content_id": "lesson_content_id"})
        if all(key in vd.columns for key in join_key):
            cols = ["lesson_content_id", "learning_path_id",
                    "total_starts", "total_completions",
                    "video_completion_rate"]
            cols = [c for c in cols if c in vd.columns]
            base = base.merge(vd[cols], on=join_key, how="left")

    sent_key = ["learning_path_id", "lesson_order"]
    if not sentiment_df.empty and all(
        key in sentiment_df.columns for key in sent_key
    ):
        sent_cols = [c for c in [
            "learning_path_id", "lesson_order",
            "total_comments", "very_positive", "positive", "neutral",
            "negative", "very_negative", "avg_sentiment_score",
        ] if c in sentiment_df.columns]
        base = base.merge(sentiment_df[sent_cols], on=sent_key, how="left")

    # Counts default to 0 for unmatched lessons; rates/scores stay NaN.
    for col in ["students_completed", "denominator_students", "total_starts",
                "total_completions", "total_comments",
                "very_positive", "positive", "neutral", "negative",
                "very_negative"]:
        if col in base.columns:
            base[col] = base[col].fillna(0).astype(int)

    base.sort_values(["learning_path_id", "lesson_order"], inplace=True)
    return base.reset_index(drop=True)
def merge_method_wide(
    method_df: pd.DataFrame,
    video_df: pd.DataFrame,
    sentiment_df: pd.DataFrame,
    config: dict,
) -> pd.DataFrame:
    """
    Merge method-wide completion data with video and sentiment metrics.

    Counterpart of ``merge_lesson_metrics`` that starts from ``method_df``
    and orders the result by ``method_lesson_number``.

    Args:
        method_df: Left side of every join. Must contain ``learning_path_id``
            and ``method_lesson_number``; a ``content_id`` column, if present,
            is renamed to ``lesson_content_id``.
        video_df: Video metrics; joined on ``(learning_path_id, content_id)``.
        sentiment_df: Sentiment metrics; joined on
            ``(learning_path_id, lesson_order)``.
        config: Learning-paths config passed to ``label_for_path`` to build
            the ``path_label`` column.

    Returns:
        The merged frame sorted by ``method_lesson_number`` with a fresh
        0..n-1 index, or an empty frame if ``method_df`` is empty. A metric
        frame is skipped when it is empty or when either side of the join
        lacks one of the join keys. Count columns have missing values
        replaced by 0 and are cast to int.

    Raises:
        KeyError: If ``method_df`` lacks ``learning_path_id`` or
            ``method_lesson_number``.
    """
    if method_df.empty:
        return pd.DataFrame()

    base = method_df.rename(columns={"content_id": "lesson_content_id"}).copy()
    join_key = ["learning_path_id", "lesson_content_id"]

    if not video_df.empty and "content_id" in video_df.columns:
        vd = video_df.rename(columns={"content_id": "lesson_content_id"})
        # merge() raises KeyError when either frame lacks a join key,
        # so skip the frame instead of crashing.
        if all(key in vd.columns and key in base.columns for key in join_key):
            cols = [c for c in ["lesson_content_id", "learning_path_id",
                                "total_starts", "total_completions",
                                "video_completion_rate"] if c in vd.columns]
            base = base.merge(vd[cols], on=join_key, how="left")

    sent_key = ["learning_path_id", "lesson_order"]
    if not sentiment_df.empty and all(
        key in base.columns and key in sentiment_df.columns for key in sent_key
    ):
        sent_cols = [c for c in [
            "learning_path_id", "lesson_order",
            "total_comments", "very_positive", "positive", "neutral",
            "negative", "very_negative", "avg_sentiment_score",
        ] if c in sentiment_df.columns]
        base = base.merge(sentiment_df[sent_cols], on=sent_key, how="left")

    # Add path label
    base["path_label"] = base["learning_path_id"].apply(
        lambda pid: label_for_path(pid, config)
    )

    # Counts default to 0 for unmatched lessons; rates/scores stay NaN.
    for col in ["students_completed", "total_starts", "total_completions",
                "total_comments", "very_positive", "positive", "neutral",
                "negative", "very_negative"]:
        if col in base.columns:
            base[col] = base[col].fillna(0).astype(int)

    base.sort_values("method_lesson_number", inplace=True)
    return base.reset_index(drop=True)
# ── Analysis helpers ──────────────────────────────────────────────────────────
def find_top_dropoffs(df: pd.DataFrame, n: int = 5,
                      rate_col: str = "completion_rate",
                      order_col: str = "lesson_order") -> pd.DataFrame:
    """
    Return the top-N lessons with the largest completion-rate drop
    compared to the previous lesson (within the same learning_path_id).

    Args:
        df: Lesson metrics containing ``learning_path_id``, *order_col* and
            *rate_col*. It is not modified.
        n: Maximum number of rows to return.
        rate_col: Column holding the rate to compare between lessons.
        order_col: Column that defines lesson order inside a path.

    Returns:
        Rows with a strictly positive drop, largest first, limited to the
        columns ``learning_path_id``, *order_col*, ``content_title`` (when
        present), ``prev_rate``, *rate_col* and ``dropoff``. An empty frame
        is returned when *df* is empty or lacks a required column.
    """
    required = ("learning_path_id", order_col, rate_col)
    if df.empty or any(col not in df.columns for col in required):
        return pd.DataFrame()

    # sort_values returns a new frame, so the columns added below never
    # touch the caller's DataFrame.
    result = df.sort_values(["learning_path_id", order_col])
    # shift(1) inside each path: the first lesson of a path has no
    # predecessor and therefore gets NaN.
    result["prev_rate"] = result.groupby("learning_path_id")[rate_col].shift(1)
    result["dropoff"] = result["prev_rate"] - result[rate_col]
    result = result[result["dropoff"].notna() & (result["dropoff"] > 0)]

    keep = [c for c in ["learning_path_id", order_col, "content_title",
                        "prev_rate", rate_col, "dropoff"]
            if c in result.columns]
    return result.nlargest(n, "dropoff")[keep].reset_index(drop=True)
def get_overview_kpis(merged: pd.DataFrame) -> dict:
    """
    Return a dict of high-level KPI values from the merged metrics frame.

    Returns:
        ``{}`` when *merged* is empty, otherwise a dict with:

        - ``total_students``: maximum of ``denominator_students`` (0 when the
          column is missing or holds no values).
        - ``avg_completion_pct``: mean of ``completion_rate`` times 100.
        - ``avg_sentiment_score``: mean of ``avg_sentiment_score``.
        - ``total_comments``: sum of ``total_comments``.
        - ``n_paths``: number of distinct ``learning_path_id`` values.
        - ``n_lessons``: number of rows in *merged*.

        A missing source column yields 0 / 0.0 for its KPI. The two averages
        are NaN when their column exists but contains only missing values.
    """
    if merged.empty:
        return {}

    columns = merged.columns

    total_students = 0
    if "denominator_students" in columns:
        peak = merged["denominator_students"].max()
        # max() of an all-NaN column is NaN, and int(NaN) raises ValueError.
        if pd.notna(peak):
            total_students = int(peak)

    avg_completion = (
        float(merged["completion_rate"].mean())
        if "completion_rate" in columns else 0.0
    )
    avg_sentiment = (
        float(merged["avg_sentiment_score"].mean())
        if "avg_sentiment_score" in columns else 0.0
    )
    total_comments = (
        int(merged["total_comments"].sum())
        if "total_comments" in columns else 0
    )
    n_paths = (
        merged["learning_path_id"].nunique()
        if "learning_path_id" in columns else 0
    )

    return {
        "total_students": total_students,
        "avg_completion_pct": avg_completion * 100,
        "avg_sentiment_score": avg_sentiment,
        "total_comments": total_comments,
        "n_paths": n_paths,
        "n_lessons": len(merged),
    }
def filter_by_paths(df: pd.DataFrame,
                    path_ids: Optional[List[int]]) -> pd.DataFrame:
    """
    Restrict *df* to rows whose ``learning_path_id`` is in *path_ids*.

    *df* itself is returned unchanged when *path_ids* is ``None`` or empty,
    when *df* is empty, or when it has no ``learning_path_id`` column.
    Otherwise a filtered frame with a fresh 0..n-1 index is returned.
    """
    nothing_to_filter = (
        not path_ids
        or df.empty
        or "learning_path_id" not in df.columns
    )
    if nothing_to_filter:
        return df

    selected = df["learning_path_id"].isin(path_ids)
    subset = df[selected]
    return subset.reset_index(drop=True)
def short_title(title: Optional[str], max_len: int = 35) -> str:
    """
    Truncate a content title for display in labels.

    Args:
        title: Raw title; may be ``None``, NaN, ``pd.NA`` or blank.
        max_len: Maximum number of title characters kept before the
            ellipsis is appended.

    Returns:
        An em dash placeholder when the title is missing or blank; the
        stripped title when it has at most *max_len* characters; otherwise
        its first *max_len* characters followed by a horizontal ellipsis.
    """
    # Written as escapes so the characters survive a wrong-codec round trip.
    # NOTE(review): the source literal was garbled; an em dash is assumed here.
    placeholder = "\u2014"
    suffix = "\u2026"

    # Test for missing values before truthiness: `not pd.NA` raises TypeError.
    if title is None or pd.isna(title) or not title:
        return placeholder

    t = str(title).strip()
    if not t:
        # Whitespace-only titles would otherwise render as an empty label.
        return placeholder
    return t if len(t) <= max_len else t[:max_len] + suffix