""" HelpScout utility helpers — pure functions, no Streamlit dependency. """ import json from pathlib import Path import pandas as pd # --------------------------------------------------------------------------- # Topic taxonomy helpers # --------------------------------------------------------------------------- def load_topic_taxonomy(path: str = None) -> dict: """ Load topics.json and return {id: {'label': str, 'description': str}}. Default path resolves to process_helpscout/config_files/topics.json relative to the project root. """ if path is None: root = Path(__file__).resolve().parent.parent.parent path = root / "process_helpscout" / "config_files" / "topics.json" with open(path, "r", encoding="utf-8") as f: raw = json.load(f) return {t["id"]: {"label": t["label"], "description": t.get("description", "")} for t in raw.get("topics", [])} def topic_label(topic_id: str, taxonomy: dict) -> str: """Return human-readable label for a topic id. Falls back to title-cased id.""" if topic_id in taxonomy: return taxonomy[topic_id]["label"] return topic_id.replace("_", " ").title() def parse_topics(value) -> list: """Split a comma-separated TOPICS string into a list of stripped lowercase ids.""" if pd.isna(value) or not isinstance(value, str) or not value.strip(): return [] return [t.strip().lower() for t in value.split(",") if t.strip()] def explode_topics(df: pd.DataFrame, topics_col: str = "topics") -> pd.DataFrame: """ Return a new dataframe with one row per (conversation_id, topic_id). Requires df to have a 'conversation_id' column and a topics_col column. """ df = df.copy() df["_topic_list"] = df[topics_col].apply(parse_topics) exploded = df.explode("_topic_list").rename(columns={"_topic_list": "topic_id"}) exploded = exploded[exploded["topic_id"].notna() & (exploded["topic_id"] != "")] return exploded.drop(columns=[topics_col], errors="ignore").reset_index(drop=True) # --------------------------------------------------------------------------- # Boolean flag helpers # --------------------------------------------------------------------------- def boolean_flag_counts(df: pd.DataFrame) -> dict: """Return counts for refund / cancellation / membership flags.""" return { "is_refund_request": int(df["is_refund_request"].sum()) if "is_refund_request" in df.columns else 0, "is_cancellation": int(df["is_cancellation"].sum()) if "is_cancellation" in df.columns else 0, "is_membership": int(df["is_membership"].sum()) if "is_membership" in df.columns else 0, } def compute_escalation_flag(df: pd.DataFrame, escalation_sentiments: list) -> pd.Series: """ Boolean Series: True when conversation is negative-sentiment OR is a refund request OR is a cancellation. """ is_neg = df["sentiment_polarity"].isin(escalation_sentiments) is_refund = df.get("is_refund_request", pd.Series(False, index=df.index)).fillna(False).astype(bool) is_cancel = df.get("is_cancellation", pd.Series(False, index=df.index)).fillna(False).astype(bool) return is_neg | is_refund | is_cancel # --------------------------------------------------------------------------- # Filter description builder # --------------------------------------------------------------------------- def build_filter_description(filters: dict, taxonomy: dict) -> str: """ Convert the filter dict from the analysis page into a human-readable string suitable for the agent prompt and PDF cover. """ parts = [] if filters.get("date_range"): s, e = filters["date_range"] parts.append(f"Date: {s} to {e}") if filters.get("sentiments"): parts.append(f"Sentiments: {', '.join(filters['sentiments'])}") if filters.get("topics"): labels = [topic_label(t, taxonomy) for t in filters["topics"]] parts.append(f"Topics: {', '.join(labels)}") if filters.get("statuses"): parts.append(f"Status: {', '.join(filters['statuses'])}") if filters.get("sources"): parts.append(f"Source: {', '.join(filters['sources'])}") if filters.get("refund_only"): parts.append("Refund requests only") if filters.get("cancel_only"): parts.append("Cancellations only") if filters.get("membership_only"): parts.append("Membership requests only") member_status = filters.get("member_status", "All") if member_status and member_status != "All": parts.append(f"Customer type: {member_status}") return "; ".join(parts) if parts else "No filters applied — showing all conversations"