"""
HelpScout utility helpers — pure functions, no Streamlit dependency.
"""
import json
from pathlib import Path
import pandas as pd
# ---------------------------------------------------------------------------
# Topic taxonomy helpers
# ---------------------------------------------------------------------------
def load_topic_taxonomy(path: str = None) -> dict:
    """
    Read the topic taxonomy JSON file and index its entries by topic id.

    When ``path`` is None, the file is looked up at
    process_helpscout/config_files/topics.json under the third parent of
    this module's resolved path.

    Returns a dict mapping each topic id to
    {'label': str, 'description': str}. An entry without a 'description'
    key gets an empty string; a document without a 'topics' key yields an
    empty dict.
    """
    if path is None:
        module_file = Path(__file__).resolve()
        project_root = module_file.parent.parent.parent
        path = project_root / "process_helpscout" / "config_files" / "topics.json"

    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    taxonomy = {}
    for entry in payload.get("topics", []):
        # Read the id before the label so a malformed entry fails on 'id' first.
        topic_id = entry["id"]
        taxonomy[topic_id] = {
            "label": entry["label"],
            "description": entry.get("description", ""),
        }
    return taxonomy
def topic_label(topic_id: str, taxonomy: dict) -> str:
    """
    Look up the display label for ``topic_id`` in ``taxonomy``.

    Ids that are not in the taxonomy are rendered by replacing underscores
    with spaces and title-casing the result.
    """
    if topic_id not in taxonomy:
        # Unknown id: derive a readable label from the id itself.
        spaced = topic_id.replace("_", " ")
        return spaced.title()
    entry = taxonomy[topic_id]
    return entry["label"]
def parse_topics(value) -> list:
    """
    Split a comma-separated TOPICS string into a list of stripped lowercase ids.

    Anything that is not a string (None, NaN, pd.NA, numbers, lists,
    arrays, ...) yields an empty list, as does a blank string. Empty
    segments between commas are dropped.
    """
    # Check the type first. Missing-value markers are never ``str``, so this
    # covers them too, and it avoids evaluating ``pd.isna`` on a list-like,
    # which returns an array whose truth value is ambiguous (ValueError).
    if not isinstance(value, str):
        return []
    pieces = (piece.strip() for piece in value.split(","))
    return [piece.lower() for piece in pieces if piece]
def explode_topics(df: pd.DataFrame, topics_col: str = "topics") -> pd.DataFrame:
    """
    Expand each row into one row per topic id parsed from ``topics_col``.

    The input frame is not modified. Each value in ``topics_col`` is parsed
    with ``parse_topics``; rows that produce no topics are dropped. The
    result gains a 'topic_id' column, loses ``topics_col``, keeps every other
    column of ``df`` and carries a fresh 0..n-1 index.

    Callers are expected to supply a 'conversation_id' column so the output
    is one row per (conversation_id, topic_id); that is not validated here.
    """
    working = df.copy()
    working["_topic_list"] = working[topics_col].apply(parse_topics)

    # explode() emits one row per list element; an empty list becomes NaN.
    long_form = working.explode("_topic_list")
    long_form = long_form.rename(columns={"_topic_list": "topic_id"})

    topic_ids = long_form["topic_id"]
    has_topic = topic_ids.notna() & (topic_ids != "")
    long_form = long_form[has_topic]

    long_form = long_form.drop(columns=[topics_col], errors="ignore")
    return long_form.reset_index(drop=True)
# ---------------------------------------------------------------------------
# Boolean flag helpers
# ---------------------------------------------------------------------------
def boolean_flag_counts(df: pd.DataFrame) -> dict:
    """
    Count the rows flagged as refund request, cancellation and membership.

    Returns a dict keyed by 'is_refund_request', 'is_cancellation' and
    'is_membership'. Each value is the integer sum of that column, or 0
    when the column is absent from ``df``.
    """
    flag_columns = ("is_refund_request", "is_cancellation", "is_membership")
    available = df.columns
    return {
        name: int(df[name].sum()) if name in available else 0
        for name in flag_columns
    }
def compute_escalation_flag(df: pd.DataFrame, escalation_sentiments: list) -> pd.Series:
    """
    Build a boolean Series marking conversations that need escalation.

    A row is True when its 'sentiment_polarity' is one of
    ``escalation_sentiments``, OR its 'is_refund_request' flag is truthy,
    OR its 'is_cancellation' flag is truthy. A missing flag column counts as
    all-False and missing flag values count as False. The
    'sentiment_polarity' column must be present.
    """
    # Stand-in for an absent flag column, aligned to df's index.
    all_false = pd.Series(False, index=df.index)

    escalate = df["sentiment_polarity"].isin(escalation_sentiments)
    for flag_column in ("is_refund_request", "is_cancellation"):
        flag = df.get(flag_column, all_false)
        escalate = escalate | flag.fillna(False).astype(bool)
    return escalate
# ---------------------------------------------------------------------------
# Filter description builder
# ---------------------------------------------------------------------------
def build_filter_description(filters: dict, taxonomy: dict) -> str:
    """
    Render the analysis-page filter dict as a single human-readable line
    suitable for the agent prompt and PDF cover.

    Each active filter contributes one fragment, in a fixed order: date
    range, sentiments, topics (shown by label via ``topic_label``), statuses,
    sources, the refund / cancellation / membership "only" toggles, and the
    customer type when it is set to something other than "All". Fragments
    are joined with "; ". If nothing is active, a fixed "no filters" sentence
    is returned.
    """
    fragments = []

    date_range = filters.get("date_range")
    if date_range:
        start, end = date_range
        fragments.append(f"Date: {start} to {end}")

    sentiments = filters.get("sentiments")
    if sentiments:
        fragments.append("Sentiments: " + ", ".join(sentiments))

    topics = filters.get("topics")
    if topics:
        topic_names = [topic_label(topic, taxonomy) for topic in topics]
        fragments.append("Topics: " + ", ".join(topic_names))

    statuses = filters.get("statuses")
    if statuses:
        fragments.append("Status: " + ", ".join(statuses))

    sources = filters.get("sources")
    if sources:
        fragments.append("Source: " + ", ".join(sources))

    # Boolean toggles, listed in the order they appear in the output.
    toggles = (
        ("refund_only", "Refund requests only"),
        ("cancel_only", "Cancellations only"),
        ("membership_only", "Membership requests only"),
    )
    for key, text in toggles:
        if filters.get(key):
            fragments.append(text)

    member_status = filters.get("member_status", "All")
    if member_status and member_status != "All":
        fragments.append(f"Customer type: {member_status}")

    if not fragments:
        return "No filters applied — showing all conversations"
    return "; ".join(fragments)