Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

File size: 4,726 Bytes

"""
HelpScout utility helpers — pure functions, no Streamlit dependency.
"""
import json
from pathlib import Path

import pandas as pd


# ---------------------------------------------------------------------------
# Topic taxonomy helpers
# ---------------------------------------------------------------------------

def load_topic_taxonomy(path: str = None) -> dict:
    """
    Read the topic taxonomy JSON file and index its entries by topic id.

    Args:
        path: Location of the JSON file. When None, the file
            process_helpscout/config_files/topics.json is looked up under
            the directory three levels above this module.

    Returns:
        A dict mapping each topic id to {'label': str, 'description': str}.
        A topic without a 'description' key gets an empty description; a
        file without a 'topics' key yields an empty dict.

    Raises:
        KeyError: if a topic entry has no 'id' or no 'label'.
    """
    if path is None:
        project_root = Path(__file__).resolve().parent.parent.parent
        path = project_root.joinpath("process_helpscout", "config_files", "topics.json")

    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    taxonomy = {}
    for entry in payload.get("topics", []):
        # Read the id first so a malformed entry fails on 'id' before 'label'.
        topic_id = entry["id"]
        taxonomy[topic_id] = {
            "label": entry["label"],
            "description": entry.get("description", ""),
        }
    return taxonomy


def topic_label(topic_id: str, taxonomy: dict) -> str:
    """
    Look up the display label for a topic id.

    Ids missing from the taxonomy are rendered by replacing underscores
    with spaces and title-casing the result.
    """
    if topic_id not in taxonomy:
        # Unknown id: derive a readable label from the id itself.
        words = topic_id.replace("_", " ")
        return words.title()
    entry = taxonomy[topic_id]
    return entry["label"]


def parse_topics(value) -> list:
    """
    Split a comma-separated TOPICS string into a list of stripped lowercase ids.

    Args:
        value: The raw cell value. Only strings are parsed.

    Returns:
        The topic ids in their original order, each stripped of surrounding
        whitespace and lowercased; empty fragments between commas are
        dropped. Any non-string input (None, NaN, pd.NA, numbers, lists,
        arrays, ...) and blank strings yield an empty list.
    """
    # Check the type before anything else: pd.isna() on a list or array
    # returns an element-wise array whose truth value is ambiguous and raises
    # ValueError. Every missing-value marker is a non-string anyway, so the
    # isinstance test alone covers None / NaN / pd.NA.
    if not isinstance(value, str):
        return []
    pieces = (piece.strip() for piece in value.split(","))
    return [piece.lower() for piece in pieces if piece]


def explode_topics(df: pd.DataFrame, topics_col: str = "topics") -> pd.DataFrame:
    """
    Expand the topics column so that every topic id gets its own row.

    The input frame is not modified. Each row's topics_col value is parsed
    with parse_topics; a row with N topics becomes N rows, and rows with no
    topics are dropped. The result carries a 'topic_id' column in place of
    topics_col, keeps all other columns, and has a fresh 0..n-1 index.

    Intended for frames with one row per conversation (e.g. carrying a
    'conversation_id' column), giving one row per (conversation, topic).
    """
    working = df.copy()
    working["_topic_list"] = working[topics_col].apply(parse_topics)

    long_form = working.explode("_topic_list")
    long_form = long_form.rename(columns={"_topic_list": "topic_id"})

    # Exploding an empty list leaves a NaN placeholder row; discard those
    # along with any blank ids.
    has_topic = long_form["topic_id"].notna()
    non_blank = long_form["topic_id"] != ""
    long_form = long_form[has_topic & non_blank]

    long_form = long_form.drop(columns=[topics_col], errors="ignore")
    return long_form.reset_index(drop=True)


# ---------------------------------------------------------------------------
# Boolean flag helpers
# ---------------------------------------------------------------------------

def boolean_flag_counts(df: pd.DataFrame) -> dict:
    """
    Count the rows flagged as refund request, cancellation, and membership.

    Returns a dict keyed by 'is_refund_request', 'is_cancellation' and
    'is_membership'. Each value is the integer sum of the column of the
    same name, or 0 when the frame has no such column.
    """
    flag_columns = ("is_refund_request", "is_cancellation", "is_membership")
    available = df.columns
    counts = {}
    for flag in flag_columns:
        if flag in available:
            counts[flag] = int(df[flag].sum())
        else:
            counts[flag] = 0
    return counts


def compute_escalation_flag(df: pd.DataFrame, escalation_sentiments: list) -> pd.Series:
    """
    Build a boolean Series marking rows that need escalation.

    A row is flagged when its 'sentiment_polarity' value is one of
    escalation_sentiments, OR its 'is_refund_request' flag is set, OR its
    'is_cancellation' flag is set. The two flag columns are optional: a
    missing column counts as all False, and missing values inside a column
    count as False. 'sentiment_polarity' itself must be present.
    """
    all_false = pd.Series(False, index=df.index)

    flagged = df["sentiment_polarity"].isin(escalation_sentiments)
    for column in ("is_refund_request", "is_cancellation"):
        raw_flag = df.get(column, all_false)
        # Treat missing values as "not set" before coercing to bool.
        flagged = flagged | raw_flag.fillna(False).astype(bool)
    return flagged


# ---------------------------------------------------------------------------
# Filter description builder
# ---------------------------------------------------------------------------

def build_filter_description(filters: dict, taxonomy: dict) -> str:
    """
    Render the analysis-page filter dict as a single human-readable line,
    suitable for the agent prompt and PDF cover.

    Each active filter contributes one fragment, in this fixed order: date
    range, sentiments, topics (shown by label via topic_label), statuses,
    sources, the refund / cancellation / membership toggles, and the
    customer type (skipped when it is 'All' or empty). Fragments are joined
    with '; '. When nothing is active a fixed "no filters" sentence is
    returned instead.
    """
    segments = []

    window = filters.get("date_range")
    if window:
        start, end = window
        segments.append(f"Date: {start} to {end}")

    # (filter key, heading, optional per-item renderer)
    list_filters = (
        ("sentiments", "Sentiments", None),
        ("topics", "Topics", lambda tid: topic_label(tid, taxonomy)),
        ("statuses", "Status", None),
        ("sources", "Source", None),
    )
    for key, heading, render in list_filters:
        chosen = filters.get(key)
        if not chosen:
            continue
        shown = chosen if render is None else [render(item) for item in chosen]
        segments.append(heading + ": " + ", ".join(shown))

    toggles = (
        ("refund_only", "Refund requests only"),
        ("cancel_only", "Cancellations only"),
        ("membership_only", "Membership requests only"),
    )
    segments.extend(text for key, text in toggles if filters.get(key))

    customer_type = filters.get("member_status", "All")
    if customer_type and customer_type != "All":
        segments.append(f"Customer type: {customer_type}")

    if not segments:
        return "No filters applied — showing all conversations"
    return "; ".join(segments)