Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

App Files Files Community

Sentiment_analysis / visualization /utils /helpscout_utils.py

Danialebrat

Adding members sections

5f1963f 26 days ago

raw

history blame contribute delete

4.73 kB

	"""
	HelpScout utility helpers — pure functions, no Streamlit dependency.
	"""
	import json
	from pathlib import Path

	import pandas as pd


	# ---------------------------------------------------------------------------
	# Topic taxonomy helpers
	# ---------------------------------------------------------------------------

	def load_topic_taxonomy(path: str = None) -> dict:
	    """
	    Read the topic taxonomy JSON file and index its entries by topic id.

	    When ``path`` is None, the file is looked up three directory levels above
	    this module, under process_helpscout/config_files/topics.json.

	    Returns a dict shaped like {id: {'label': str, 'description': str}}.
	    A missing 'topics' key yields an empty dict; a missing 'description'
	    becomes an empty string. 'id' and 'label' are mandatory per entry
	    (KeyError otherwise).
	    """
	    if path is None:
	        here = Path(__file__).resolve()
	        project_root = here.parent.parent.parent
	        path = project_root / "process_helpscout" / "config_files" / "topics.json"
	    with open(path, "r", encoding="utf-8") as handle:
	        payload = json.load(handle)
	    taxonomy = {}
	    for entry in payload.get("topics", []):
	        # Resolve the key first so a malformed entry fails on 'id' before 'label'.
	        topic_key = entry["id"]
	        taxonomy[topic_key] = {
	            "label": entry["label"],
	            "description": entry.get("description", ""),
	        }
	    return taxonomy


	def topic_label(topic_id: str, taxonomy: dict) -> str:
	    """
	    Look up the display label for ``topic_id`` in ``taxonomy``.

	    Unknown ids are turned into a readable fallback by replacing underscores
	    with spaces and title-casing the result.
	    """
	    if topic_id not in taxonomy:
	        return topic_id.replace("_", " ").title()
	    entry = taxonomy[topic_id]
	    return entry["label"]


	def parse_topics(value) -> list:
	    """
	    Split a comma-separated TOPICS string into a list of stripped lowercase ids.

	    Any non-string input (None, NaN, pd.NA, numbers, lists, arrays) yields an
	    empty list, as do empty or whitespace-only strings. Empty segments between
	    commas are dropped.
	    """
	    # Check the type before anything else: calling pd.isna() on a list or
	    # array returns an element-wise array whose truth value is ambiguous and
	    # raises ValueError for more than one element. Missing values (None / NaN /
	    # pd.NA) are not strings, so this single guard covers them as well.
	    if not isinstance(value, str):
	        return []
	    cleaned = (piece.strip().lower() for piece in value.split(","))
	    return [topic for topic in cleaned if topic]


	def explode_topics(df: pd.DataFrame, topics_col: str = "topics") -> pd.DataFrame:
	    """
	    Expand ``df`` so that every topic of every row gets its own row.

	    The ``topics_col`` column is parsed with ``parse_topics``; each parsed id
	    ends up in a new 'topic_id' column while all other columns are repeated.
	    Rows without any topic are removed, ``topics_col`` itself is dropped, and
	    the index is renumbered from zero. The input dataframe is not modified.

	    Only ``topics_col`` is read here; other columns (e.g. 'conversation_id')
	    are simply carried through.
	    """
	    working = df.copy()
	    working["_topic_list"] = working[topics_col].apply(parse_topics)
	    long_form = working.explode("_topic_list")
	    long_form = long_form.rename(columns={"_topic_list": "topic_id"})
	    # Exploding an empty list leaves a missing value; discard those rows.
	    has_topic = long_form["topic_id"].notna() & (long_form["topic_id"] != "")
	    long_form = long_form[has_topic]
	    long_form = long_form.drop(columns=[topics_col], errors="ignore")
	    return long_form.reset_index(drop=True)


	# ---------------------------------------------------------------------------
	# Boolean flag helpers
	# ---------------------------------------------------------------------------

	def boolean_flag_counts(df: pd.DataFrame) -> dict:
	    """
	    Count how many rows are flagged as refund / cancellation / membership.

	    Each count is the integer sum of the matching column; a flag whose column
	    is absent from ``df`` is reported as 0.
	    """
	    flag_columns = ("is_refund_request", "is_cancellation", "is_membership")
	    counts = {}
	    for flag in flag_columns:
	        if flag in df.columns:
	            counts[flag] = int(df[flag].sum())
	        else:
	            counts[flag] = 0
	    return counts


	def compute_escalation_flag(df: pd.DataFrame, escalation_sentiments: list) -> pd.Series:
	    """
	    Boolean Series marking conversations that should be escalated.

	    A row is True when its 'sentiment_polarity' value is one of
	    ``escalation_sentiments`` OR it is a refund request OR it is a
	    cancellation.

	    'sentiment_polarity' must exist in ``df`` (KeyError otherwise). The
	    'is_refund_request' and 'is_cancellation' columns are optional: a missing
	    column counts as all-False, and missing values inside a present column
	    are treated as False.
	    """
	    matches_sentiment = df["sentiment_polarity"].isin(escalation_sentiments)
	    # Default for an absent flag column, aligned to df's index so the
	    # element-wise OR below lines up row by row.
	    all_false = pd.Series(False, index=df.index)
	    is_refund = df.get("is_refund_request", all_false).fillna(False).astype(bool)
	    is_cancel = df.get("is_cancellation", all_false).fillna(False).astype(bool)
	    # Plain `|` is the element-wise OR; the escaped form `\|` is not valid Python.
	    return matches_sentiment | is_refund | is_cancel


	# ---------------------------------------------------------------------------
	# Filter description builder
	# ---------------------------------------------------------------------------

	def build_filter_description(filters: dict, taxonomy: dict) -> str:
	    """
	    Render the active filters as a single human-readable line.

	    Each active filter contributes one fragment, and the fragments are joined
	    with '; ' in a fixed order: date range, sentiments, topics (shown by their
	    taxonomy labels), statuses, sources, the refund / cancellation /
	    membership toggles, and finally the customer type when it is set to
	    something other than "All". When nothing is active a fixed
	    "no filters" sentence is returned.

	    Per the original author's note, the text is meant for the agent prompt
	    and the PDF cover.
	    """
	    fragments = []
	    date_range = filters.get("date_range")
	    if date_range:
	        start, end = date_range
	        fragments.append(f"Date: {start} to {end}")
	    chosen_sentiments = filters.get("sentiments")
	    if chosen_sentiments:
	        fragments.append("Sentiments: " + ", ".join(chosen_sentiments))
	    chosen_topics = filters.get("topics")
	    if chosen_topics:
	        readable = [topic_label(topic, taxonomy) for topic in chosen_topics]
	        fragments.append("Topics: " + ", ".join(readable))
	    # List-valued filters that are rendered verbatim under a heading.
	    for key, heading in (("statuses", "Status"), ("sources", "Source")):
	        selected = filters.get(key)
	        if selected:
	            fragments.append(f"{heading}: {', '.join(selected)}")
	    # On/off toggles that each add a fixed phrase.
	    toggles = (
	        ("refund_only", "Refund requests only"),
	        ("cancel_only", "Cancellations only"),
	        ("membership_only", "Membership requests only"),
	    )
	    for key, phrase in toggles:
	        if filters.get(key):
	            fragments.append(phrase)
	    member_status = filters.get("member_status", "All")
	    if member_status and member_status != "All":
	        fragments.append(f"Customer type: {member_status}")
	    if not fragments:
	        return "No filters applied — showing all conversations"
	    return "; ".join(fragments)