Spaces:

GenAICoder
/

Risk_Manager

Sleeping

App Files Files Community

Risk_Manager / analytics /deep_dive_agentic.py

GenAICoder

Update analytics/deep_dive_agentic.py

5c154c9 verified about 1 month ago

Raw

History Blame Contribute Delete

34.7 kB

	# deep_dive_agentic.py

	"""
	Agentic analytical code generation + execution engine using Hugging Face

	FLOW:
	User Question
	↓
	LLM generates pandas code
	↓
	Python executes code safely
	↓
	LLM interprets results
	↓
	Return code + interpretation

	Environment:
	export HUGGINGFACE_API_TOKEN=...

	FIXES APPLIED (v2):
	- FIX 1: exec() now uses a single merged namespace dict so result variables
	are reliably written back (Python bug with separate globals/locals).
	- FIX 2: Smart result detection — scans namespace for any new DataFrame/Series
	instead of relying on hardcoded variable names (result_1, final_result).
	- FIX 3: _fix_pandas_compatibility() is now actually called before exec().
	"""

	# ---------------------------------------------------
	# IMPORTS
	# ---------------------------------------------------

	import pandas as pd
	import json
	import os
	import re

	try:
	from huggingface_hub import InferenceClient
	except ImportError as exc:
	raise ImportError(
	"huggingface_hub is required. Install with `pip install huggingface-hub`."
	) from exc

	from analytics.performance_analysis import generate_metric_view

	# ---------------------------------------------------
	# HF CONFIG
	# ---------------------------------------------------

	HF_CODE_MODEL_ID = os.environ.get("HF_MODEL_ID", "Qwen/Qwen2.5-Coder-7B-Instruct")
	HF_MODEL_ID = os.environ.get("HF_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
	HF_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")


	# ---------------------------------------------------
	# HELPER: GET INFERENCE CLIENT
	# ---------------------------------------------------

	def _get_hf_client():
	if not HF_TOKEN:
	raise RuntimeError(
	"HUGGINGFACE_API_TOKEN is required. Set it in your environment."
	)
	return InferenceClient(token=HF_TOKEN)


	# ---------------------------------------------------
	# HELPER: EXTRACT JSON FROM LLM RESPONSE
	# ---------------------------------------------------

	def _extract_json(text: str):
	match = re.search(r"\{.*\}", text, re.S)
	if not match:
	return None
	payload = match.group(0)
	try:
	return json.loads(payload)
	except json.JSONDecodeError:
	try:
	cleaned = re.sub(r"[\n\r]+", " ", payload)
	cleaned = re.sub(r"(['\"])?([a-zA-Z0-9_]+)(['\"])?\s:\s", r'"\2": ', cleaned)
	return json.loads(cleaned)
	except Exception:
	return None


	# ---------------------------------------------------
	# HELPER: FIX COMMON PANDAS COMPATIBILITY ISSUES
	# ---------------------------------------------------

	def _fix_pandas_compatibility(code: str) -> str:
	"""
	Fix common pandas API compatibility issues in generated code.
	Handles version differences between pandas versions.
	"""
	# Fix: .reset_index(name=...) -> .reset_index(names=[...])
	code = re.sub(
	r"\.reset_index\(name=(['\"])([^'\"]+)\1\)",
	r".reset_index(names=[\1\2\1])",
	code
	)

	# Fix: .reset_index(name= with variable
	code = re.sub(
	r"\.reset_index\(name=([a-zA-Z_][a-zA-Z0-9_]*)\)",
	r".reset_index(names=[\1])",
	code
	)

	# Fix: df.append() deprecated in newer pandas -> pd.concat()
	code = re.sub(
	r"(\w+)\.append\((\w+),\s*ignore_index=True\)",
	r"pd.concat([\1, \2], ignore_index=True)",
	code
	)

	return code


	# ---------------------------------------------------
	# STEP 1: CODE GENERATION
	# ---------------------------------------------------

	def generate_analysis_requirements(
	question: str,
	acq: pd.DataFrame,
	perf: pd.DataFrame,
	master_df: pd.DataFrame
	):
	"""
	LLM breaks down question into 1-3 structured analytics requirements.
	Each requirement includes a description and executable pandas code.
	"""

	client = _get_hf_client()

	# Build detailed column descriptions
	acq_cols = {
	"account_id": "unique account identifier",
	"booking_date": "when account was originated",
	"booking_vintage": "year-month of origination (YYYY-MM)",
	"fico_band": "FICO score bracket (e.g., 700-750, 750-800)",
	"sourcing_channel": "acquisition channel (e.g., Online, Branch, Broker)",
	"city_tier": "city classification (Tier-1, Tier-2, Tier-3)",
	"occupation_type": "borrower occupation category",
	"credit_limit": "approved credit line amount"
	}

	perf_cols = {
	"account_id": "unique account identifier",
	"reporting_month": "month of performance observation (YYYY-MM)",
	"mob": "months on books (age of account in months)",
	"dpd": "days past due (0, 30, 60, 90+)",
	"balance": "current outstanding balance",
	"ncl_amount": "net charge-off amount (dollars)",
	"payment": "payment amount in period"
	}

	prompt = (
	# ================================================================
	# SECTION 1: ROLE & OBJECTIVE
	# ================================================================
	"You are a senior retail credit risk analyst with 15+ years of experience "
	"managing consumer credit portfolios. Your job is to analyze a user question, "
	"plan the right analytical approach, write executable pandas code, and return "
	"a structured JSON plan.\n\n"

	"You have access to credit portfolio data. You will generate up to 3 pandas "
	"code blocks (requirements) to answer the question. Each requirement produces "
	"one result table. A comparison of two periods can and should be done in ONE "
	"requirement by building a single table with both periods as columns plus a "
	"delta column — do not waste two requirements on what can be one clean table.\n\n"

	# ================================================================
	# SECTION 2: DATA DICTIONARY
	# ================================================================
	"================================================================\n"
	"SECTION 2: DATA DICTIONARY\n"
	"================================================================\n\n"

	"THREE dataframes are available. Use master_df for any analysis that needs "
	"both acquisition attributes and performance metrics together.\n\n"

	"acq — acquisition data (one row per account):\n"
	" - account_id : unique account identifier (string)\n"
	" - booking_date : origination date (datetime)\n"
	" - booking_vintage : origination month as STRING in format 'YYYY-MM' e.g. '2024-07'\n"
	" - fico_band : FICO score bracket (string) e.g. '700-750', '750-800'\n"
	" - sourcing_channel : acquisition channel (string) e.g. 'Online', 'Branch', 'Broker'\n"
	" - city_tier : city classification (string) e.g. 'Tier-1', 'Tier-2', 'Tier-3'\n"
	" - occupation_type : borrower occupation (string) e.g. 'Salaried', 'Self-Employed'\n"
	" - credit_limit : approved credit line in currency units (float)\n\n"

	"perf — monthly performance data (one row per account per month):\n"
	" - account_id : unique account identifier (string)\n"
	" - reporting_month : observation month as STRING in format 'YYYY-MM' e.g. '2024-10'\n"
	" - mob : months on books — integer age of account (1, 2, 3 ... 12+)\n"
	" - dpd : days past due at that mob (integer: 0, 30, 60, 90)\n"
	" - balance : outstanding balance at that mob (float)\n"
	" - ncl_amount : net charge-off amount at that mob (float, 0 if not charged off)\n"
	" - payment : payment made in that month (float)\n\n"

	"master_df — perf LEFT JOINED with acq on account_id. Contains ALL columns above.\n"
	"IMPORTANT: Always use master_df when you need both segment columns AND performance columns.\n\n"

	"SEGMENT COLUMNS available for groupby in master_df:\n"
	" - booking_vintage, fico_band, sourcing_channel, city_tier, occupation_type\n\n"

	# ================================================================
	# SECTION 3: METRIC DEFINITIONS & EXACT FORMULAS
	# ================================================================
	"================================================================\n"
	"SECTION 3: METRIC DEFINITIONS & EXACT FORMULAS\n"
	"================================================================\n\n"

	"--- METRIC 1: Yr1 NCL Rate (Year 1 Net Charge-Off Rate) ---\n"
	"Definition: The annualised net charge-off rate for a vintage cohort over its first 12 months.\n"
	"What it measures: Credit loss intensity. Higher is worse.\n"
	"Formula:\n"
	" Yr1 NCL Rate = sum(ncl_amount where mob IN 1..12) / (sum(balance where mob IN 1..12) / 12) x 100\n"
	"Steps in pandas:\n"
	" Step 1 — Filter master_df for selected vintages AND mob between 1 and 12\n"
	" Step 2 — sum ncl_amount across all rows in that filter\n"
	" Step 3 — sum balance across all rows in that filter, then divide by 12\n"
	" Step 4 — divide Step 2 by Step 3, multiply by 100\n"
	"Pandas recipe (single vintage group):\n"
	" df_v = master_df[master_df['booking_vintage'].isin(vintages) & master_df['mob'].between(1,12)]\n"
	" ncl_rate = (df_v['ncl_amount'].sum() / (df_v['balance'].sum() / 12)) * 100\n\n"

	"--- METRIC 2: 30+@3 (Early Delinquency Rate at MOB 3) ---\n"
	"Definition: Share of accounts that are 30+ days past due at exactly month 3.\n"
	"What it measures: Early stress signal. Higher is worse.\n"
	"Formula:\n"
	" 30+@3 = count(accounts where mob==3 AND dpd>=30) / count(accounts where mob==3) x 100\n"
	"Pandas recipe:\n"
	" df_m3 = master_df[master_df['booking_vintage'].isin(vintages) & (master_df['mob']==3)]\n"
	" rate_30_3 = (df_m3['dpd'] >= 30).sum() / len(df_m3) * 100\n\n"

	"--- METRIC 3: 30+@6 (Delinquency Rate at MOB 6) ---\n"
	"Definition: Share of accounts that are 30+ days past due at exactly month 6.\n"
	"Formula:\n"
	" 30+@6 = count(accounts where mob==6 AND dpd>=30) / count(accounts where mob==6) x 100\n"
	"Pandas recipe:\n"
	" df_m6 = master_df[master_df['booking_vintage'].isin(vintages) & (master_df['mob']==6)]\n"
	" rate_30_6 = (df_m6['dpd'] >= 30).sum() / len(df_m6) * 100\n\n"

	"--- METRIC 4: 60+@6 (Severe Delinquency Rate at MOB 6) ---\n"
	"Definition: Share of accounts that are 60+ days past due at exactly month 6.\n"
	"Formula:\n"
	" 60+@6 = count(accounts where mob==6 AND dpd>=60) / count(accounts where mob==6) x 100\n"
	"Pandas recipe:\n"
	" rate_60_6 = (df_m6['dpd'] >= 60).sum() / len(df_m6) * 100\n\n"

	"--- RISK BENCHMARKS (flag in output) ---\n"
	"Metric \| Green (Good) \| Amber (Watch) \| Red (Bad)\n"
	"30+@3 \| < 3% \| 3% - 6% \| > 6%\n"
	"30+@6 \| < 5% \| 5% - 9% \| > 9%\n"
	"60+@6 \| < 2% \| 2% - 4% \| > 4%\n"
	"Yr1 NCL \| < 3% \| 3% - 6% \| > 6%\n\n"

	# ================================================================
	# SECTION 4: DATE & VINTAGE FILTERING RECIPES
	# ================================================================
	"================================================================\n"
	"SECTION 4: DATE & VINTAGE FILTERING RECIPES\n"
	"================================================================\n\n"

	"CRITICAL: booking_vintage and reporting_month are STRINGS in 'YYYY-MM' format.\n"
	"Never use .dt accessor or pd.Grouper on these columns — they are not datetime.\n"
	"Always filter using string operations or .isin() as shown below.\n\n"

	"Single vintage:\n"
	" master_df[master_df['booking_vintage'] == '2024-10']\n\n"

	"Full year:\n"
	" master_df[master_df['booking_vintage'].str.startswith('2024')]\n\n"

	"Quarter (Q1=Jan-Mar, Q2=Apr-Jun, Q3=Jul-Sep, Q4=Oct-Dec):\n"
	" q3_2024 = ['2024-07','2024-08','2024-09']\n"
	" master_df[master_df['booking_vintage'].isin(q3_2024)]\n\n"

	"Half year (H1=Jan-Jun, H2=Jul-Dec):\n"
	" h1_2024 = ['2024-01','2024-02','2024-03','2024-04','2024-05','2024-06']\n"
	" h2_2024 = ['2024-07','2024-08','2024-09','2024-10','2024-11','2024-12']\n\n"

	"Comparison in ONE table (e.g. 2024 vs 2025 full year):\n"
	" v2024 = master_df[master_df['booking_vintage'].str.startswith('2024') & master_df['mob'].between(1,12)]\n"
	" v2025 = master_df[master_df['booking_vintage'].str.startswith('2025') & master_df['mob'].between(1,12)]\n"
	" ncl_2024 = (v2024['ncl_amount'].sum() / (v2024['balance'].sum() / 12)) * 100\n"
	" ncl_2025 = (v2025['ncl_amount'].sum() / (v2025['balance'].sum() / 12)) * 100\n"
	" result_1 = pd.DataFrame({'Period':['2024','2025'], 'Yr1_NCL_Rate':[ncl_2024, ncl_2025]})\n"
	" result_1['Delta_vs_2024'] = result_1['Yr1_NCL_Rate'] - result_1['Yr1_NCL_Rate'].iloc[0]\n\n"

	"Segment-level comparison in ONE table:\n"
	" # Compute metric per segment for each period, merge into one table\n"
	" def ncl_rate(df): return (df['ncl_amount'].sum() / (df['balance'].sum() / 12)) * 100\n"
	" seg_2024 = v2024.groupby('fico_band').apply(ncl_rate).reset_index()\n"
	" seg_2024.columns = ['fico_band','NCL_2024']\n"
	" seg_2025 = v2025.groupby('fico_band').apply(ncl_rate).reset_index()\n"
	" seg_2025.columns = ['fico_band','NCL_2025']\n"
	" result_2 = seg_2024.merge(seg_2025, on='fico_band')\n"
	" result_2['Delta'] = result_2['NCL_2025'] - result_2['NCL_2024']\n"
	" result_2 = result_2.sort_values('NCL_2025', ascending=False)\n\n"

	# ================================================================
	# SECTION 5: QUESTION TYPE GUIDE
	# ================================================================
	"================================================================\n"
	"SECTION 5: QUESTION TYPE GUIDE — HOW TO PLAN YOUR REQUIREMENTS\n"
	"================================================================\n\n"

	"Read the question carefully and identify which type it is. Then plan accordingly.\n\n"

	"TYPE 1 — DIRECT COMPARISON (e.g. 'compare NCL 2024 vs 2025', 'how did Q3 perform vs Q4')\n"
	" Planning approach:\n"
	" - Identify the two periods being compared\n"
	" - Identify which metrics are relevant (if not stated, use Yr1 NCL + 30+@3 as default)\n"
	" - Req 1: Overall metric comparison — ONE table with [Period, Metric, Delta]\n"
	" - Req 2 (optional): Same comparison broken down by most relevant segment\n"
	" - Req 3 (optional): Second segment breakdown or second metric family\n"
	" - DO NOT use two requirements to compute the same thing for two periods separately.\n"
	" Merge them into ONE table.\n\n"

	"TYPE 2 — FOCUSED EXPLORATION (e.g. 'analyse FICO band performance', 'how is Tier-2 doing')\n"
	" Planning approach:\n"
	" - Identify the segment of interest\n"
	" - Identify the time window (if not stated, use last 4 available quarters)\n"
	" - Req 1: Metric summary across that segment — all values ranked worst to best\n"
	" - Req 2: Trend over time for the highest-risk sub-segments identified in Req 1\n"
	" - Req 3 (optional): Cross-segment comparison (e.g. FICO x channel interaction)\n\n"

	"TYPE 3 — OPEN / DIAGNOSTIC (e.g. 'what is wrong with the portfolio', 'give me a full view')\n"
	" These questions require broad scanning across all segments and metrics simultaneously.\n"
	" This level of analysis requires significantly more compute, parallel execution, and\n"
	" multiple LLM reasoning loops that are beyond the current system design.\n"
	" ACTION: Return a single requirement that computes a high-level portfolio scorecard\n"
	" (all 4 metrics for last 2 years), and include a note in the description explaining\n"
	" that a full diagnostic requires an advanced multi-agent setup.\n\n"

	# ================================================================
	# SECTION 6: WORKED EXAMPLES
	# ================================================================
	"================================================================\n"
	"SECTION 6: WORKED EXAMPLES\n"
	"================================================================\n\n"

	"EXAMPLE A — TYPE 1 COMPARISON:\n"
	"User question: 'Compare Yr1 NCL for 2024 and 2025 across FICO bands'\n"
	"Planning:\n"
	" - Two periods: 2024 full year, 2025 full year\n"
	" - Metric: Yr1 NCL\n"
	" - Segment: fico_band\n"
	" - Req 1: Overall NCL rate for 2024 vs 2025 in one table\n"
	" - Req 2: NCL rate by fico_band for 2024 vs 2025 with delta, sorted worst first\n"
	"Expected output shape for Req 2:\n"
	" fico_band \| NCL_2024 \| NCL_2025 \| Delta\n"
	" 600-650 \| 8.2 \| 7.1 \| -1.1 (improvement)\n"
	" 650-700 \| 5.4 \| 4.8 \| -0.6\n"
	" 700-750 \| 3.1 \| 2.9 \| -0.2\n"
	" 750-800 \| 1.8 \| 1.5 \| -0.3\n\n"

	"EXAMPLE B — TYPE 2 EXPLORATION:\n"
	"User question: 'How are different sourcing channels performing on early delinquency'\n"
	"Planning:\n"
	" - Segment: sourcing_channel\n"
	" - Metrics: 30+@3 and 30+@6 (early delinquency family)\n"
	" - Time window: last 4 quarters available in data\n"
	" - Req 1: 30+@3 and 30+@6 rates per channel, sorted worst first\n"
	" - Req 2: Trend of 30+@3 by channel across last 4 quarters (one row per quarter)\n"
	"Expected output shape for Req 1:\n"
	" sourcing_channel \| rate_30_3 \| rate_30_6 \| risk_flag\n"
	" Broker \| 7.8 \| 11.2 \| RED\n"
	" Online \| 4.1 \| 6.8 \| AMBER\n"
	" Branch \| 2.3 \| 4.1 \| GREEN\n\n"

	# ================================================================
	# SECTION 7: CODE GENERATION RULES
	# ================================================================
	"================================================================\n"
	"SECTION 7: CODE GENERATION RULES\n"
	"================================================================\n\n"

	"1. Always store final result in variable named exactly result_1, result_2, or result_3\n"
	" matching the sequence number of the requirement.\n\n"

	"2. Always use master_df when analysis needs both segment + performance columns.\n"
	" Use acq only for acquisition-only analysis (e.g. credit limit distribution).\n"
	" Use perf only for portfolio-wide performance with no segmentation.\n\n"

	"3. booking_vintage and reporting_month are strings. Never use .dt on them.\n"
	" Filter with == or .isin() or .str.startswith() only.\n\n"

	"4. Column names are EXACTLY as listed in Section 2. Do not guess or invent column names.\n"
	" If a column does not exist in the listed schema, do not use it.\n\n"

	"5. For comparisons: build ONE merged table with both periods as columns + delta.\n"
	" Do not produce two separate DataFrames for two periods.\n\n"

	"6. Add a risk_flag column where relevant using benchmarks from Section 3:\n"
	" df['risk_flag'] = pd.cut(df['rate'], bins=[0,3,6,100], labels=['GREEN','AMBER','RED'])\n\n"

	"7. Sort final result by the primary risk metric descending (worst first).\n\n"

	"8. In JSON, the code string must use \\n for newlines and escape all internal quotes.\n"
	" Do not put raw newlines inside the JSON string value.\n\n"

	"9. Keep code focused. No print statements. No plots. No file I/O.\n\n"

	# ================================================================
	# SECTION 8: JSON OUTPUT FORMAT
	# ================================================================
	"================================================================\n"
	"SECTION 8: OUTPUT FORMAT — RETURN ONLY THIS JSON, NOTHING ELSE\n"
	"================================================================\n\n"

	"{\n"
	' "requirements": [\n'
	' {\n'
	' "sequence": 1,\n'
	' "title": "Short descriptive title",\n'
	' "description": "What this analysis does, why it answers the question, what the output table shows",\n'
	' "code": "# pandas code here\\nresult_1 = ..."\n'
	' }\n'
	' ]\n'
	"}\n\n"

	"User Question: " + question
	)

	messages = [
	{
	"role": "system",
	"content": (
	"You are a senior credit risk analyst who writes pandas code for portfolio analytics. "
	"You MUST return ONLY valid JSON with no text before or after it. "
	"Always name final result variables exactly result_1, result_2, or result_3. "
	"booking_vintage and reporting_month are string columns in YYYY-MM format — never use .dt on them. "
	"Always use master_df when you need both segment and performance data."
	)
	},
	{"role": "user", "content": prompt}
	]

	response = client.chat.completions.create(
	model=HF_CODE_MODEL_ID,
	messages=messages,
	max_tokens=2048,
	temperature=0.1,
	top_p=0.95
	)

	response_text = (
	response.choices[0].message.content
	if hasattr(response, "choices")
	else str(response)
	)

	# Extract JSON
	spec = _extract_json(response_text)

	if not spec:
	return {
	"success": False,
	"requirements": [],
	"error": f"Failed to parse JSON from LLM response: {response_text[:200]}",
	"raw_response": response_text
	}

	requirements = spec.get("requirements", [])

	if not requirements:
	return {
	"success": False,
	"requirements": [],
	"error": f"LLM returned no requirements. Response keys: {list(spec.keys())}",
	"raw_response": response_text[:300]
	}

	print(f"[DEBUG] Generated {len(requirements)} requirements for question: {question[:80]}")
	for i, req in enumerate(requirements, 1):
	print(f" Req {i}: {req.get('title')}")

	return {
	"success": True,
	"requirements": requirements,
	"error": None
	}


	# ---------------------------------------------------
	# STEP 2: CODE EXECUTION (LOOPED)
	# ---------------------------------------------------

	def execute_requirement_code(
	code: str,
	acq: pd.DataFrame,
	perf: pd.DataFrame,
	master_df: pd.DataFrame,
	requirement_num: int
	):
	"""
	Safely execute generated pandas code for a single requirement.

	FIXES:
	- FIX 1: Single namespace dict passed to exec() so variable assignments
	are reliably captured (Python quirk with separate globals/locals).
	- FIX 2: Smart result detection — checks named keys first, then scans
	for any new DataFrame/Series, then any non-None new variable.
	- FIX 3: _fix_pandas_compatibility() called before exec().
	"""

	# FIX 3: Apply pandas compatibility patches BEFORE executing
	code = _fix_pandas_compatibility(code)

	# FIX 1: Merge everything into ONE dict so exec() writes back correctly.
	# When you pass separate globals + locals to exec(), Python's bytecode
	# compiler uses STORE_FAST which writes to an internal frame and does NOT
	# update the locals dict you passed in — so result variables always come
	# back None. Using a single namespace avoids this entirely.
	namespace = {
	"pd": pd,
	"generate_metric_view": generate_metric_view,
	"__builtins__": __builtins__,
	# Data available to generated code
	"acq": acq,
	"perf": perf,
	"master_df": master_df,
	}

	# Snapshot of keys before exec so we can detect newly created variables
	keys_before = set(namespace.keys())

	try:
	print(f"[DEBUG] Executing requirement {requirement_num}...")
	print(f"[DEBUG] Code preview: {code[:120].strip()}...")

	exec(code, namespace) # FIX 1: single namespace

	# FIX 2: Smart result detection — three priority tiers

	# --- Tier 1: expected named result variables ---
	result = None
	expected_keys = [
	f"result_{requirement_num}",
	"final_result",
	"result",
	]
	for key in expected_keys:
	if key in namespace and namespace[key] is not None:
	result = namespace[key]
	print(f"[DEBUG] Found result in expected variable: '{key}'")
	break

	# --- Tier 2: any NEW DataFrame or Series created during exec ---
	if result is None:
	new_keys = set(namespace.keys()) - keys_before
	for key in new_keys:
	val = namespace[key]
	if isinstance(val, (pd.DataFrame, pd.Series)) and val is not None:
	result = val
	print(f"[DEBUG] Found result by scanning new DataFrame/Series: '{key}'")
	break

	# --- Tier 3: any new non-None, non-private variable ---
	if result is None:
	new_keys = set(namespace.keys()) - keys_before
	for key in sorted(new_keys): # sorted for determinism
	if key.startswith("_"):
	continue
	val = namespace[key]
	if val is not None:
	result = val
	print(f"[DEBUG] Fallback: found result in new variable: '{key}'")
	break

	if result is None:
	result = "Code executed successfully but no result variable was found in namespace."

	print(f"[DEBUG] Req {requirement_num} success. Result type: {type(result).__name__}")
	return {
	"success": True,
	"result": result,
	"error": None
	}

	except Exception as e:
	import traceback
	tb = traceback.format_exc()
	print(f"[DEBUG] Req {requirement_num} FAILED: {str(e)}")
	print(f"[DEBUG] Traceback:\n{tb}")
	return {
	"success": False,
	"result": None,
	"error": str(e)
	}


	def execute_all_requirements(
	requirements: list,
	acq: pd.DataFrame,
	perf: pd.DataFrame,
	master_df: pd.DataFrame
	):
	"""
	Execute all requirements sequentially, building context.
	"""

	print(f"[DEBUG] Starting execution of {len(requirements)} requirements")
	all_results = []
	context_text = ""

	for i, req in enumerate(requirements, 1):
	code = req.get("code", "")
	description = req.get("description", "")
	title = req.get("title", f"Analysis {i}")

	exec_result = execute_requirement_code(code, acq, perf, master_df, i)

	all_results.append({
	"sequence": i,
	"title": title,
	"description": description,
	"code": code,
	# "success" is what app.py checks via res.get("success")
	# "execution_success" kept for backward compatibility
	"success": exec_result["success"],
	"execution_success": exec_result["success"],
	"result": exec_result["result"],
	"error": exec_result.get("error")
	})

	# Build context for interpretation
	if exec_result["success"]:
	context_text += f"\nAnalysis {i} ({title}):\n{str(exec_result['result'])}\n"
	else:
	context_text += f"\nAnalysis {i} ({title}) FAILED:\n{exec_result['error']}\n"

	return all_results, context_text


	# ---------------------------------------------------
	# STEP 3: RESULT INTERPRETATION
	# ---------------------------------------------------

	def interpret_all_results(
	question: str,
	all_results: list,
	context_text: str
	):
	"""
	Senior risk analyst LLM interprets all results holistically.
	"""

	print(f"[DEBUG] Interpreting results for {len(all_results)} analyses")
	print(f"[DEBUG] Successful executions: {sum(1 for r in all_results if r.get('success'))}")

	client = _get_hf_client()

	# Format all analyses
	analyses_text = ""
	for res in all_results:
	analyses_text += f"\n{'=' * 60}\n"
	analyses_text += f"Analysis {res['sequence']}: {res['title']}\n"
	analyses_text += f"Description: {res['description']}\n"
	analyses_text += f"{'=' * 60}\n"
	if res["success"]:
	analyses_text += f"Result:\n{str(res['result'])}\n"
	else:
	analyses_text += f"Execution Error: {res['error']}\n"

	prompt = (
	"You are a senior retail credit risk analyst with 15+ years of portfolio management experience.\n\n"

	"You have just run a set of pandas analyses on a credit portfolio. "
	"Your task is to interpret the results and deliver a structured, specific, "
	"actionable risk assessment. Do not repeat raw numbers from the tables — "
	"interpret what they mean for the business.\n\n"

	"RISK BENCHMARKS FOR REFERENCE:\n"
	" 30+@3 : Green < 3% \| Amber 3-6% \| Red > 6%\n"
	" 30+@6 : Green < 5% \| Amber 5-9% \| Red > 9%\n"
	" 60+@6 : Green < 2% \| Amber 2-4% \| Red > 4%\n"
	" Yr1 NCL: Green < 3% \| Amber 3-6% \| Red > 6%\n\n"

	"STRUCTURE YOUR RESPONSE IN EXACTLY THESE 4 SECTIONS:\n\n"

	"1. HEADLINE FINDING (2-3 sentences)\n"
	" The single most important thing the data shows. Be specific — name the metric, "
	" the segment or vintage, and the direction (improving/worsening).\n\n"

	"2. KEY FINDINGS (3-5 bullet points)\n"
	" Each bullet must:\n"
	" - Name a specific segment, vintage, or metric (not generic statements)\n"
	" - State the actual value and what benchmark zone it falls in (Green/Amber/Red)\n"
	" - State whether it is improving, stable, or worsening vs the comparison period\n"
	" Example: '• Broker channel 30+@3 is 7.8% (RED) — worsening by +2.1pp vs 2024, "
	" suggesting deteriorating acquisition quality in this channel.'\n\n"

	"3. RED FLAGS (list only if any metric is in Amber or Red zone)\n"
	" For each red flag:\n"
	" - Name the segment/vintage and metric\n"
	" - State the value and benchmark zone\n"
	" - Give one specific business hypothesis for why this is happening\n"
	" If no red flags: write 'No red flags — all metrics within Green benchmarks.'\n\n"

	"4. RECOMMENDATIONS (2-4 actionable items)\n"
	" Each recommendation must be:\n"
	" - Tied to a specific finding above (not generic advice)\n"
	" - Actionable by a risk or credit team (tighten policy, adjust limit, investigate, monitor)\n"
	" - Prioritised: label each as IMMEDIATE, SHORT-TERM, or MONITOR\n"
	" Example: '[IMMEDIATE] Tighten credit bureau cut-off for Broker channel acquisitions — "
	" 30+@3 at 7.8% exceeds Red threshold and is trending upward.'\n\n"

	"User's Original Question:\n" + question + "\n\n"

	"Analyses Performed:\n" + analyses_text + "\n\n"

	"Provide your structured interpretation now:"
	)

	messages = [
	{
	"role": "system",
	"content": (
	"You are a senior credit risk analyst delivering a structured portfolio risk assessment. "
	"Be specific — always name segments, vintages, and metrics by name. "
	"Always reference benchmark zones (Green/Amber/Red). "
	"Never give generic advice. Every recommendation must trace back to a specific data finding."
	)
	},
	{"role": "user", "content": prompt}
	]

	response = client.chat.completions.create(
	model=HF_MODEL_ID,
	messages=messages,
	max_tokens=1024,
	temperature=0.3,
	top_p=0.95
	)

	interpretation = (
	response.choices[0].message.content
	if hasattr(response, "choices")
	else str(response)
	)
	return interpretation


	# ---------------------------------------------------
	# MASTER ORCHESTRATOR FUNCTION
	# ---------------------------------------------------

	def run_deep_dive_analysis(
	question: str,
	acq: pd.DataFrame,
	perf: pd.DataFrame,
	master_df: pd.DataFrame
	):
	"""
	End-to-end deep dive analysis:
	1. Break question into 1-3 structured requirements
	2. Generate code for each requirement
	3. Execute each requirement's code sequentially
	4. Synthesize results and provide senior analyst interpretation
	"""

	print(f"\n[DEEP DIVE START] Question: {question}")
	print(f"[DEBUG] Data shapes - Acq: {acq.shape}, Perf: {perf.shape}, Master: {master_df.shape}")

	# Step 1: Generate requirements
	print("[DEBUG] Step 1: Generating requirements...")
	req_response = generate_analysis_requirements(question, acq, perf, master_df)

	if not req_response["success"]:
	return {
	"success": False,
	"question": question,
	"requirements": [],
	"all_results": [],
	"interpretation": f"Failed to generate requirements: {req_response['error']}",
	"error": req_response["error"]
	}

	requirements = req_response["requirements"][:3] # Cap at 3

	# Step 2 & 3: Execute all requirements
	print(f"[DEBUG] Step 2-3: Executing {len(requirements)} requirements...")
	all_results, context_text = execute_all_requirements(requirements, acq, perf, master_df)

	# Step 4: Interpret results
	print("[DEBUG] Step 4: Interpreting all results...")
	interpretation = interpret_all_results(question, all_results, context_text)
	print("[DEEP DIVE END] Analysis complete\n")

	return {
	"success": True,
	"question": question,
	"requirements": requirements,
	"all_results": all_results,
	"interpretation": interpretation,
	"error": None
	}