# deep_dive_agentic.py """ Agentic analytical code generation + execution engine using Hugging Face FLOW: User Question ↓ LLM generates pandas code ↓ Python executes code safely ↓ LLM interprets results ↓ Return code + interpretation Environment: export HUGGINGFACE_API_TOKEN=... FIXES APPLIED (v2): - FIX 1: exec() now uses a single merged namespace dict so result variables are reliably written back (Python bug with separate globals/locals). - FIX 2: Smart result detection — scans namespace for any new DataFrame/Series instead of relying on hardcoded variable names (result_1, final_result). - FIX 3: _fix_pandas_compatibility() is now actually called before exec(). """ # --------------------------------------------------- # IMPORTS # --------------------------------------------------- import pandas as pd import json import os import re try: from huggingface_hub import InferenceClient except ImportError as exc: raise ImportError( "huggingface_hub is required. Install with `pip install huggingface-hub`." ) from exc from analytics.performance_analysis import generate_metric_view # --------------------------------------------------- # HF CONFIG # --------------------------------------------------- HF_CODE_MODEL_ID = os.environ.get("HF_MODEL_ID", "Qwen/Qwen2.5-Coder-7B-Instruct") HF_MODEL_ID = os.environ.get("HF_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct") HF_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") # --------------------------------------------------- # HELPER: GET INFERENCE CLIENT # --------------------------------------------------- def _get_hf_client(): if not HF_TOKEN: raise RuntimeError( "HUGGINGFACE_API_TOKEN is required. Set it in your environment." ) return InferenceClient(token=HF_TOKEN) # --------------------------------------------------- # HELPER: EXTRACT JSON FROM LLM RESPONSE # --------------------------------------------------- def _extract_json(text: str): match = re.search(r"\{.*\}", text, re.S) if not match: return None payload = match.group(0) try: return json.loads(payload) except json.JSONDecodeError: try: cleaned = re.sub(r"[\n\r]+", " ", payload) cleaned = re.sub(r"(['\"])?([a-zA-Z0-9_]+)(['\"])?\s*:\s*", r'"\2": ', cleaned) return json.loads(cleaned) except Exception: return None # --------------------------------------------------- # HELPER: FIX COMMON PANDAS COMPATIBILITY ISSUES # --------------------------------------------------- def _fix_pandas_compatibility(code: str) -> str: """ Fix common pandas API compatibility issues in generated code. Handles version differences between pandas versions. """ # Fix: .reset_index(name=...) -> .reset_index(names=[...]) code = re.sub( r"\.reset_index\(name=(['\"])([^'\"]+)\1\)", r".reset_index(names=[\1\2\1])", code ) # Fix: .reset_index(name= with variable code = re.sub( r"\.reset_index\(name=([a-zA-Z_][a-zA-Z0-9_]*)\)", r".reset_index(names=[\1])", code ) # Fix: df.append() deprecated in newer pandas -> pd.concat() code = re.sub( r"(\w+)\.append\((\w+),\s*ignore_index=True\)", r"pd.concat([\1, \2], ignore_index=True)", code ) return code # --------------------------------------------------- # STEP 1: CODE GENERATION # --------------------------------------------------- def generate_analysis_requirements( question: str, acq: pd.DataFrame, perf: pd.DataFrame, master_df: pd.DataFrame ): """ LLM breaks down question into 1-3 structured analytics requirements. Each requirement includes a description and executable pandas code. """ client = _get_hf_client() # Build detailed column descriptions acq_cols = { "account_id": "unique account identifier", "booking_date": "when account was originated", "booking_vintage": "year-month of origination (YYYY-MM)", "fico_band": "FICO score bracket (e.g., 700-750, 750-800)", "sourcing_channel": "acquisition channel (e.g., Online, Branch, Broker)", "city_tier": "city classification (Tier-1, Tier-2, Tier-3)", "occupation_type": "borrower occupation category", "credit_limit": "approved credit line amount" } perf_cols = { "account_id": "unique account identifier", "reporting_month": "month of performance observation (YYYY-MM)", "mob": "months on books (age of account in months)", "dpd": "days past due (0, 30, 60, 90+)", "balance": "current outstanding balance", "ncl_amount": "net charge-off amount (dollars)", "payment": "payment amount in period" } prompt = ( # ================================================================ # SECTION 1: ROLE & OBJECTIVE # ================================================================ "You are a senior retail credit risk analyst with 15+ years of experience " "managing consumer credit portfolios. Your job is to analyze a user question, " "plan the right analytical approach, write executable pandas code, and return " "a structured JSON plan.\n\n" "You have access to credit portfolio data. You will generate up to 3 pandas " "code blocks (requirements) to answer the question. Each requirement produces " "one result table. A comparison of two periods can and should be done in ONE " "requirement by building a single table with both periods as columns plus a " "delta column — do not waste two requirements on what can be one clean table.\n\n" # ================================================================ # SECTION 2: DATA DICTIONARY # ================================================================ "================================================================\n" "SECTION 2: DATA DICTIONARY\n" "================================================================\n\n" "THREE dataframes are available. Use master_df for any analysis that needs " "both acquisition attributes and performance metrics together.\n\n" "acq — acquisition data (one row per account):\n" " - account_id : unique account identifier (string)\n" " - booking_date : origination date (datetime)\n" " - booking_vintage : origination month as STRING in format 'YYYY-MM' e.g. '2024-07'\n" " - fico_band : FICO score bracket (string) e.g. '700-750', '750-800'\n" " - sourcing_channel : acquisition channel (string) e.g. 'Online', 'Branch', 'Broker'\n" " - city_tier : city classification (string) e.g. 'Tier-1', 'Tier-2', 'Tier-3'\n" " - occupation_type : borrower occupation (string) e.g. 'Salaried', 'Self-Employed'\n" " - credit_limit : approved credit line in currency units (float)\n\n" "perf — monthly performance data (one row per account per month):\n" " - account_id : unique account identifier (string)\n" " - reporting_month : observation month as STRING in format 'YYYY-MM' e.g. '2024-10'\n" " - mob : months on books — integer age of account (1, 2, 3 ... 12+)\n" " - dpd : days past due at that mob (integer: 0, 30, 60, 90)\n" " - balance : outstanding balance at that mob (float)\n" " - ncl_amount : net charge-off amount at that mob (float, 0 if not charged off)\n" " - payment : payment made in that month (float)\n\n" "master_df — perf LEFT JOINED with acq on account_id. Contains ALL columns above.\n" "IMPORTANT: Always use master_df when you need both segment columns AND performance columns.\n\n" "SEGMENT COLUMNS available for groupby in master_df:\n" " - booking_vintage, fico_band, sourcing_channel, city_tier, occupation_type\n\n" # ================================================================ # SECTION 3: METRIC DEFINITIONS & EXACT FORMULAS # ================================================================ "================================================================\n" "SECTION 3: METRIC DEFINITIONS & EXACT FORMULAS\n" "================================================================\n\n" "--- METRIC 1: Yr1 NCL Rate (Year 1 Net Charge-Off Rate) ---\n" "Definition: The annualised net charge-off rate for a vintage cohort over its first 12 months.\n" "What it measures: Credit loss intensity. Higher is worse.\n" "Formula:\n" " Yr1 NCL Rate = sum(ncl_amount where mob IN 1..12) / (sum(balance where mob IN 1..12) / 12) x 100\n" "Steps in pandas:\n" " Step 1 — Filter master_df for selected vintages AND mob between 1 and 12\n" " Step 2 — sum ncl_amount across all rows in that filter\n" " Step 3 — sum balance across all rows in that filter, then divide by 12\n" " Step 4 — divide Step 2 by Step 3, multiply by 100\n" "Pandas recipe (single vintage group):\n" " df_v = master_df[master_df['booking_vintage'].isin(vintages) & master_df['mob'].between(1,12)]\n" " ncl_rate = (df_v['ncl_amount'].sum() / (df_v['balance'].sum() / 12)) * 100\n\n" "--- METRIC 2: 30+@3 (Early Delinquency Rate at MOB 3) ---\n" "Definition: Share of accounts that are 30+ days past due at exactly month 3.\n" "What it measures: Early stress signal. Higher is worse.\n" "Formula:\n" " 30+@3 = count(accounts where mob==3 AND dpd>=30) / count(accounts where mob==3) x 100\n" "Pandas recipe:\n" " df_m3 = master_df[master_df['booking_vintage'].isin(vintages) & (master_df['mob']==3)]\n" " rate_30_3 = (df_m3['dpd'] >= 30).sum() / len(df_m3) * 100\n\n" "--- METRIC 3: 30+@6 (Delinquency Rate at MOB 6) ---\n" "Definition: Share of accounts that are 30+ days past due at exactly month 6.\n" "Formula:\n" " 30+@6 = count(accounts where mob==6 AND dpd>=30) / count(accounts where mob==6) x 100\n" "Pandas recipe:\n" " df_m6 = master_df[master_df['booking_vintage'].isin(vintages) & (master_df['mob']==6)]\n" " rate_30_6 = (df_m6['dpd'] >= 30).sum() / len(df_m6) * 100\n\n" "--- METRIC 4: 60+@6 (Severe Delinquency Rate at MOB 6) ---\n" "Definition: Share of accounts that are 60+ days past due at exactly month 6.\n" "Formula:\n" " 60+@6 = count(accounts where mob==6 AND dpd>=60) / count(accounts where mob==6) x 100\n" "Pandas recipe:\n" " rate_60_6 = (df_m6['dpd'] >= 60).sum() / len(df_m6) * 100\n\n" "--- RISK BENCHMARKS (flag in output) ---\n" "Metric | Green (Good) | Amber (Watch) | Red (Bad)\n" "30+@3 | < 3% | 3% - 6% | > 6%\n" "30+@6 | < 5% | 5% - 9% | > 9%\n" "60+@6 | < 2% | 2% - 4% | > 4%\n" "Yr1 NCL | < 3% | 3% - 6% | > 6%\n\n" # ================================================================ # SECTION 4: DATE & VINTAGE FILTERING RECIPES # ================================================================ "================================================================\n" "SECTION 4: DATE & VINTAGE FILTERING RECIPES\n" "================================================================\n\n" "CRITICAL: booking_vintage and reporting_month are STRINGS in 'YYYY-MM' format.\n" "Never use .dt accessor or pd.Grouper on these columns — they are not datetime.\n" "Always filter using string operations or .isin() as shown below.\n\n" "Single vintage:\n" " master_df[master_df['booking_vintage'] == '2024-10']\n\n" "Full year:\n" " master_df[master_df['booking_vintage'].str.startswith('2024')]\n\n" "Quarter (Q1=Jan-Mar, Q2=Apr-Jun, Q3=Jul-Sep, Q4=Oct-Dec):\n" " q3_2024 = ['2024-07','2024-08','2024-09']\n" " master_df[master_df['booking_vintage'].isin(q3_2024)]\n\n" "Half year (H1=Jan-Jun, H2=Jul-Dec):\n" " h1_2024 = ['2024-01','2024-02','2024-03','2024-04','2024-05','2024-06']\n" " h2_2024 = ['2024-07','2024-08','2024-09','2024-10','2024-11','2024-12']\n\n" "Comparison in ONE table (e.g. 2024 vs 2025 full year):\n" " v2024 = master_df[master_df['booking_vintage'].str.startswith('2024') & master_df['mob'].between(1,12)]\n" " v2025 = master_df[master_df['booking_vintage'].str.startswith('2025') & master_df['mob'].between(1,12)]\n" " ncl_2024 = (v2024['ncl_amount'].sum() / (v2024['balance'].sum() / 12)) * 100\n" " ncl_2025 = (v2025['ncl_amount'].sum() / (v2025['balance'].sum() / 12)) * 100\n" " result_1 = pd.DataFrame({'Period':['2024','2025'], 'Yr1_NCL_Rate':[ncl_2024, ncl_2025]})\n" " result_1['Delta_vs_2024'] = result_1['Yr1_NCL_Rate'] - result_1['Yr1_NCL_Rate'].iloc[0]\n\n" "Segment-level comparison in ONE table:\n" " # Compute metric per segment for each period, merge into one table\n" " def ncl_rate(df): return (df['ncl_amount'].sum() / (df['balance'].sum() / 12)) * 100\n" " seg_2024 = v2024.groupby('fico_band').apply(ncl_rate).reset_index()\n" " seg_2024.columns = ['fico_band','NCL_2024']\n" " seg_2025 = v2025.groupby('fico_band').apply(ncl_rate).reset_index()\n" " seg_2025.columns = ['fico_band','NCL_2025']\n" " result_2 = seg_2024.merge(seg_2025, on='fico_band')\n" " result_2['Delta'] = result_2['NCL_2025'] - result_2['NCL_2024']\n" " result_2 = result_2.sort_values('NCL_2025', ascending=False)\n\n" # ================================================================ # SECTION 5: QUESTION TYPE GUIDE # ================================================================ "================================================================\n" "SECTION 5: QUESTION TYPE GUIDE — HOW TO PLAN YOUR REQUIREMENTS\n" "================================================================\n\n" "Read the question carefully and identify which type it is. Then plan accordingly.\n\n" "TYPE 1 — DIRECT COMPARISON (e.g. 'compare NCL 2024 vs 2025', 'how did Q3 perform vs Q4')\n" " Planning approach:\n" " - Identify the two periods being compared\n" " - Identify which metrics are relevant (if not stated, use Yr1 NCL + 30+@3 as default)\n" " - Req 1: Overall metric comparison — ONE table with [Period, Metric, Delta]\n" " - Req 2 (optional): Same comparison broken down by most relevant segment\n" " - Req 3 (optional): Second segment breakdown or second metric family\n" " - DO NOT use two requirements to compute the same thing for two periods separately.\n" " Merge them into ONE table.\n\n" "TYPE 2 — FOCUSED EXPLORATION (e.g. 'analyse FICO band performance', 'how is Tier-2 doing')\n" " Planning approach:\n" " - Identify the segment of interest\n" " - Identify the time window (if not stated, use last 4 available quarters)\n" " - Req 1: Metric summary across that segment — all values ranked worst to best\n" " - Req 2: Trend over time for the highest-risk sub-segments identified in Req 1\n" " - Req 3 (optional): Cross-segment comparison (e.g. FICO x channel interaction)\n\n" "TYPE 3 — OPEN / DIAGNOSTIC (e.g. 'what is wrong with the portfolio', 'give me a full view')\n" " These questions require broad scanning across all segments and metrics simultaneously.\n" " This level of analysis requires significantly more compute, parallel execution, and\n" " multiple LLM reasoning loops that are beyond the current system design.\n" " ACTION: Return a single requirement that computes a high-level portfolio scorecard\n" " (all 4 metrics for last 2 years), and include a note in the description explaining\n" " that a full diagnostic requires an advanced multi-agent setup.\n\n" # ================================================================ # SECTION 6: WORKED EXAMPLES # ================================================================ "================================================================\n" "SECTION 6: WORKED EXAMPLES\n" "================================================================\n\n" "EXAMPLE A — TYPE 1 COMPARISON:\n" "User question: 'Compare Yr1 NCL for 2024 and 2025 across FICO bands'\n" "Planning:\n" " - Two periods: 2024 full year, 2025 full year\n" " - Metric: Yr1 NCL\n" " - Segment: fico_band\n" " - Req 1: Overall NCL rate for 2024 vs 2025 in one table\n" " - Req 2: NCL rate by fico_band for 2024 vs 2025 with delta, sorted worst first\n" "Expected output shape for Req 2:\n" " fico_band | NCL_2024 | NCL_2025 | Delta\n" " 600-650 | 8.2 | 7.1 | -1.1 (improvement)\n" " 650-700 | 5.4 | 4.8 | -0.6\n" " 700-750 | 3.1 | 2.9 | -0.2\n" " 750-800 | 1.8 | 1.5 | -0.3\n\n" "EXAMPLE B — TYPE 2 EXPLORATION:\n" "User question: 'How are different sourcing channels performing on early delinquency'\n" "Planning:\n" " - Segment: sourcing_channel\n" " - Metrics: 30+@3 and 30+@6 (early delinquency family)\n" " - Time window: last 4 quarters available in data\n" " - Req 1: 30+@3 and 30+@6 rates per channel, sorted worst first\n" " - Req 2: Trend of 30+@3 by channel across last 4 quarters (one row per quarter)\n" "Expected output shape for Req 1:\n" " sourcing_channel | rate_30_3 | rate_30_6 | risk_flag\n" " Broker | 7.8 | 11.2 | RED\n" " Online | 4.1 | 6.8 | AMBER\n" " Branch | 2.3 | 4.1 | GREEN\n\n" # ================================================================ # SECTION 7: CODE GENERATION RULES # ================================================================ "================================================================\n" "SECTION 7: CODE GENERATION RULES\n" "================================================================\n\n" "1. Always store final result in variable named exactly result_1, result_2, or result_3\n" " matching the sequence number of the requirement.\n\n" "2. Always use master_df when analysis needs both segment + performance columns.\n" " Use acq only for acquisition-only analysis (e.g. credit limit distribution).\n" " Use perf only for portfolio-wide performance with no segmentation.\n\n" "3. booking_vintage and reporting_month are strings. Never use .dt on them.\n" " Filter with == or .isin() or .str.startswith() only.\n\n" "4. Column names are EXACTLY as listed in Section 2. Do not guess or invent column names.\n" " If a column does not exist in the listed schema, do not use it.\n\n" "5. For comparisons: build ONE merged table with both periods as columns + delta.\n" " Do not produce two separate DataFrames for two periods.\n\n" "6. Add a risk_flag column where relevant using benchmarks from Section 3:\n" " df['risk_flag'] = pd.cut(df['rate'], bins=[0,3,6,100], labels=['GREEN','AMBER','RED'])\n\n" "7. Sort final result by the primary risk metric descending (worst first).\n\n" "8. In JSON, the code string must use \\n for newlines and escape all internal quotes.\n" " Do not put raw newlines inside the JSON string value.\n\n" "9. Keep code focused. No print statements. No plots. No file I/O.\n\n" # ================================================================ # SECTION 8: JSON OUTPUT FORMAT # ================================================================ "================================================================\n" "SECTION 8: OUTPUT FORMAT — RETURN ONLY THIS JSON, NOTHING ELSE\n" "================================================================\n\n" "{\n" ' "requirements": [\n' ' {\n' ' "sequence": 1,\n' ' "title": "Short descriptive title",\n' ' "description": "What this analysis does, why it answers the question, what the output table shows",\n' ' "code": "# pandas code here\\nresult_1 = ..."\n' ' }\n' ' ]\n' "}\n\n" "User Question: " + question ) messages = [ { "role": "system", "content": ( "You are a senior credit risk analyst who writes pandas code for portfolio analytics. " "You MUST return ONLY valid JSON with no text before or after it. " "Always name final result variables exactly result_1, result_2, or result_3. " "booking_vintage and reporting_month are string columns in YYYY-MM format — never use .dt on them. " "Always use master_df when you need both segment and performance data." ) }, {"role": "user", "content": prompt} ] response = client.chat.completions.create( model=HF_CODE_MODEL_ID, messages=messages, max_tokens=2048, temperature=0.1, top_p=0.95 ) response_text = ( response.choices[0].message.content if hasattr(response, "choices") else str(response) ) # Extract JSON spec = _extract_json(response_text) if not spec: return { "success": False, "requirements": [], "error": f"Failed to parse JSON from LLM response: {response_text[:200]}", "raw_response": response_text } requirements = spec.get("requirements", []) if not requirements: return { "success": False, "requirements": [], "error": f"LLM returned no requirements. Response keys: {list(spec.keys())}", "raw_response": response_text[:300] } print(f"[DEBUG] Generated {len(requirements)} requirements for question: {question[:80]}") for i, req in enumerate(requirements, 1): print(f" Req {i}: {req.get('title')}") return { "success": True, "requirements": requirements, "error": None } # --------------------------------------------------- # STEP 2: CODE EXECUTION (LOOPED) # --------------------------------------------------- def execute_requirement_code( code: str, acq: pd.DataFrame, perf: pd.DataFrame, master_df: pd.DataFrame, requirement_num: int ): """ Safely execute generated pandas code for a single requirement. FIXES: - FIX 1: Single namespace dict passed to exec() so variable assignments are reliably captured (Python quirk with separate globals/locals). - FIX 2: Smart result detection — checks named keys first, then scans for any new DataFrame/Series, then any non-None new variable. - FIX 3: _fix_pandas_compatibility() called before exec(). """ # FIX 3: Apply pandas compatibility patches BEFORE executing code = _fix_pandas_compatibility(code) # FIX 1: Merge everything into ONE dict so exec() writes back correctly. # When you pass separate globals + locals to exec(), Python's bytecode # compiler uses STORE_FAST which writes to an internal frame and does NOT # update the locals dict you passed in — so result variables always come # back None. Using a single namespace avoids this entirely. namespace = { "pd": pd, "generate_metric_view": generate_metric_view, "__builtins__": __builtins__, # Data available to generated code "acq": acq, "perf": perf, "master_df": master_df, } # Snapshot of keys before exec so we can detect newly created variables keys_before = set(namespace.keys()) try: print(f"[DEBUG] Executing requirement {requirement_num}...") print(f"[DEBUG] Code preview: {code[:120].strip()}...") exec(code, namespace) # FIX 1: single namespace # FIX 2: Smart result detection — three priority tiers # --- Tier 1: expected named result variables --- result = None expected_keys = [ f"result_{requirement_num}", "final_result", "result", ] for key in expected_keys: if key in namespace and namespace[key] is not None: result = namespace[key] print(f"[DEBUG] Found result in expected variable: '{key}'") break # --- Tier 2: any NEW DataFrame or Series created during exec --- if result is None: new_keys = set(namespace.keys()) - keys_before for key in new_keys: val = namespace[key] if isinstance(val, (pd.DataFrame, pd.Series)) and val is not None: result = val print(f"[DEBUG] Found result by scanning new DataFrame/Series: '{key}'") break # --- Tier 3: any new non-None, non-private variable --- if result is None: new_keys = set(namespace.keys()) - keys_before for key in sorted(new_keys): # sorted for determinism if key.startswith("_"): continue val = namespace[key] if val is not None: result = val print(f"[DEBUG] Fallback: found result in new variable: '{key}'") break if result is None: result = "Code executed successfully but no result variable was found in namespace." print(f"[DEBUG] Req {requirement_num} success. Result type: {type(result).__name__}") return { "success": True, "result": result, "error": None } except Exception as e: import traceback tb = traceback.format_exc() print(f"[DEBUG] Req {requirement_num} FAILED: {str(e)}") print(f"[DEBUG] Traceback:\n{tb}") return { "success": False, "result": None, "error": str(e) } def execute_all_requirements( requirements: list, acq: pd.DataFrame, perf: pd.DataFrame, master_df: pd.DataFrame ): """ Execute all requirements sequentially, building context. """ print(f"[DEBUG] Starting execution of {len(requirements)} requirements") all_results = [] context_text = "" for i, req in enumerate(requirements, 1): code = req.get("code", "") description = req.get("description", "") title = req.get("title", f"Analysis {i}") exec_result = execute_requirement_code(code, acq, perf, master_df, i) all_results.append({ "sequence": i, "title": title, "description": description, "code": code, # "success" is what app.py checks via res.get("success") # "execution_success" kept for backward compatibility "success": exec_result["success"], "execution_success": exec_result["success"], "result": exec_result["result"], "error": exec_result.get("error") }) # Build context for interpretation if exec_result["success"]: context_text += f"\nAnalysis {i} ({title}):\n{str(exec_result['result'])}\n" else: context_text += f"\nAnalysis {i} ({title}) FAILED:\n{exec_result['error']}\n" return all_results, context_text # --------------------------------------------------- # STEP 3: RESULT INTERPRETATION # --------------------------------------------------- def interpret_all_results( question: str, all_results: list, context_text: str ): """ Senior risk analyst LLM interprets all results holistically. """ print(f"[DEBUG] Interpreting results for {len(all_results)} analyses") print(f"[DEBUG] Successful executions: {sum(1 for r in all_results if r.get('success'))}") client = _get_hf_client() # Format all analyses analyses_text = "" for res in all_results: analyses_text += f"\n{'=' * 60}\n" analyses_text += f"Analysis {res['sequence']}: {res['title']}\n" analyses_text += f"Description: {res['description']}\n" analyses_text += f"{'=' * 60}\n" if res["success"]: analyses_text += f"Result:\n{str(res['result'])}\n" else: analyses_text += f"Execution Error: {res['error']}\n" prompt = ( "You are a senior retail credit risk analyst with 15+ years of portfolio management experience.\n\n" "You have just run a set of pandas analyses on a credit portfolio. " "Your task is to interpret the results and deliver a structured, specific, " "actionable risk assessment. Do not repeat raw numbers from the tables — " "interpret what they mean for the business.\n\n" "RISK BENCHMARKS FOR REFERENCE:\n" " 30+@3 : Green < 3% | Amber 3-6% | Red > 6%\n" " 30+@6 : Green < 5% | Amber 5-9% | Red > 9%\n" " 60+@6 : Green < 2% | Amber 2-4% | Red > 4%\n" " Yr1 NCL: Green < 3% | Amber 3-6% | Red > 6%\n\n" "STRUCTURE YOUR RESPONSE IN EXACTLY THESE 4 SECTIONS:\n\n" "1. HEADLINE FINDING (2-3 sentences)\n" " The single most important thing the data shows. Be specific — name the metric, " " the segment or vintage, and the direction (improving/worsening).\n\n" "2. KEY FINDINGS (3-5 bullet points)\n" " Each bullet must:\n" " - Name a specific segment, vintage, or metric (not generic statements)\n" " - State the actual value and what benchmark zone it falls in (Green/Amber/Red)\n" " - State whether it is improving, stable, or worsening vs the comparison period\n" " Example: '• Broker channel 30+@3 is 7.8% (RED) — worsening by +2.1pp vs 2024, " " suggesting deteriorating acquisition quality in this channel.'\n\n" "3. RED FLAGS (list only if any metric is in Amber or Red zone)\n" " For each red flag:\n" " - Name the segment/vintage and metric\n" " - State the value and benchmark zone\n" " - Give one specific business hypothesis for why this is happening\n" " If no red flags: write 'No red flags — all metrics within Green benchmarks.'\n\n" "4. RECOMMENDATIONS (2-4 actionable items)\n" " Each recommendation must be:\n" " - Tied to a specific finding above (not generic advice)\n" " - Actionable by a risk or credit team (tighten policy, adjust limit, investigate, monitor)\n" " - Prioritised: label each as IMMEDIATE, SHORT-TERM, or MONITOR\n" " Example: '[IMMEDIATE] Tighten credit bureau cut-off for Broker channel acquisitions — " " 30+@3 at 7.8% exceeds Red threshold and is trending upward.'\n\n" "User's Original Question:\n" + question + "\n\n" "Analyses Performed:\n" + analyses_text + "\n\n" "Provide your structured interpretation now:" ) messages = [ { "role": "system", "content": ( "You are a senior credit risk analyst delivering a structured portfolio risk assessment. " "Be specific — always name segments, vintages, and metrics by name. " "Always reference benchmark zones (Green/Amber/Red). " "Never give generic advice. Every recommendation must trace back to a specific data finding." ) }, {"role": "user", "content": prompt} ] response = client.chat.completions.create( model=HF_MODEL_ID, messages=messages, max_tokens=1024, temperature=0.3, top_p=0.95 ) interpretation = ( response.choices[0].message.content if hasattr(response, "choices") else str(response) ) return interpretation # --------------------------------------------------- # MASTER ORCHESTRATOR FUNCTION # --------------------------------------------------- def run_deep_dive_analysis( question: str, acq: pd.DataFrame, perf: pd.DataFrame, master_df: pd.DataFrame ): """ End-to-end deep dive analysis: 1. Break question into 1-3 structured requirements 2. Generate code for each requirement 3. Execute each requirement's code sequentially 4. Synthesize results and provide senior analyst interpretation """ print(f"\n[DEEP DIVE START] Question: {question}") print(f"[DEBUG] Data shapes - Acq: {acq.shape}, Perf: {perf.shape}, Master: {master_df.shape}") # Step 1: Generate requirements print("[DEBUG] Step 1: Generating requirements...") req_response = generate_analysis_requirements(question, acq, perf, master_df) if not req_response["success"]: return { "success": False, "question": question, "requirements": [], "all_results": [], "interpretation": f"Failed to generate requirements: {req_response['error']}", "error": req_response["error"] } requirements = req_response["requirements"][:3] # Cap at 3 # Step 2 & 3: Execute all requirements print(f"[DEBUG] Step 2-3: Executing {len(requirements)} requirements...") all_results, context_text = execute_all_requirements(requirements, acq, perf, master_df) # Step 4: Interpret results print("[DEBUG] Step 4: Interpreting all results...") interpretation = interpret_all_results(question, all_results, context_text) print("[DEEP DIVE END] Analysis complete\n") return { "success": True, "question": question, "requirements": requirements, "all_results": all_results, "interpretation": interpretation, "error": None }