Spaces:
Sleeping
Sleeping
| # deep_dive_agentic.py | |
| """ | |
| Agentic analytical code generation + execution engine using Hugging Face | |
| FLOW: | |
| User Question | |
| ↓ | |
| LLM generates pandas code | |
| ↓ | |
| Python executes code safely | |
| ↓ | |
| LLM interprets results | |
| ↓ | |
| Return code + interpretation | |
| Environment: | |
| export HUGGINGFACE_API_TOKEN=... | |
| FIXES APPLIED (v2): | |
| - FIX 1: exec() now uses a single merged namespace dict so result variables | |
| are reliably written back (Python bug with separate globals/locals). | |
| - FIX 2: Smart result detection — scans namespace for any new DataFrame/Series | |
| instead of relying on hardcoded variable names (result_1, final_result). | |
| - FIX 3: _fix_pandas_compatibility() is now actually called before exec(). | |
| """ | |
| # --------------------------------------------------- | |
| # IMPORTS | |
| # --------------------------------------------------- | |
| import pandas as pd | |
| import json | |
| import os | |
| import re | |
| try: | |
| from huggingface_hub import InferenceClient | |
| except ImportError as exc: | |
| raise ImportError( | |
| "huggingface_hub is required. Install with `pip install huggingface-hub`." | |
| ) from exc | |
| from analytics.performance_analysis import generate_metric_view | |
| # --------------------------------------------------- | |
| # HF CONFIG | |
| # --------------------------------------------------- | |
| HF_CODE_MODEL_ID = os.environ.get("HF_MODEL_ID", "Qwen/Qwen2.5-Coder-7B-Instruct") | |
| HF_MODEL_ID = os.environ.get("HF_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct") | |
| HF_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") | |
| # --------------------------------------------------- | |
| # HELPER: GET INFERENCE CLIENT | |
| # --------------------------------------------------- | |
| def _get_hf_client(): | |
| if not HF_TOKEN: | |
| raise RuntimeError( | |
| "HUGGINGFACE_API_TOKEN is required. Set it in your environment." | |
| ) | |
| return InferenceClient(token=HF_TOKEN) | |
| # --------------------------------------------------- | |
| # HELPER: EXTRACT JSON FROM LLM RESPONSE | |
| # --------------------------------------------------- | |
| def _extract_json(text: str): | |
| match = re.search(r"\{.*\}", text, re.S) | |
| if not match: | |
| return None | |
| payload = match.group(0) | |
| try: | |
| return json.loads(payload) | |
| except json.JSONDecodeError: | |
| try: | |
| cleaned = re.sub(r"[\n\r]+", " ", payload) | |
| cleaned = re.sub(r"(['\"])?([a-zA-Z0-9_]+)(['\"])?\s*:\s*", r'"\2": ', cleaned) | |
| return json.loads(cleaned) | |
| except Exception: | |
| return None | |
| # --------------------------------------------------- | |
| # HELPER: FIX COMMON PANDAS COMPATIBILITY ISSUES | |
| # --------------------------------------------------- | |
| def _fix_pandas_compatibility(code: str) -> str: | |
| """ | |
| Fix common pandas API compatibility issues in generated code. | |
| Handles version differences between pandas versions. | |
| """ | |
| # Fix: .reset_index(name=...) -> .reset_index(names=[...]) | |
| code = re.sub( | |
| r"\.reset_index\(name=(['\"])([^'\"]+)\1\)", | |
| r".reset_index(names=[\1\2\1])", | |
| code | |
| ) | |
| # Fix: .reset_index(name= with variable | |
| code = re.sub( | |
| r"\.reset_index\(name=([a-zA-Z_][a-zA-Z0-9_]*)\)", | |
| r".reset_index(names=[\1])", | |
| code | |
| ) | |
| # Fix: df.append() deprecated in newer pandas -> pd.concat() | |
| code = re.sub( | |
| r"(\w+)\.append\((\w+),\s*ignore_index=True\)", | |
| r"pd.concat([\1, \2], ignore_index=True)", | |
| code | |
| ) | |
| return code | |
| # --------------------------------------------------- | |
| # STEP 1: CODE GENERATION | |
| # --------------------------------------------------- | |
| def generate_analysis_requirements( | |
| question: str, | |
| acq: pd.DataFrame, | |
| perf: pd.DataFrame, | |
| master_df: pd.DataFrame | |
| ): | |
| """ | |
| LLM breaks down question into 1-3 structured analytics requirements. | |
| Each requirement includes a description and executable pandas code. | |
| """ | |
| client = _get_hf_client() | |
| # Build detailed column descriptions | |
| acq_cols = { | |
| "account_id": "unique account identifier", | |
| "booking_date": "when account was originated", | |
| "booking_vintage": "year-month of origination (YYYY-MM)", | |
| "fico_band": "FICO score bracket (e.g., 700-750, 750-800)", | |
| "sourcing_channel": "acquisition channel (e.g., Online, Branch, Broker)", | |
| "city_tier": "city classification (Tier-1, Tier-2, Tier-3)", | |
| "occupation_type": "borrower occupation category", | |
| "credit_limit": "approved credit line amount" | |
| } | |
| perf_cols = { | |
| "account_id": "unique account identifier", | |
| "reporting_month": "month of performance observation (YYYY-MM)", | |
| "mob": "months on books (age of account in months)", | |
| "dpd": "days past due (0, 30, 60, 90+)", | |
| "balance": "current outstanding balance", | |
| "ncl_amount": "net charge-off amount (dollars)", | |
| "payment": "payment amount in period" | |
| } | |
| prompt = ( | |
| # ================================================================ | |
| # SECTION 1: ROLE & OBJECTIVE | |
| # ================================================================ | |
| "You are a senior retail credit risk analyst with 15+ years of experience " | |
| "managing consumer credit portfolios. Your job is to analyze a user question, " | |
| "plan the right analytical approach, write executable pandas code, and return " | |
| "a structured JSON plan.\n\n" | |
| "You have access to credit portfolio data. You will generate up to 3 pandas " | |
| "code blocks (requirements) to answer the question. Each requirement produces " | |
| "one result table. A comparison of two periods can and should be done in ONE " | |
| "requirement by building a single table with both periods as columns plus a " | |
| "delta column — do not waste two requirements on what can be one clean table.\n\n" | |
| # ================================================================ | |
| # SECTION 2: DATA DICTIONARY | |
| # ================================================================ | |
| "================================================================\n" | |
| "SECTION 2: DATA DICTIONARY\n" | |
| "================================================================\n\n" | |
| "THREE dataframes are available. Use master_df for any analysis that needs " | |
| "both acquisition attributes and performance metrics together.\n\n" | |
| "acq — acquisition data (one row per account):\n" | |
| " - account_id : unique account identifier (string)\n" | |
| " - booking_date : origination date (datetime)\n" | |
| " - booking_vintage : origination month as STRING in format 'YYYY-MM' e.g. '2024-07'\n" | |
| " - fico_band : FICO score bracket (string) e.g. '700-750', '750-800'\n" | |
| " - sourcing_channel : acquisition channel (string) e.g. 'Online', 'Branch', 'Broker'\n" | |
| " - city_tier : city classification (string) e.g. 'Tier-1', 'Tier-2', 'Tier-3'\n" | |
| " - occupation_type : borrower occupation (string) e.g. 'Salaried', 'Self-Employed'\n" | |
| " - credit_limit : approved credit line in currency units (float)\n\n" | |
| "perf — monthly performance data (one row per account per month):\n" | |
| " - account_id : unique account identifier (string)\n" | |
| " - reporting_month : observation month as STRING in format 'YYYY-MM' e.g. '2024-10'\n" | |
| " - mob : months on books — integer age of account (1, 2, 3 ... 12+)\n" | |
| " - dpd : days past due at that mob (integer: 0, 30, 60, 90)\n" | |
| " - balance : outstanding balance at that mob (float)\n" | |
| " - ncl_amount : net charge-off amount at that mob (float, 0 if not charged off)\n" | |
| " - payment : payment made in that month (float)\n\n" | |
| "master_df — perf LEFT JOINED with acq on account_id. Contains ALL columns above.\n" | |
| "IMPORTANT: Always use master_df when you need both segment columns AND performance columns.\n\n" | |
| "SEGMENT COLUMNS available for groupby in master_df:\n" | |
| " - booking_vintage, fico_band, sourcing_channel, city_tier, occupation_type\n\n" | |
| # ================================================================ | |
| # SECTION 3: METRIC DEFINITIONS & EXACT FORMULAS | |
| # ================================================================ | |
| "================================================================\n" | |
| "SECTION 3: METRIC DEFINITIONS & EXACT FORMULAS\n" | |
| "================================================================\n\n" | |
| "--- METRIC 1: Yr1 NCL Rate (Year 1 Net Charge-Off Rate) ---\n" | |
| "Definition: The annualised net charge-off rate for a vintage cohort over its first 12 months.\n" | |
| "What it measures: Credit loss intensity. Higher is worse.\n" | |
| "Formula:\n" | |
| " Yr1 NCL Rate = sum(ncl_amount where mob IN 1..12) / (sum(balance where mob IN 1..12) / 12) x 100\n" | |
| "Steps in pandas:\n" | |
| " Step 1 — Filter master_df for selected vintages AND mob between 1 and 12\n" | |
| " Step 2 — sum ncl_amount across all rows in that filter\n" | |
| " Step 3 — sum balance across all rows in that filter, then divide by 12\n" | |
| " Step 4 — divide Step 2 by Step 3, multiply by 100\n" | |
| "Pandas recipe (single vintage group):\n" | |
| " df_v = master_df[master_df['booking_vintage'].isin(vintages) & master_df['mob'].between(1,12)]\n" | |
| " ncl_rate = (df_v['ncl_amount'].sum() / (df_v['balance'].sum() / 12)) * 100\n\n" | |
| "--- METRIC 2: 30+@3 (Early Delinquency Rate at MOB 3) ---\n" | |
| "Definition: Share of accounts that are 30+ days past due at exactly month 3.\n" | |
| "What it measures: Early stress signal. Higher is worse.\n" | |
| "Formula:\n" | |
| " 30+@3 = count(accounts where mob==3 AND dpd>=30) / count(accounts where mob==3) x 100\n" | |
| "Pandas recipe:\n" | |
| " df_m3 = master_df[master_df['booking_vintage'].isin(vintages) & (master_df['mob']==3)]\n" | |
| " rate_30_3 = (df_m3['dpd'] >= 30).sum() / len(df_m3) * 100\n\n" | |
| "--- METRIC 3: 30+@6 (Delinquency Rate at MOB 6) ---\n" | |
| "Definition: Share of accounts that are 30+ days past due at exactly month 6.\n" | |
| "Formula:\n" | |
| " 30+@6 = count(accounts where mob==6 AND dpd>=30) / count(accounts where mob==6) x 100\n" | |
| "Pandas recipe:\n" | |
| " df_m6 = master_df[master_df['booking_vintage'].isin(vintages) & (master_df['mob']==6)]\n" | |
| " rate_30_6 = (df_m6['dpd'] >= 30).sum() / len(df_m6) * 100\n\n" | |
| "--- METRIC 4: 60+@6 (Severe Delinquency Rate at MOB 6) ---\n" | |
| "Definition: Share of accounts that are 60+ days past due at exactly month 6.\n" | |
| "Formula:\n" | |
| " 60+@6 = count(accounts where mob==6 AND dpd>=60) / count(accounts where mob==6) x 100\n" | |
| "Pandas recipe:\n" | |
| " rate_60_6 = (df_m6['dpd'] >= 60).sum() / len(df_m6) * 100\n\n" | |
| "--- RISK BENCHMARKS (flag in output) ---\n" | |
| "Metric | Green (Good) | Amber (Watch) | Red (Bad)\n" | |
| "30+@3 | < 3% | 3% - 6% | > 6%\n" | |
| "30+@6 | < 5% | 5% - 9% | > 9%\n" | |
| "60+@6 | < 2% | 2% - 4% | > 4%\n" | |
| "Yr1 NCL | < 3% | 3% - 6% | > 6%\n\n" | |
| # ================================================================ | |
| # SECTION 4: DATE & VINTAGE FILTERING RECIPES | |
| # ================================================================ | |
| "================================================================\n" | |
| "SECTION 4: DATE & VINTAGE FILTERING RECIPES\n" | |
| "================================================================\n\n" | |
| "CRITICAL: booking_vintage and reporting_month are STRINGS in 'YYYY-MM' format.\n" | |
| "Never use .dt accessor or pd.Grouper on these columns — they are not datetime.\n" | |
| "Always filter using string operations or .isin() as shown below.\n\n" | |
| "Single vintage:\n" | |
| " master_df[master_df['booking_vintage'] == '2024-10']\n\n" | |
| "Full year:\n" | |
| " master_df[master_df['booking_vintage'].str.startswith('2024')]\n\n" | |
| "Quarter (Q1=Jan-Mar, Q2=Apr-Jun, Q3=Jul-Sep, Q4=Oct-Dec):\n" | |
| " q3_2024 = ['2024-07','2024-08','2024-09']\n" | |
| " master_df[master_df['booking_vintage'].isin(q3_2024)]\n\n" | |
| "Half year (H1=Jan-Jun, H2=Jul-Dec):\n" | |
| " h1_2024 = ['2024-01','2024-02','2024-03','2024-04','2024-05','2024-06']\n" | |
| " h2_2024 = ['2024-07','2024-08','2024-09','2024-10','2024-11','2024-12']\n\n" | |
| "Comparison in ONE table (e.g. 2024 vs 2025 full year):\n" | |
| " v2024 = master_df[master_df['booking_vintage'].str.startswith('2024') & master_df['mob'].between(1,12)]\n" | |
| " v2025 = master_df[master_df['booking_vintage'].str.startswith('2025') & master_df['mob'].between(1,12)]\n" | |
| " ncl_2024 = (v2024['ncl_amount'].sum() / (v2024['balance'].sum() / 12)) * 100\n" | |
| " ncl_2025 = (v2025['ncl_amount'].sum() / (v2025['balance'].sum() / 12)) * 100\n" | |
| " result_1 = pd.DataFrame({'Period':['2024','2025'], 'Yr1_NCL_Rate':[ncl_2024, ncl_2025]})\n" | |
| " result_1['Delta_vs_2024'] = result_1['Yr1_NCL_Rate'] - result_1['Yr1_NCL_Rate'].iloc[0]\n\n" | |
| "Segment-level comparison in ONE table:\n" | |
| " # Compute metric per segment for each period, merge into one table\n" | |
| " def ncl_rate(df): return (df['ncl_amount'].sum() / (df['balance'].sum() / 12)) * 100\n" | |
| " seg_2024 = v2024.groupby('fico_band').apply(ncl_rate).reset_index()\n" | |
| " seg_2024.columns = ['fico_band','NCL_2024']\n" | |
| " seg_2025 = v2025.groupby('fico_band').apply(ncl_rate).reset_index()\n" | |
| " seg_2025.columns = ['fico_band','NCL_2025']\n" | |
| " result_2 = seg_2024.merge(seg_2025, on='fico_band')\n" | |
| " result_2['Delta'] = result_2['NCL_2025'] - result_2['NCL_2024']\n" | |
| " result_2 = result_2.sort_values('NCL_2025', ascending=False)\n\n" | |
| # ================================================================ | |
| # SECTION 5: QUESTION TYPE GUIDE | |
| # ================================================================ | |
| "================================================================\n" | |
| "SECTION 5: QUESTION TYPE GUIDE — HOW TO PLAN YOUR REQUIREMENTS\n" | |
| "================================================================\n\n" | |
| "Read the question carefully and identify which type it is. Then plan accordingly.\n\n" | |
| "TYPE 1 — DIRECT COMPARISON (e.g. 'compare NCL 2024 vs 2025', 'how did Q3 perform vs Q4')\n" | |
| " Planning approach:\n" | |
| " - Identify the two periods being compared\n" | |
| " - Identify which metrics are relevant (if not stated, use Yr1 NCL + 30+@3 as default)\n" | |
| " - Req 1: Overall metric comparison — ONE table with [Period, Metric, Delta]\n" | |
| " - Req 2 (optional): Same comparison broken down by most relevant segment\n" | |
| " - Req 3 (optional): Second segment breakdown or second metric family\n" | |
| " - DO NOT use two requirements to compute the same thing for two periods separately.\n" | |
| " Merge them into ONE table.\n\n" | |
| "TYPE 2 — FOCUSED EXPLORATION (e.g. 'analyse FICO band performance', 'how is Tier-2 doing')\n" | |
| " Planning approach:\n" | |
| " - Identify the segment of interest\n" | |
| " - Identify the time window (if not stated, use last 4 available quarters)\n" | |
| " - Req 1: Metric summary across that segment — all values ranked worst to best\n" | |
| " - Req 2: Trend over time for the highest-risk sub-segments identified in Req 1\n" | |
| " - Req 3 (optional): Cross-segment comparison (e.g. FICO x channel interaction)\n\n" | |
| "TYPE 3 — OPEN / DIAGNOSTIC (e.g. 'what is wrong with the portfolio', 'give me a full view')\n" | |
| " These questions require broad scanning across all segments and metrics simultaneously.\n" | |
| " This level of analysis requires significantly more compute, parallel execution, and\n" | |
| " multiple LLM reasoning loops that are beyond the current system design.\n" | |
| " ACTION: Return a single requirement that computes a high-level portfolio scorecard\n" | |
| " (all 4 metrics for last 2 years), and include a note in the description explaining\n" | |
| " that a full diagnostic requires an advanced multi-agent setup.\n\n" | |
| # ================================================================ | |
| # SECTION 6: WORKED EXAMPLES | |
| # ================================================================ | |
| "================================================================\n" | |
| "SECTION 6: WORKED EXAMPLES\n" | |
| "================================================================\n\n" | |
| "EXAMPLE A — TYPE 1 COMPARISON:\n" | |
| "User question: 'Compare Yr1 NCL for 2024 and 2025 across FICO bands'\n" | |
| "Planning:\n" | |
| " - Two periods: 2024 full year, 2025 full year\n" | |
| " - Metric: Yr1 NCL\n" | |
| " - Segment: fico_band\n" | |
| " - Req 1: Overall NCL rate for 2024 vs 2025 in one table\n" | |
| " - Req 2: NCL rate by fico_band for 2024 vs 2025 with delta, sorted worst first\n" | |
| "Expected output shape for Req 2:\n" | |
| " fico_band | NCL_2024 | NCL_2025 | Delta\n" | |
| " 600-650 | 8.2 | 7.1 | -1.1 (improvement)\n" | |
| " 650-700 | 5.4 | 4.8 | -0.6\n" | |
| " 700-750 | 3.1 | 2.9 | -0.2\n" | |
| " 750-800 | 1.8 | 1.5 | -0.3\n\n" | |
| "EXAMPLE B — TYPE 2 EXPLORATION:\n" | |
| "User question: 'How are different sourcing channels performing on early delinquency'\n" | |
| "Planning:\n" | |
| " - Segment: sourcing_channel\n" | |
| " - Metrics: 30+@3 and 30+@6 (early delinquency family)\n" | |
| " - Time window: last 4 quarters available in data\n" | |
| " - Req 1: 30+@3 and 30+@6 rates per channel, sorted worst first\n" | |
| " - Req 2: Trend of 30+@3 by channel across last 4 quarters (one row per quarter)\n" | |
| "Expected output shape for Req 1:\n" | |
| " sourcing_channel | rate_30_3 | rate_30_6 | risk_flag\n" | |
| " Broker | 7.8 | 11.2 | RED\n" | |
| " Online | 4.1 | 6.8 | AMBER\n" | |
| " Branch | 2.3 | 4.1 | GREEN\n\n" | |
| # ================================================================ | |
| # SECTION 7: CODE GENERATION RULES | |
| # ================================================================ | |
| "================================================================\n" | |
| "SECTION 7: CODE GENERATION RULES\n" | |
| "================================================================\n\n" | |
| "1. Always store final result in variable named exactly result_1, result_2, or result_3\n" | |
| " matching the sequence number of the requirement.\n\n" | |
| "2. Always use master_df when analysis needs both segment + performance columns.\n" | |
| " Use acq only for acquisition-only analysis (e.g. credit limit distribution).\n" | |
| " Use perf only for portfolio-wide performance with no segmentation.\n\n" | |
| "3. booking_vintage and reporting_month are strings. Never use .dt on them.\n" | |
| " Filter with == or .isin() or .str.startswith() only.\n\n" | |
| "4. Column names are EXACTLY as listed in Section 2. Do not guess or invent column names.\n" | |
| " If a column does not exist in the listed schema, do not use it.\n\n" | |
| "5. For comparisons: build ONE merged table with both periods as columns + delta.\n" | |
| " Do not produce two separate DataFrames for two periods.\n\n" | |
| "6. Add a risk_flag column where relevant using benchmarks from Section 3:\n" | |
| " df['risk_flag'] = pd.cut(df['rate'], bins=[0,3,6,100], labels=['GREEN','AMBER','RED'])\n\n" | |
| "7. Sort final result by the primary risk metric descending (worst first).\n\n" | |
| "8. In JSON, the code string must use \\n for newlines and escape all internal quotes.\n" | |
| " Do not put raw newlines inside the JSON string value.\n\n" | |
| "9. Keep code focused. No print statements. No plots. No file I/O.\n\n" | |
| # ================================================================ | |
| # SECTION 8: JSON OUTPUT FORMAT | |
| # ================================================================ | |
| "================================================================\n" | |
| "SECTION 8: OUTPUT FORMAT — RETURN ONLY THIS JSON, NOTHING ELSE\n" | |
| "================================================================\n\n" | |
| "{\n" | |
| ' "requirements": [\n' | |
| ' {\n' | |
| ' "sequence": 1,\n' | |
| ' "title": "Short descriptive title",\n' | |
| ' "description": "What this analysis does, why it answers the question, what the output table shows",\n' | |
| ' "code": "# pandas code here\\nresult_1 = ..."\n' | |
| ' }\n' | |
| ' ]\n' | |
| "}\n\n" | |
| "User Question: " + question | |
| ) | |
| messages = [ | |
| { | |
| "role": "system", | |
| "content": ( | |
| "You are a senior credit risk analyst who writes pandas code for portfolio analytics. " | |
| "You MUST return ONLY valid JSON with no text before or after it. " | |
| "Always name final result variables exactly result_1, result_2, or result_3. " | |
| "booking_vintage and reporting_month are string columns in YYYY-MM format — never use .dt on them. " | |
| "Always use master_df when you need both segment and performance data." | |
| ) | |
| }, | |
| {"role": "user", "content": prompt} | |
| ] | |
| response = client.chat.completions.create( | |
| model=HF_CODE_MODEL_ID, | |
| messages=messages, | |
| max_tokens=2048, | |
| temperature=0.1, | |
| top_p=0.95 | |
| ) | |
| response_text = ( | |
| response.choices[0].message.content | |
| if hasattr(response, "choices") | |
| else str(response) | |
| ) | |
| # Extract JSON | |
| spec = _extract_json(response_text) | |
| if not spec: | |
| return { | |
| "success": False, | |
| "requirements": [], | |
| "error": f"Failed to parse JSON from LLM response: {response_text[:200]}", | |
| "raw_response": response_text | |
| } | |
| requirements = spec.get("requirements", []) | |
| if not requirements: | |
| return { | |
| "success": False, | |
| "requirements": [], | |
| "error": f"LLM returned no requirements. Response keys: {list(spec.keys())}", | |
| "raw_response": response_text[:300] | |
| } | |
| print(f"[DEBUG] Generated {len(requirements)} requirements for question: {question[:80]}") | |
| for i, req in enumerate(requirements, 1): | |
| print(f" Req {i}: {req.get('title')}") | |
| return { | |
| "success": True, | |
| "requirements": requirements, | |
| "error": None | |
| } | |
| # --------------------------------------------------- | |
| # STEP 2: CODE EXECUTION (LOOPED) | |
| # --------------------------------------------------- | |
| def execute_requirement_code( | |
| code: str, | |
| acq: pd.DataFrame, | |
| perf: pd.DataFrame, | |
| master_df: pd.DataFrame, | |
| requirement_num: int | |
| ): | |
| """ | |
| Safely execute generated pandas code for a single requirement. | |
| FIXES: | |
| - FIX 1: Single namespace dict passed to exec() so variable assignments | |
| are reliably captured (Python quirk with separate globals/locals). | |
| - FIX 2: Smart result detection — checks named keys first, then scans | |
| for any new DataFrame/Series, then any non-None new variable. | |
| - FIX 3: _fix_pandas_compatibility() called before exec(). | |
| """ | |
| # FIX 3: Apply pandas compatibility patches BEFORE executing | |
| code = _fix_pandas_compatibility(code) | |
| # FIX 1: Merge everything into ONE dict so exec() writes back correctly. | |
| # When you pass separate globals + locals to exec(), Python's bytecode | |
| # compiler uses STORE_FAST which writes to an internal frame and does NOT | |
| # update the locals dict you passed in — so result variables always come | |
| # back None. Using a single namespace avoids this entirely. | |
| namespace = { | |
| "pd": pd, | |
| "generate_metric_view": generate_metric_view, | |
| "__builtins__": __builtins__, | |
| # Data available to generated code | |
| "acq": acq, | |
| "perf": perf, | |
| "master_df": master_df, | |
| } | |
| # Snapshot of keys before exec so we can detect newly created variables | |
| keys_before = set(namespace.keys()) | |
| try: | |
| print(f"[DEBUG] Executing requirement {requirement_num}...") | |
| print(f"[DEBUG] Code preview: {code[:120].strip()}...") | |
| exec(code, namespace) # FIX 1: single namespace | |
| # FIX 2: Smart result detection — three priority tiers | |
| # --- Tier 1: expected named result variables --- | |
| result = None | |
| expected_keys = [ | |
| f"result_{requirement_num}", | |
| "final_result", | |
| "result", | |
| ] | |
| for key in expected_keys: | |
| if key in namespace and namespace[key] is not None: | |
| result = namespace[key] | |
| print(f"[DEBUG] Found result in expected variable: '{key}'") | |
| break | |
| # --- Tier 2: any NEW DataFrame or Series created during exec --- | |
| if result is None: | |
| new_keys = set(namespace.keys()) - keys_before | |
| for key in new_keys: | |
| val = namespace[key] | |
| if isinstance(val, (pd.DataFrame, pd.Series)) and val is not None: | |
| result = val | |
| print(f"[DEBUG] Found result by scanning new DataFrame/Series: '{key}'") | |
| break | |
| # --- Tier 3: any new non-None, non-private variable --- | |
| if result is None: | |
| new_keys = set(namespace.keys()) - keys_before | |
| for key in sorted(new_keys): # sorted for determinism | |
| if key.startswith("_"): | |
| continue | |
| val = namespace[key] | |
| if val is not None: | |
| result = val | |
| print(f"[DEBUG] Fallback: found result in new variable: '{key}'") | |
| break | |
| if result is None: | |
| result = "Code executed successfully but no result variable was found in namespace." | |
| print(f"[DEBUG] Req {requirement_num} success. Result type: {type(result).__name__}") | |
| return { | |
| "success": True, | |
| "result": result, | |
| "error": None | |
| } | |
| except Exception as e: | |
| import traceback | |
| tb = traceback.format_exc() | |
| print(f"[DEBUG] Req {requirement_num} FAILED: {str(e)}") | |
| print(f"[DEBUG] Traceback:\n{tb}") | |
| return { | |
| "success": False, | |
| "result": None, | |
| "error": str(e) | |
| } | |
| def execute_all_requirements( | |
| requirements: list, | |
| acq: pd.DataFrame, | |
| perf: pd.DataFrame, | |
| master_df: pd.DataFrame | |
| ): | |
| """ | |
| Execute all requirements sequentially, building context. | |
| """ | |
| print(f"[DEBUG] Starting execution of {len(requirements)} requirements") | |
| all_results = [] | |
| context_text = "" | |
| for i, req in enumerate(requirements, 1): | |
| code = req.get("code", "") | |
| description = req.get("description", "") | |
| title = req.get("title", f"Analysis {i}") | |
| exec_result = execute_requirement_code(code, acq, perf, master_df, i) | |
| all_results.append({ | |
| "sequence": i, | |
| "title": title, | |
| "description": description, | |
| "code": code, | |
| # "success" is what app.py checks via res.get("success") | |
| # "execution_success" kept for backward compatibility | |
| "success": exec_result["success"], | |
| "execution_success": exec_result["success"], | |
| "result": exec_result["result"], | |
| "error": exec_result.get("error") | |
| }) | |
| # Build context for interpretation | |
| if exec_result["success"]: | |
| context_text += f"\nAnalysis {i} ({title}):\n{str(exec_result['result'])}\n" | |
| else: | |
| context_text += f"\nAnalysis {i} ({title}) FAILED:\n{exec_result['error']}\n" | |
| return all_results, context_text | |
| # --------------------------------------------------- | |
| # STEP 3: RESULT INTERPRETATION | |
| # --------------------------------------------------- | |
| def interpret_all_results( | |
| question: str, | |
| all_results: list, | |
| context_text: str | |
| ): | |
| """ | |
| Senior risk analyst LLM interprets all results holistically. | |
| """ | |
| print(f"[DEBUG] Interpreting results for {len(all_results)} analyses") | |
| print(f"[DEBUG] Successful executions: {sum(1 for r in all_results if r.get('success'))}") | |
| client = _get_hf_client() | |
| # Format all analyses | |
| analyses_text = "" | |
| for res in all_results: | |
| analyses_text += f"\n{'=' * 60}\n" | |
| analyses_text += f"Analysis {res['sequence']}: {res['title']}\n" | |
| analyses_text += f"Description: {res['description']}\n" | |
| analyses_text += f"{'=' * 60}\n" | |
| if res["success"]: | |
| analyses_text += f"Result:\n{str(res['result'])}\n" | |
| else: | |
| analyses_text += f"Execution Error: {res['error']}\n" | |
| prompt = ( | |
| "You are a senior retail credit risk analyst with 15+ years of portfolio management experience.\n\n" | |
| "You have just run a set of pandas analyses on a credit portfolio. " | |
| "Your task is to interpret the results and deliver a structured, specific, " | |
| "actionable risk assessment. Do not repeat raw numbers from the tables — " | |
| "interpret what they mean for the business.\n\n" | |
| "RISK BENCHMARKS FOR REFERENCE:\n" | |
| " 30+@3 : Green < 3% | Amber 3-6% | Red > 6%\n" | |
| " 30+@6 : Green < 5% | Amber 5-9% | Red > 9%\n" | |
| " 60+@6 : Green < 2% | Amber 2-4% | Red > 4%\n" | |
| " Yr1 NCL: Green < 3% | Amber 3-6% | Red > 6%\n\n" | |
| "STRUCTURE YOUR RESPONSE IN EXACTLY THESE 4 SECTIONS:\n\n" | |
| "1. HEADLINE FINDING (2-3 sentences)\n" | |
| " The single most important thing the data shows. Be specific — name the metric, " | |
| " the segment or vintage, and the direction (improving/worsening).\n\n" | |
| "2. KEY FINDINGS (3-5 bullet points)\n" | |
| " Each bullet must:\n" | |
| " - Name a specific segment, vintage, or metric (not generic statements)\n" | |
| " - State the actual value and what benchmark zone it falls in (Green/Amber/Red)\n" | |
| " - State whether it is improving, stable, or worsening vs the comparison period\n" | |
| " Example: '• Broker channel 30+@3 is 7.8% (RED) — worsening by +2.1pp vs 2024, " | |
| " suggesting deteriorating acquisition quality in this channel.'\n\n" | |
| "3. RED FLAGS (list only if any metric is in Amber or Red zone)\n" | |
| " For each red flag:\n" | |
| " - Name the segment/vintage and metric\n" | |
| " - State the value and benchmark zone\n" | |
| " - Give one specific business hypothesis for why this is happening\n" | |
| " If no red flags: write 'No red flags — all metrics within Green benchmarks.'\n\n" | |
| "4. RECOMMENDATIONS (2-4 actionable items)\n" | |
| " Each recommendation must be:\n" | |
| " - Tied to a specific finding above (not generic advice)\n" | |
| " - Actionable by a risk or credit team (tighten policy, adjust limit, investigate, monitor)\n" | |
| " - Prioritised: label each as IMMEDIATE, SHORT-TERM, or MONITOR\n" | |
| " Example: '[IMMEDIATE] Tighten credit bureau cut-off for Broker channel acquisitions — " | |
| " 30+@3 at 7.8% exceeds Red threshold and is trending upward.'\n\n" | |
| "User's Original Question:\n" + question + "\n\n" | |
| "Analyses Performed:\n" + analyses_text + "\n\n" | |
| "Provide your structured interpretation now:" | |
| ) | |
| messages = [ | |
| { | |
| "role": "system", | |
| "content": ( | |
| "You are a senior credit risk analyst delivering a structured portfolio risk assessment. " | |
| "Be specific — always name segments, vintages, and metrics by name. " | |
| "Always reference benchmark zones (Green/Amber/Red). " | |
| "Never give generic advice. Every recommendation must trace back to a specific data finding." | |
| ) | |
| }, | |
| {"role": "user", "content": prompt} | |
| ] | |
| response = client.chat.completions.create( | |
| model=HF_MODEL_ID, | |
| messages=messages, | |
| max_tokens=1024, | |
| temperature=0.3, | |
| top_p=0.95 | |
| ) | |
| interpretation = ( | |
| response.choices[0].message.content | |
| if hasattr(response, "choices") | |
| else str(response) | |
| ) | |
| return interpretation | |
| # --------------------------------------------------- | |
| # MASTER ORCHESTRATOR FUNCTION | |
| # --------------------------------------------------- | |
| def run_deep_dive_analysis( | |
| question: str, | |
| acq: pd.DataFrame, | |
| perf: pd.DataFrame, | |
| master_df: pd.DataFrame | |
| ): | |
| """ | |
| End-to-end deep dive analysis: | |
| 1. Break question into 1-3 structured requirements | |
| 2. Generate code for each requirement | |
| 3. Execute each requirement's code sequentially | |
| 4. Synthesize results and provide senior analyst interpretation | |
| """ | |
| print(f"\n[DEEP DIVE START] Question: {question}") | |
| print(f"[DEBUG] Data shapes - Acq: {acq.shape}, Perf: {perf.shape}, Master: {master_df.shape}") | |
| # Step 1: Generate requirements | |
| print("[DEBUG] Step 1: Generating requirements...") | |
| req_response = generate_analysis_requirements(question, acq, perf, master_df) | |
| if not req_response["success"]: | |
| return { | |
| "success": False, | |
| "question": question, | |
| "requirements": [], | |
| "all_results": [], | |
| "interpretation": f"Failed to generate requirements: {req_response['error']}", | |
| "error": req_response["error"] | |
| } | |
| requirements = req_response["requirements"][:3] # Cap at 3 | |
| # Step 2 & 3: Execute all requirements | |
| print(f"[DEBUG] Step 2-3: Executing {len(requirements)} requirements...") | |
| all_results, context_text = execute_all_requirements(requirements, acq, perf, master_df) | |
| # Step 4: Interpret results | |
| print("[DEBUG] Step 4: Interpreting all results...") | |
| interpretation = interpret_all_results(question, all_results, context_text) | |
| print("[DEEP DIVE END] Analysis complete\n") | |
| return { | |
| "success": True, | |
| "question": question, | |
| "requirements": requirements, | |
| "all_results": all_results, | |
| "interpretation": interpretation, | |
| "error": None | |
| } | |