# app.py import pandas as pd import gradio as gr # --------------------------------------------------- # HELPERS # --------------------------------------------------- from helper.vintage_helpers import ( create_booking_vintage ) from helper.data_merger import ( merge_acq_perf ) # --------------------------------------------------- # METRICS # --------------------------------------------------- from metrics.mix_metrics import ( calculate_vintage_mix, calculate_limit_mix ) # --------------------------------------------------- # ANALYTICS # --------------------------------------------------- from analytics.performance_analysis import ( generate_metric_view ) from analytics.ai_assistant import ( generate_ai_answer ) from analytics.deep_dive_agentic import ( run_deep_dive_analysis ) # --------------------------------------------------- # VISUALIZATIONS - VINTAGE CURVES # --------------------------------------------------- from visualizations.vintage_curves import ( generate_delinquency_metric_chart, generate_multi_metric_comparison, generate_segment_delinquency_curve ) # --------------------------------------------------- # VISUALIZATIONS - SEGMENT RANKING # --------------------------------------------------- from visualizations.segment_ranking import ( generate_segment_risk_heatmap, generate_segment_risk_ranking, generate_multi_category_risk_comparison, calculate_portfolio_risk_summary ) # --------------------------------------------------- # LOAD DATA # --------------------------------------------------- acq = pd.read_csv( "data/acquisition.csv" ) perf = pd.read_csv( "data/performance.csv" ) # --------------------------------------------------- # CREATE BOOKING VINTAGE # --------------------------------------------------- acq = create_booking_vintage( acq, booking_date_col="booking_date" ) # --------------------------------------------------- # CREATE MASTER PERFORMANCE DATASET # --------------------------------------------------- master_df = merge_acq_perf( acq_df=acq, perf_df=perf ) # --------------------------------------------------- # ACQUISITION ANALYSIS # --------------------------------------------------- def run_acquisition_analysis( analysis_type, category ): # ----------------------------------------- # PORTFOLIO MIX # ----------------------------------------- if analysis_type == "Portfolio Mix": result = ( acq.groupby( ["booking_vintage", category] ) .agg( count=("account_id", "nunique"), balance=("credit_limit", "sum") ) .reset_index() ) vintage_total = ( result.groupby("booking_vintage")["count"] .transform("sum") ) result["rate"] = ( result["count"] / vintage_total ) * 100 result["rate"] = ( result["rate"] .round(2) ) # ----------------------------------------- # CREDIT LINE CONCENTRATION # ----------------------------------------- elif analysis_type == "Credit Line Concentration": result = ( acq.groupby( ["booking_vintage", category] ) .agg( count=("account_id", "nunique"), balance=("credit_limit", "sum") ) .reset_index() ) vintage_total = ( result.groupby("booking_vintage")["balance"] .transform("sum") ) result["rate"] = ( result["balance"] / vintage_total ) * 100 result["rate"] = ( result["rate"] .round(2) ) else: return pd.DataFrame() # ----------------------------------------- # STANDARDIZED OUTPUT # ----------------------------------------- result = result.rename( columns={ "booking_vintage": "Vintage", category: "Category", "count": "Count", "balance": "Balance", "rate": "Rate" } ) return result[ [ "Vintage", "Category", "Count", "Balance", "Rate" ] ] # --------------------------------------------------- # PERFORMANCE ANALYSIS # --------------------------------------------------- def run_performance_analysis( metric_name, view_level ): # ----------------------------------------- # VIEW MAPPING # ----------------------------------------- view_mapping = { "Overall": None, "Channel": "sourcing_channel", "FICO": "fico_band", "City Tier": "city_tier", "Occupation": "occupation_type" } group_col = view_mapping[ view_level ] # ----------------------------------------- # CALL ANALYTICS ENGINE # ----------------------------------------- result = generate_metric_view( df=master_df, metric_name=metric_name, group_col=group_col ) # ----------------------------------------- # STANDARDIZE OUTPUT # ----------------------------------------- if group_col is not None: result = result.rename( columns={ group_col: "Category" } ) else: result["Category"] = "Overall" # ----------------------------------------- # IDENTIFY RATE COLUMN # ----------------------------------------- rate_col = [ col for col in result.columns if "rate" in col.lower() ][0] # ----------------------------------------- # OUTPUT FORMAT # ----------------------------------------- final_result = pd.DataFrame() final_result["Vintage"] = ( result["booking_vintage"] ) final_result["Category"] = ( result["Category"] ) final_result["Count"] = ( result["total_accounts"] ) final_result["Balance"] = ( result["total_balance"] ) final_result["Rate"] = ( result[rate_col] .round(2) ) return final_result # --------------------------------------------------- # VINTAGE CURVES ANALYSIS # --------------------------------------------------- def generate_vintage_curve_single( metric_name ): """Generate single vintage curve for a metric.""" try: fig = generate_delinquency_metric_chart( df=master_df, metric_name=metric_name, chart_type="line" ) return fig except Exception as e: return f"Error generating vintage curve: {str(e)}" def generate_vintage_curves_comparison(): """Generate comparison of all vintage curves.""" try: fig = generate_multi_metric_comparison( df=master_df, metrics=["30+@3", "30+@6", "60+@6", "Yr1 NCL"] ) return fig except Exception as e: return f"Error generating comparison: {str(e)}" def generate_segmented_vintage_curve( metric_name, category ): """Generate vintage curve segmented by category.""" try: fig = generate_segment_delinquency_curve( df=master_df, metric_name=metric_name, category=category ) return fig except Exception as e: return f"Error generating segmented curve: {str(e)}" # --------------------------------------------------- # SEGMENT RANKING ANALYSIS # --------------------------------------------------- def generate_segment_risk_heatmap_chart(): """Generate risk heatmap across all segments and metrics.""" try: fig = generate_segment_risk_heatmap( df=master_df ) return fig except Exception as e: return f"Error generating heatmap: {str(e)}" def generate_high_risk_segments_ranking( metric_name, category ): """Generate ranking of high-risk segments.""" try: fig = generate_segment_risk_ranking( df=master_df, metric_name=metric_name, category=category, top_n=10 ) return fig except Exception as e: return f"Error generating ranking: {str(e)}" def generate_multi_category_comparison( metric_name ): """Generate risk comparison across all categories.""" try: fig = generate_multi_category_risk_comparison( df=master_df, metric_name=metric_name ) return fig except Exception as e: return f"Error generating comparison: {str(e)}" def generate_portfolio_summary(): """Generate portfolio risk summary.""" try: summary_df = calculate_portfolio_risk_summary( df=master_df ) return summary_df except Exception as e: return f"Error generating summary: {str(e)}" def ask_ai_question(question, as_of_month, segment, history): if not question or str(question).strip() == "": return history or [], history or [], "" try: answer = generate_ai_answer( question=question, df=master_df, as_of_month=as_of_month, segment=segment if segment != "" else None ) except Exception as exc: answer = f"AI error: {str(exc)}" history = history or [] history.append({"role": "user", "content": question}) history.append({"role": "assistant", "content": answer}) return history, history, "" def run_deep_dive_on_question(question): if not question or str(question).strip() == "": return "Please enter a question." try: result = run_deep_dive_analysis(question, acq, perf, master_df) # Check if analysis was successful if not result["success"]: return f"**Error in Deep Dive Analysis:**\n{result.get('error', 'Unknown error')}" # Check if we have empty results if not result.get("requirements") and not result.get("all_results"): return ( f"**Question:** {result['question']}\n\n" f"⚠️ **No analysis requirements generated.** \n" f"This may indicate:\n" f"- The LLM couldn't parse the question clearly\n" f"- API connectivity issue\n" f"\nPlease try:\n" f"- Rephrasing your question more specifically\n" f"- Asking about specific metrics (30+@6, Yr1 NCL) or segments (FICO, Channel, City Tier)\n" f"- Breaking down complex questions into simpler components\n\n" f"**Raw Response:** {result.get('interpretation', 'No interpretation')}" ) output = f"**Question:** {result['question']}\n\n" # Display each requirement and its result if result.get("requirements"): output += "## Analysis Requirements\n\n" for i, req in enumerate(result["requirements"], 1): output += f"**Requirement {i}:** {req.get('title', 'Untitled')}\n" output += f"- {req.get('description', 'No description')}\n\n" if "code" in req: output += f"Generated Code:\n```python\n{req['code']}\n```\n\n" else: output += "⚠️ No requirements generated\n\n" # Display execution results if result.get("all_results"): output += "## Execution Results\n\n" for i, res in enumerate(result["all_results"], 1): output += f"**Requirement {i} Results:**\n" if res.get("success"): output += f"✓ Executed successfully\n" output += f"```\n{res.get('result', 'No result')}\n```\n\n" else: output += f"✗ Execution failed: {res.get('error', 'Unknown error')}\n\n" else: output += "⚠️ No execution results\n\n" # Display synthesis & insights if result.get("interpretation"): output += "## Analyst Interpretation\n\n" output += f"{result['interpretation']}" else: output += "⚠️ No interpretation available" return output except Exception as exc: import traceback return f"**Deep Dive Analysis Error:**\n```\n{traceback.format_exc()}\n```" # --------------------------------------------------- # PORTFOLIO OVERVIEW (Calendar Snapshot) # --------------------------------------------------- def _detect_date_column(df): candidates = [ "reporting_month", "observation_date", "observation_month", "obs_date", "date", "calendar_month", "month", "report_date" ] for c in candidates: if c in df.columns: return c return None def get_calendar_months(): date_col = _detect_date_column(master_df) if date_col is None: return [] ser = pd.to_datetime(master_df[date_col], errors="coerce") months = ser.dt.to_period("M").astype(str).dropna().unique().tolist() months.sort() return months def _filter_master_by_month(as_of_month): # as_of_month expected like "YYYY-MM" date_col = _detect_date_column(master_df) if date_col is None or not as_of_month: return master_df.copy() ser = pd.to_datetime(master_df[date_col], errors="coerce").dt.to_period("M").astype(str) return master_df[ser == as_of_month].copy() def generate_portfolio_overview(as_of_month, segment): """ Returns a small DataFrame with key portfolio snapshot metrics for the selected calendar month and segment. Metrics: Total Accounts, Open Accounts (balance>0), Bad Accounts (dpd>=30), Overall NCL Rate (dollar %), Average FICO. """ df = _filter_master_by_month(as_of_month) # If a segmentation column is provided, return per-segment breakdown valid_segments = [ "fico_band", "sourcing_channel", "city_tier", "occupation_type" ] if segment in valid_segments and segment in df.columns: grp = segment total_accounts = df.groupby(grp)["account_id"].nunique() if "balance" in df.columns: open_accounts = ( df.loc[df["balance"] > 0].groupby(grp)["account_id"].nunique() ) total_balance = df.groupby(grp)["balance"].sum() else: open_accounts = pd.Series(0, index=total_accounts.index) total_balance = pd.Series(0, index=total_accounts.index) if "dpd" in df.columns: bad_accounts = ( df.loc[df["dpd"].fillna(0) >= 30].groupby(grp)["account_id"].nunique() ) bad_balance = ( df.assign(_bad_balance=df["balance"].where(df["dpd"].fillna(0) >= 30, 0)) .groupby(grp)["_bad_balance"].sum() ) if "balance" in df.columns else pd.Series(0, index=total_accounts.index) else: bad_accounts = pd.Series(0, index=total_accounts.index) bad_balance = pd.Series(0, index=total_accounts.index) # NCL if present ncl_cols = [c for c in df.columns if "ncl" in c.lower()] if len(ncl_cols) > 0: total_ncl = df.groupby(grp)[ncl_cols[0]].sum() ncl_rate = (total_ncl / total_balance * 100).round(2).fillna(0) else: ncl_rate = (bad_balance / total_balance * 100).round(2).fillna(0) # average fico per group if available if "fico_score" in df.columns: avg_fico = df.groupby(grp)["fico_score"].mean().round(1) else: avg_fico = pd.Series(float("nan"), index=total_accounts.index) result = pd.DataFrame({ "Segment": total_accounts.index, "Total_Accounts": total_accounts.values, "Open_Accounts": open_accounts.reindex(total_accounts.index).fillna(0).astype(int).values, "Bad_Accounts": bad_accounts.reindex(total_accounts.index).fillna(0).astype(int).values, "Total_Balance": total_balance.reindex(total_accounts.index).fillna(0).values, "NCL_Rate_pct": ncl_rate.reindex(total_accounts.index).fillna(0).values, "Avg_FICO": avg_fico.reindex(total_accounts.index).fillna(float("nan")).values }) # Sort by NCL rate descending result = result.sort_values("NCL_Rate_pct", ascending=False).reset_index(drop=True) return result # Default: single-line overview total_accounts = df["account_id"].nunique() if "account_id" in df.columns else 0 open_accounts = ( df.loc[df["balance"] > 0, "account_id"].nunique() if "balance" in df.columns else total_accounts ) bad_accounts = ( df.loc[df["dpd"].fillna(0) >= 30, "account_id"].nunique() if "dpd" in df.columns else 0 ) # overall NCL rate (dollar-based) fallback logic ncl_cols = [c for c in df.columns if "ncl" in c.lower()] overall_ncl_rate = None if len(ncl_cols) > 0 and "balance" in df.columns: ncl_sum = df[ncl_cols[0]].sum(skipna=True) bal_sum = df["balance"].sum(skipna=True) overall_ncl_rate = (ncl_sum / bal_sum * 100) if bal_sum > 0 else None else: # fallback: use bad balance / total balance as proxy if "balance" in df.columns and "dpd" in df.columns: bad_bal = df.loc[df["dpd"].fillna(0) >= 30, "balance"].sum() bal_sum = df["balance"].sum() overall_ncl_rate = (bad_bal / bal_sum * 100) if bal_sum > 0 else None if overall_ncl_rate is None: overall_ncl_rate = float("nan") else: overall_ncl_rate = round(overall_ncl_rate, 2) # average fico avg_fico = None if "fico_score" in df.columns: avg_fico = round(df["fico_score"].dropna().mean(), 1) elif "fico_band" in df.columns: def band_mid(b): try: parts = b.split("-") return (int(parts[0]) + int(parts[1])) / 2 except Exception: return None mid_vals = df["fico_band"].dropna().apply(band_mid).dropna() avg_fico = round(mid_vals.mean(), 1) if not mid_vals.empty else float("nan") else: avg_fico = float("nan") overview = pd.DataFrame({ "Metric": [ "As Of Month", "Total Accounts", "Open Accounts", "Bad Accounts (dpd>=30)", "Overall NCL Rate (%)", "Average FICO" ], "Value": [ as_of_month if as_of_month else "All", int(total_accounts), int(open_accounts), int(bad_accounts), overall_ncl_rate, avg_fico ] }) return overview # --------------------------------------------------- # DYNAMIC DROPDOWNS # --------------------------------------------------- def update_analysis_dropdown( dataset ): # ----------------------------------------- # ACQUISITION # ----------------------------------------- if dataset == "Acquisition": return gr.update( choices=[ "Portfolio Mix", "Credit Line Concentration" ], value="Portfolio Mix" ) # ----------------------------------------- # PERFORMANCE # ----------------------------------------- elif dataset == "Performance": return gr.update( choices=[ "30+@3", "30+@6", "60+@6", "Yr1 NCL" ], value="30+@6" ) def update_category_dropdown( dataset ): # ----------------------------------------- # ACQUISITION # ----------------------------------------- if dataset == "Acquisition": return gr.update( choices=[ "fico_band", "sourcing_channel", "city_tier", "occupation_type" ], value="fico_band" ) # ----------------------------------------- # PERFORMANCE # ----------------------------------------- elif dataset == "Performance": return gr.update( choices=[ "Overall", "Channel", "FICO", "City Tier", "Occupation" ], value="Overall" ) # --------------------------------------------------- # MASTER ROUTER # --------------------------------------------------- def run_analysis( dataset, analysis, category ): # ----------------------------------------- # ACQUISITION # ----------------------------------------- if dataset == "Acquisition": return run_acquisition_analysis( analysis_type=analysis, category=category ) # ----------------------------------------- # PERFORMANCE # ----------------------------------------- elif dataset == "Performance": return run_performance_analysis( metric_name=analysis, view_level=category ) else: return pd.DataFrame() # --------------------------------------------------- # GRADIO UI # --------------------------------------------------- with gr.Blocks() as app: gr.Markdown( "# Risk Analytics Manager Agent - Phase 2" ) with gr.Tabs(): # ================================================= # TAB 1: BASIC ANALYSIS (Phase 1) # ================================================= with gr.TabItem("📊 Basic Analysis"): gr.Markdown( "## Phase 1: Acquisition & Performance Analysis" ) with gr.Row(): dataset_dropdown = gr.Dropdown( choices=[ "Acquisition", "Performance" ], value="Acquisition", label="Dataset" ) analysis_dropdown = gr.Dropdown( choices=[ "Portfolio Mix", "Credit Line Concentration" ], value="Portfolio Mix", label="Analysis" ) category_dropdown = gr.Dropdown( choices=[ "fico_band", "sourcing_channel", "city_tier", "occupation_type" ], value="fico_band", label="Category / View" ) # ----------------------------------------- # DYNAMIC DROPDOWNS # ----------------------------------------- dataset_dropdown.change( fn=update_analysis_dropdown, inputs=dataset_dropdown, outputs=analysis_dropdown ) dataset_dropdown.change( fn=update_category_dropdown, inputs=dataset_dropdown, outputs=category_dropdown ) # ----------------------------------------- # RUN BUTTON # ----------------------------------------- run_button = gr.Button( "Run Analysis", variant="primary" ) output_table = gr.Dataframe() run_button.click( fn=run_analysis, inputs=[ dataset_dropdown, analysis_dropdown, category_dropdown ], outputs=output_table ) # ================================================= # TAB 2: VINTAGE CURVES (Phase 2) # ================================================= with gr.TabItem("📈 Vintage Curves"): gr.Markdown( "## Phase 2: Vintage Delinquency Curves Analysis" ) with gr.Row(): metric_dropdown = gr.Dropdown( choices=[ "30+@3", "30+@6", "60+@6", "Yr1 NCL" ], value="30+@6", label="Delinquency Metric" ) vintage_chart_type = gr.Radio( choices=["Single Metric", "All Metrics Comparison"], value="Single Metric", label="Chart Type" ) def update_vintage_view(metric, chart_type): if chart_type == "Single Metric": return generate_vintage_curve_single(metric) else: return generate_vintage_curves_comparison() vintage_chart = gr.Plot( label="Vintage Curve" ) gen_vintage_btn = gr.Button( "Generate Vintage Curve", variant="primary" ) gen_vintage_btn.click( fn=update_vintage_view, inputs=[metric_dropdown, vintage_chart_type], outputs=vintage_chart ) gr.Markdown( "### Segmented Vintage Curves" ) with gr.Row(): segment_metric = gr.Dropdown( choices=[ "30+@3", "30+@6", "60+@6", "Yr1 NCL" ], value="30+@6", label="Metric" ) segment_category = gr.Dropdown( choices=[ "fico_band", "sourcing_channel", "city_tier", "occupation_type" ], value="fico_band", label="Category" ) segmented_chart = gr.Plot( label="Segmented Vintage Curve" ) gen_segment_btn = gr.Button( "Generate Segmented Curve", variant="primary" ) gen_segment_btn.click( fn=generate_segmented_vintage_curve, inputs=[segment_metric, segment_category], outputs=segmented_chart ) # ================================================= # TAB 3: SEGMENT RANKING (Phase 2) # ================================================= with gr.TabItem("⚠️ Segment Ranking"): gr.Markdown( "## Phase 2: High-Risk Segment Analysis" ) # --------- HEATMAP SECTION --------- gr.Markdown( "### 🔥 Overall Risk Heatmap" ) gr.Markdown( "Risk scores across all delinquency metrics and segments" ) heatmap_chart = gr.Plot( label="Risk Heatmap" ) gen_heatmap_btn = gr.Button( "Generate Risk Heatmap", variant="primary" ) gen_heatmap_btn.click( fn=generate_segment_risk_heatmap_chart, outputs=heatmap_chart ) gr.Markdown( "---" ) # --------- HIGH-RISK RANKING SECTION --------- gr.Markdown( "### 📊 High-Risk Segments Ranking" ) with gr.Row(): ranking_metric = gr.Dropdown( choices=[ "30+@3", "30+@6", "60+@6", "Yr1 NCL" ], value="30+@6", label="Metric" ) ranking_category = gr.Dropdown( choices=[ "fico_band", "sourcing_channel", "city_tier", "occupation_type" ], value="fico_band", label="Category" ) ranking_chart = gr.Plot( label="High-Risk Segments" ) gen_ranking_btn = gr.Button( "Generate Risk Ranking", variant="primary" ) gen_ranking_btn.click( fn=generate_high_risk_segments_ranking, inputs=[ranking_metric, ranking_category], outputs=ranking_chart ) gr.Markdown( "---" ) # --------- MULTI-CATEGORY COMPARISON --------- gr.Markdown( "### 🔀 Cross-Category Risk Comparison" ) comparison_metric = gr.Dropdown( choices=[ "30+@3", "30+@6", "60+@6", "Yr1 NCL" ], value="30+@6", label="Metric" ) comparison_chart = gr.Plot( label="Multi-Category Comparison" ) gen_comparison_btn = gr.Button( "Generate Comparison", variant="primary" ) gen_comparison_btn.click( fn=generate_multi_category_comparison, inputs=comparison_metric, outputs=comparison_chart ) gr.Markdown( "---" ) # --------- PORTFOLIO SUMMARY --------- gr.Markdown( "### 📋 Portfolio Risk Summary" ) summary_table = gr.Dataframe( label="Risk Summary" ) gen_summary_btn = gr.Button( "Generate Summary", variant="primary" ) gen_summary_btn.click( fn=generate_portfolio_summary, outputs=summary_table ) # ================================================= # TAB 4: PORTFOLIO OVERVIEW (Calendar Snapshot) # ================================================= with gr.TabItem("📅 Portfolio Overview"): gr.Markdown("## Portfolio Snapshot by Calendar Month") with gr.Row(): calendar_month_dropdown = gr.Dropdown( choices=get_calendar_months(), value=(get_calendar_months()[-1] if len(get_calendar_months()) > 0 else None), label="Calendar Month (YYYY-MM)" ) overview_segment_dropdown = gr.Dropdown( choices=[ "fico_band", "sourcing_channel", "city_tier", "occupation_type" ], value="fico_band", label="Segment (for drill)" ) gen_overview_btn = gr.Button("Generate Snapshot", variant="primary") overview_table = gr.Dataframe(label="Portfolio Overview") gen_overview_btn.click( fn=generate_portfolio_overview, inputs=[calendar_month_dropdown, overview_segment_dropdown], outputs=overview_table ) # ================================================= # TAB 5: DEEP DIVE AGENTIC ANALYSIS # ================================================= with gr.TabItem("🔬 Deep Dive Analysis"): gr.Markdown( "## Agentic Code Generation & Execution\n\n" "Ask a complex question and the AI will:\n" "1. Generate pandas code to analyze the data\n" "2. Execute the code safely\n" "3. Interpret the results with risk management insights" ) deep_dive_question = gr.Textbox( lines=4, placeholder="E.g., 'Compare 30+ delinquency trends across fico bands and identify worst performing vintages.'", label="Analysis Question" ) deep_dive_run_btn = gr.Button("Run Deep Dive Analysis", variant="primary") deep_dive_output = gr.Markdown(label="Analysis Output") deep_dive_run_btn.click( fn=run_deep_dive_on_question, inputs=deep_dive_question, outputs=deep_dive_output ) # ================================================= # TAB 6: CHAT WITH YOUR DATA # ================================================= with gr.TabItem("💬 Chat with Your Data"): gr.Markdown( "## Ask questions in plain language and get risk manager insights" ) with gr.Row(): ai_month_dropdown = gr.Dropdown( choices=["All"] + get_calendar_months(), value="All" if len(get_calendar_months()) > 0 else None, label="As Of Month (YYYY-MM or All)" ) ai_segment_dropdown = gr.Dropdown( choices=["", "fico_band", "sourcing_channel", "city_tier", "occupation_type"], value="", label="Segment Context" ) ai_question = gr.Textbox( lines=3, placeholder="Ask about NCL, vintage performance, high-risk segments, or portfolio trends..." ) ai_chatbot = gr.Chatbot() ai_history = gr.State([]) ai_ask_btn = gr.Button("Ask AI", variant="primary") ai_ask_btn.click( fn=ask_ai_question, inputs=[ai_question, ai_month_dropdown, ai_segment_dropdown, ai_history], outputs=[ai_chatbot, ai_history, ai_question] ) app.launch()