File size: 17,011 Bytes
67bf425
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
"""
page5_optimisation.py — 10% Bandwidth Optimisation, Efficiency Metrics, Baseline Comparison
"""

import streamlit as st
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from styles import (inject_global_css, page_header, metric_row,
                    WCO_GOLD, WCO_BLUE, WCO_GREEN, WCO_RED,
                    WCO_CARD_BG, WCO_BORDER, WCO_MUTED)
from simulation_engine import RISK_AREAS


def efficiency_comparison_chart(hybrid: dict, baseline: dict):
    """Build a grouped bar chart comparing the baseline and hybrid models.

    Args:
        hybrid: Metrics of the hybrid model. The keys read are
            ``selection_rate``, ``detection_rate``, ``precision`` and
            ``efficiency_index``.
        baseline: Metrics of the static baseline, read with the same keys.

    Returns:
        A Plotly ``Figure`` with one bar trace per model.

    Missing (or ``None``) metrics are plotted as 0 instead of raising
    ``KeyError``, so the chart still renders when the caller only has an
    empty or partial metrics dict.
    """
    metric_keys = ["selection_rate", "detection_rate", "precision", "efficiency_index"]
    categories = ["Selection Rate", "Detection Rate", "Precision", "Efficiency Index"]

    b_vals = [baseline.get(key, 0) or 0 for key in metric_keys]
    h_vals = [hybrid.get(key, 0) or 0 for key in metric_keys]

    # The three rates are bounded by 1, but the efficiency index is a ratio
    # and may exceed it; widen the axis in that case so bars and their
    # outside labels are not clipped. Values <= 1 keep the fixed 1.1 limit.
    peak = max(b_vals + h_vals)
    y_max = 1.1 if peak <= 1 else peak * 1.1

    label_font = dict(color="#D0DCF0")

    fig = go.Figure()
    fig.add_trace(go.Bar(name="📉 Static Baseline (DATE only)",
                         x=categories, y=b_vals,
                         marker_color=WCO_RED, opacity=0.80,
                         text=[f"{v:.3f}" for v in b_vals],
                         textposition="outside", textfont=label_font))
    fig.add_trace(go.Bar(name="📈 No-CelH Hybrid Model",
                         x=categories, y=h_vals,
                         marker_color=WCO_GREEN, opacity=0.85,
                         text=[f"{v:.3f}" for v in h_vals],
                         textposition="outside", textfont=label_font))
    fig.update_layout(
        paper_bgcolor="#070E1C", plot_bgcolor="#0B1220", barmode="group",
        font=dict(family="IBM Plex Sans", color="#D0DCF0", size=12), height=400,
        title=dict(text="<b>Efficiency Metrics: Static Baseline vs Hybrid Self-Learning Model</b>",
                   font=dict(color=WCO_GOLD, size=15, family="Playfair Display"), x=0.5),
        xaxis=dict(gridcolor="#1E3A6E"),
        yaxis=dict(gridcolor="#1E3A6E", title="Metric Value", range=[0, y_max]),
        legend=dict(bgcolor="#0F1C35", bordercolor=WCO_BORDER, font=dict(size=12)),
        margin=dict(l=55, r=20, t=60, b=50),
    )
    return fig


def bandwidth_optimisation_chart(df):
    """Plot selection rate, detection rate, precision and efficiency per threshold.

    Sweeps 30 evenly spaced ``fraud_score`` thresholds between 0.1 and 0.9.
    For each threshold ``t`` the bills with ``fraud_score >= t`` count as
    selected, and the following are computed:

    * selection rate  = selected bills / all bills
    * detection rate  = selected illicit bills / all illicit bills
    * precision       = selected illicit bills / selected bills
    * efficiency      = detection rate / selection rate (clipped to [0, 2]
      for display)

    Every ratio falls back to 0 when its denominator is 0. A dashed vertical
    line marks the sampled threshold whose selection rate is closest to 10%.

    Args:
        df: DataFrame with a ``fraud_score`` column and an ``is_illicit``
            column (rows equal to 1 are counted as illicit).

    Returns:
        A Plotly ``Figure`` with four line traces and the 10% marker.
    """
    total_bills = len(df)
    scores = df["fraud_score"]
    illicit_mask = df["is_illicit"] == 1
    # Loop-invariant: the number of illicit bills does not depend on the
    # threshold, so compute it once rather than on every iteration.
    total_illicit = illicit_mask.sum()

    rows = []
    for t in np.linspace(0.1, 0.9, 30):
        selected_mask = scores >= t
        n_sel = selected_mask.sum()
        n_fraud = (selected_mask & illicit_mask).sum()
        sel_rate = n_sel / total_bills if total_bills else 0
        det_rate = n_fraud / total_illicit if total_illicit else 0
        precision = n_fraud / n_sel if n_sel else 0
        eff = det_rate / sel_rate if sel_rate > 0 else 0
        rows.append(dict(threshold=round(t, 2), sel_rate=sel_rate,
                         det_rate=det_rate, precision=precision, efficiency=eff))
    opt_df = pd.DataFrame(rows)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=opt_df["threshold"], y=opt_df["sel_rate"],
                             name="Selection Rate", line=dict(color=WCO_BLUE, width=2)))
    fig.add_trace(go.Scatter(x=opt_df["threshold"], y=opt_df["det_rate"],
                             name="Detection Rate", line=dict(color=WCO_GREEN, width=2)))
    fig.add_trace(go.Scatter(x=opt_df["threshold"], y=opt_df["precision"],
                             name="Precision", line=dict(color=WCO_GOLD, width=2)))
    # Efficiency can spike when few bills are selected; clip it so the
    # curve stays inside the fixed [0, 2.1] y-axis.
    fig.add_trace(go.Scatter(x=opt_df["threshold"], y=opt_df["efficiency"].clip(0, 2),
                             name="Efficiency Index", line=dict(color=WCO_RED, width=2.5, dash="dot")))

    # 10% bandwidth marker: the sampled threshold whose selection rate is
    # nearest to 0.10 (opt_df has a default RangeIndex, so idxmin is a label
    # usable with .loc).
    nearest_row = (opt_df["sel_rate"] - 0.10).abs().idxmin()
    fig.add_vline(x=opt_df.loc[nearest_row, "threshold"],
                  line=dict(color=WCO_GOLD, dash="dash", width=1.5),
                  annotation_text="10% Bandwidth →", annotation_font_color=WCO_GOLD)
    fig.update_layout(
        paper_bgcolor="#070E1C", plot_bgcolor="#0B1220",
        font=dict(family="IBM Plex Sans", color="#D0DCF0", size=11), height=380,
        title=dict(text="<b>Bandwidth Optimisation Curve — Risk Score Threshold Analysis</b>",
                   font=dict(color=WCO_GOLD, size=14, family="Playfair Display"), x=0.5),
        xaxis=dict(title="Risk Score Threshold", gridcolor="#1E3A6E"),
        yaxis=dict(title="Rate / Index", gridcolor="#1E3A6E", range=[0, 2.1]),
        legend=dict(bgcolor="#0F1C35", bordercolor=WCO_BORDER),
        margin=dict(l=55, r=20, t=55, b=45),
    )
    return fig


def exploration_ratio_chart():
    """Plot illustrative efficiency curves against the exploration ratio ε.

    All three curves are hard-coded values (described in the code as
    "inspired by" Kim et al. 2022); nothing here is computed from session or
    simulation data. A dashed vertical line marks the 10% default ratio.

    Returns:
        A Plotly ``Figure`` with three line+marker traces.
    """
    ratios = [0.01, 0.02, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.40, 0.50]
    # x-axis is expressed in percent; computed once and shared by all traces.
    ratio_pct = [r * 100 for r in ratios]

    # Simulated efficiency values inspired by Kim et al. (2022) paper findings.
    # NOTE(review): the first legend says "best@1%" but this series peaks at
    # the 2% sample (0.66 vs 0.65) — confirm which of label or data is intended.
    eff_country_m = [0.65, 0.66, 0.64, 0.62, 0.60, 0.58, 0.55, 0.52, 0.47, 0.41]
    eff_country_t = [0.30, 0.35, 0.42, 0.57, 0.63, 0.67, 0.71, 0.74, 0.72, 0.68]
    # NOTE(review): labelled "This simulation" although the values are static.
    eff_custom    = [0.55, 0.58, 0.63, 0.70, 0.72, 0.71, 0.69, 0.68, 0.63, 0.55]

    series = [
        ("Low-drift trade (best@1%)", eff_country_m,
         dict(color=WCO_BLUE, width=2), dict(size=7)),
        ("High-drift trade (best@30%)", eff_country_t,
         dict(color=WCO_GREEN, width=2), dict(size=7)),
        ("This simulation", eff_custom,
         dict(color=WCO_GOLD, width=2.5, dash="dot"), dict(size=9, symbol="star")),
    ]

    fig = go.Figure()
    for label, values, line_style, marker_style in series:
        fig.add_trace(go.Scatter(x=ratio_pct, y=values, name=label,
                                 line=line_style, mode="lines+markers",
                                 marker=marker_style))
    fig.add_vline(x=10, line=dict(color=WCO_GOLD, dash="dash", width=1.5),
                  annotation_text="Default 10% →", annotation_font_color=WCO_GOLD)
    fig.update_layout(
        paper_bgcolor="#070E1C", plot_bgcolor="#0B1220",
        font=dict(family="IBM Plex Sans", color="#D0DCF0", size=11), height=360,
        title=dict(text="<b>Efficiency Index vs Exploration Ratio ε (inspired by Kim et al. 2022)</b>",
                   font=dict(color=WCO_GOLD, size=14, family="Playfair Display"), x=0.5),
        xaxis=dict(title="Exploration Ratio ε (%)", gridcolor="#1E3A6E"),
        yaxis=dict(title="Norm-Rev@10% (Efficiency)", gridcolor="#1E3A6E", range=[0.25, 0.85]),
        legend=dict(bgcolor="#0F1C35", bordercolor=WCO_BORDER),
        margin=dict(l=55, r=20, t=55, b=45),
    )
    return fig


def risk_score_threshold_table(df):
    """Summarise channel assignment and inspection outcomes per risk-score band.

    Bills are bucketed by ``fraud_score`` into five 20-point bands. For each
    band that contains at least one bill, the table reports the bill count,
    the number of RED / YELLOW / GREEN channel assignments, the number of
    ``FRAUD_DETECTED`` inspection outcomes, the mean ``detected_revenue``
    (rounded to 2 decimals), the sum of ``is_illicit`` and the detection rate
    (fraud detected per 100 bills, rounded to 1 decimal).

    Args:
        df: DataFrame with the columns ``fraud_score``, ``bill_id``,
            ``channel``, ``inspection_outcome``, ``detected_revenue`` and
            ``is_illicit``.

    Returns:
        A DataFrame with one row per non-empty band and the columns
        ``Risk Score Band``, ``Bills``, ``RED``, ``YELLOW``, ``GREEN``,
        ``Fraud Detected``, ``Avg Revenue ($)``, ``True Illicit`` and
        ``Detection Rate (%)``.

    Scores outside ``[0, 1]`` (and missing scores) fall in no band and are
    excluded from the table.
    """
    band_edges = [0, .2, .4, .6, .8, 1.0]
    band_labels = ["0-20%", "20-40%", "40-60%", "60-80%", "80-100%"]
    # pd.cut builds right-closed intervals, so without include_lowest the
    # first band is (0, 0.2] and a score of exactly 0.0 would become NaN and
    # silently vanish from the groupby. include_lowest=True closes it on the
    # left so those bills are counted in "0-20%".
    bins = pd.cut(df["fraud_score"], bins=band_edges, labels=band_labels,
                  include_lowest=True)

    # observed=True keeps only bands that actually contain bills.
    result = df.groupby(bins, observed=True).agg(
        Bills=("bill_id", "count"),
        Red=("channel", lambda x: (x == "RED").sum()),
        Yellow=("channel", lambda x: (x == "YELLOW").sum()),
        Green=("channel", lambda x: (x == "GREEN").sum()),
        Fraud_Detected=("inspection_outcome", lambda x: (x == "FRAUD_DETECTED").sum()),
        Avg_Revenue=("detected_revenue", "mean"),
        Illicit_Count=("is_illicit", "sum"),
    ).reset_index()
    # Positional rename: the first column is the band produced by reset_index.
    result.columns = ["Risk Score Band", "Bills", "RED", "YELLOW", "GREEN",
                      "Fraud Detected", "Avg Revenue ($)", "True Illicit"]
    result["Avg Revenue ($)"] = result["Avg Revenue ($)"].round(2)
    # replace(0, 1) guards the division when a band's bill count is 0.
    result["Detection Rate (%)"] = (
        100 * result["Fraud Detected"] / result["Bills"].replace(0, 1)
    ).round(1)
    return result


def _signed_percent(value):
    """Format ``value`` as a percentage with exactly one sign (``+5%`` / ``-5%``)."""
    try:
        is_negative = value < 0
    except TypeError:
        # Non-numeric value: keep the historical "+<value>%" rendering.
        is_negative = False
    # A negative number already carries its own "-"; prefixing "+" would
    # produce "+-5%".
    return f"{value}%" if is_negative else f"+{value}%"


def _render_no_data_notice():
    """Render the placeholder card asking the user to run the simulation first."""
    st.markdown("""
        <div style="background:#0F1C35;border:2px dashed #1E3A6E;border-radius:14px;
                    padding:60px;text-align:center;margin-top:30px;">
          <div style="font-size:40px;">⚠️</div>
          <div style="color:#F5A800;font-size:18px;font-family:'Playfair Display',serif;">
            No Simulation Data Found
          </div>
          <div style="color:#6B85AA;margin-top:10px;">
            Please run the simulation on <b>Page 3</b> first.
          </div>
        </div>""", unsafe_allow_html=True)


def _render_scorecard(hybrid, baseline):
    """Render the HTML table comparing the baseline, naive and hybrid strategies."""
    # NOTE(review): the fallbacks here (0.041 / 0.41 / 0.082 / 0.82) differ
    # from the 0 fallbacks used by the KPI strip for the same metrics, and the
    # "Naive Hybrid" row is entirely hard-coded — confirm this is intended.
    sc_data = [
        ("Static Baseline", "DATE Only (No Learning)", "10%",
         f"{baseline.get('detection_rate',0.041)*100:.1f}%",
         f"{baseline.get('efficiency_index',0.41):.2f}", WCO_RED,   "❌ Degrades over time"),
        ("Naive Hybrid",    "DATE 90% + Random 10%",   "10%",
         "5.6%", "0.56", "#F5A800",  "⚠️  Improvement but random exploration"),
        ("No-CelH Hybrid",  "DATE 90% + gATE/bATE 10%","10%",
         f"{hybrid.get('detection_rate',0.082)*100:.1f}%",
         f"{hybrid.get('efficiency_index',0.82):.3f}", WCO_GREEN, "✅ Best — self-learning"),
    ]
    rows_html = "".join(
        f"""
        <tr>
          <td><b style="color:{color};">{model}</b></td>
          <td style="color:#8BAAD4;font-size:12px;">{strategy}</td>
          <td style="text-align:center;">{sel}</td>
          <td style="text-align:center;">{det}</td>
          <td style="text-align:center;">
            <b style="color:{color};font-size:15px;">{eff}</b></td>
          <td style="font-size:12px;">{verdict}</td>
        </tr>"""
        for model, strategy, sel, det, eff, color, verdict in sc_data
    )
    st.markdown(f"""
    <table class="wco-table">
      <thead><tr>
        <th>Model</th><th>Strategy</th><th>Selection Rate</th>
        <th>Detection Rate</th><th>Efficiency Index</th><th>Verdict</th>
      </tr></thead>
      <tbody>{rows_html}</tbody>
    </table>""", unsafe_allow_html=True)


def _render_band_breakdown(df_thr):
    """Render the stacked channel-per-band bar chart followed by the styled table."""
    fig_band = go.Figure()
    for ch, color in [("RED","#C8102E"),("YELLOW","#F5A800"),("GREEN","#00843D")]:
        fig_band.add_trace(go.Bar(
            x=df_thr["Risk Score Band"], y=df_thr[ch],
            name=ch, marker_color=color, opacity=0.85,
        ))
    fig_band.update_layout(
        paper_bgcolor="#070E1C", plot_bgcolor="#0B1220", barmode="stack",
        font=dict(color="#D0DCF0", size=11), height=320,
        title=dict(text="<b>Channel Assignment by Risk Score Band</b>",
                   font=dict(color=WCO_GOLD, size=13), x=0.5),
        xaxis=dict(gridcolor="#1E3A6E", title="Risk Score Band"),
        yaxis=dict(gridcolor="#1E3A6E", title="Bills"),
        legend=dict(bgcolor="#0F1C35", bordercolor=WCO_BORDER),
        margin=dict(l=50, r=20, t=45, b=45),
    )
    st.plotly_chart(fig_band, use_container_width=True)

    st.dataframe(
        df_thr.style.set_properties(**{"background-color": WCO_CARD_BG,
                                       "color": "#D0DCF0", "font-size": "12px"}),
        use_container_width=True,
    )


def _render_summary_cards(hybrid, baseline, improve_text):
    """Render the three headline cards: efficiency index, revenue uplift, precision."""
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown(f"""
        <div style="background:#071A10;border:2px solid #00843D;border-radius:12px;
                    padding:24px;text-align:center;">
          <div style="color:#00843D;font-size:13px;margin-bottom:8px;">HYBRID EFFICIENCY INDEX</div>
          <div style="color:#44CC88;font-family:'Playfair Display',serif;font-size:48px;
                      font-weight:700;">{hybrid.get('efficiency_index',0.82):.3f}</div>
          <div style="color:#6B85AA;font-size:12px;margin-top:8px;">vs Static: {baseline.get('efficiency_index',0.41):.2f}</div>
          <div style="color:#44CC88;font-size:13px;font-weight:600;margin-top:4px;">{improve_text} improvement</div>
        </div>""", unsafe_allow_html=True)
    with col2:
        revenue_uplift = hybrid.get('revenue',0) - baseline.get('revenue',0)
        st.markdown(f"""
        <div style="background:#1A1000;border:2px solid #C8A951;border-radius:12px;
                    padding:24px;text-align:center;">
          <div style="color:#C8A951;font-size:13px;margin-bottom:8px;">REVENUE UPLIFT vs BASELINE</div>
          <div style="color:#FFD700;font-family:'Playfair Display',serif;font-size:42px;
                      font-weight:700;">${revenue_uplift:,.0f}</div>
          <div style="color:#6B85AA;font-size:12px;margin-top:8px;">Additional recovery vs static model</div>
          <div style="color:#C8A951;font-size:13px;font-weight:600;margin-top:4px;">Self-learning dividend</div>
        </div>""", unsafe_allow_html=True)
    with col3:
        precision_pct = hybrid.get('precision', 0.41) * 100
        st.markdown(f"""
        <div style="background:#0A1832;border:2px solid #0066CC;border-radius:12px;
                    padding:24px;text-align:center;">
          <div style="color:#0066CC;font-size:13px;margin-bottom:8px;">INSPECTION PRECISION</div>
          <div style="color:#66AAFF;font-family:'Playfair Display',serif;font-size:48px;
                      font-weight:700;">{precision_pct:.1f}%</div>
          <div style="color:#6B85AA;font-size:12px;margin-top:8px;">Fraud found per bill inspected</div>
          <div style="color:#66AAFF;font-size:13px;font-weight:600;margin-top:4px;">Optimised bandwidth ROI</div>
        </div>""", unsafe_allow_html=True)


def _render_references():
    """Render the scientific-references footer card."""
    st.markdown("""
    <div style="background:#0A1020;border:1px solid #1E3A6E;border-radius:10px;
                padding:20px 28px;text-align:center;">
      <div style="color:#C8A951;font-family:'Playfair Display',serif;font-size:14px;margin-bottom:8px;">
        📚 Scientific References
      </div>
      <div style="color:#6B85AA;font-size:12px;line-height:1.9;">
        Kim, S. et al. (2022). <i>Active Learning for Human-in-the-Loop Customs Inspection.</i>
        IEEE Transactions on Knowledge and Data Engineering. DOI: 10.1109/TKDE.2022.3144299<br/>
        World Customs Organization. <i>WCO Risk Management Compendium.</i> Vol. 1–3.<br/>
        WCO BACUDA Initiative · DATE Model (KDD 2020) · gATE/bATE Exploration Strategy
      </div>
    </div>""", unsafe_allow_html=True)


def show():
    """Render the bandwidth-optimisation and efficiency report page.

    Reads ``sim_df`` and the optional ``sim_efficiency`` dict from
    ``st.session_state``. When ``sim_df`` is absent, only a "no data" notice
    is shown. Otherwise the page renders, top to bottom: a KPI strip, the
    efficiency scorecard, the comparison / threshold / exploration charts,
    the per-band channel breakdown, three summary cards and a references
    footer.

    ``sim_efficiency`` is expected to hold ``hybrid`` and ``baseline`` metric
    dicts plus ``improvement_pct``; any of them may be missing, in which case
    fallback values are displayed rather than raising.
    """
    inject_global_css()
    page_header("🏆", "Bandwidth Optimisation & Efficiency Report",
                "10% INTERDICTION BANDWIDTH · ACCURACY · ROI · HYBRID vs STATIC COMPARISON")

    if "sim_df" not in st.session_state:
        _render_no_data_notice()
        return

    df         = st.session_state.sim_df
    efficiency = st.session_state.get("sim_efficiency", {})
    hybrid     = efficiency.get("hybrid", {})
    baseline   = efficiency.get("baseline", {})
    improve    = efficiency.get("improvement_pct", 0)
    improve_text = _signed_percent(improve)

    # --- KPI headline strip ---
    metric_row([
        (f"{hybrid.get('efficiency_index',0):.3f}", "Hybrid Efficiency Index", WCO_GREEN),
        (f"{baseline.get('efficiency_index',0):.2f}","Baseline Efficiency",    WCO_RED),
        (improve_text,                               "Efficiency Improvement",  WCO_GOLD),
        (f"{hybrid.get('precision',0)*100:.1f}%",    "Hybrid Precision",        WCO_BLUE),
        (f"{hybrid.get('detection_rate',0)*100:.1f}%","Detection Rate",         WCO_GREEN),
    ])

    # --- WCO efficiency scorecard ---
    # NOTE(review): the leading emoji of this title was unrecoverable from the
    # garbled source text; 📐 is a best guess — confirm against the design.
    st.markdown('<div class="section-title">📐 WCO Efficiency Scorecard</div>',
                unsafe_allow_html=True)
    _render_scorecard(hybrid, baseline)

    st.markdown("<br/>", unsafe_allow_html=True)

    # --- Charts ---
    # The comparison chart indexes these four metrics; fill any that are
    # missing with 0 so an absent or partial ``sim_efficiency`` cannot crash
    # the page with a KeyError.
    chart_keys = ("selection_rate", "detection_rate", "precision", "efficiency_index")
    chart_hybrid = {**dict.fromkeys(chart_keys, 0), **hybrid}
    chart_baseline = {**dict.fromkeys(chart_keys, 0), **baseline}
    st.plotly_chart(efficiency_comparison_chart(chart_hybrid, chart_baseline),
                    use_container_width=True)

    c1, c2 = st.columns(2)
    with c1:
        st.plotly_chart(bandwidth_optimisation_chart(df), use_container_width=True)
    with c2:
        st.plotly_chart(exploration_ratio_chart(), use_container_width=True)

    # --- Risk score band -> channel assignment ---
    st.markdown('<div class="section-title">📊 Risk Score Band → Channel Assignment</div>',
                unsafe_allow_html=True)
    st.markdown("""<div class="alert-gold">
      Shows how the 10% bandwidth is allocated across risk score bands.
      High-scoring bills (60–100%) dominate the RED channel, confirming the DATE optimisation.
    </div>""", unsafe_allow_html=True)
    _render_band_breakdown(risk_score_threshold_table(df))

    # --- Optimisation summary cards ---
    st.markdown('<div class="section-title">🥇 Optimisation Summary</div>', unsafe_allow_html=True)
    _render_summary_cards(hybrid, baseline, improve_text)

    # --- Footer ---
    st.markdown("<br/>", unsafe_allow_html=True)
    _render_references()