Spaces:
Runtime error
Runtime error
File size: 17,011 Bytes
67bf425 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | """
page5_optimisation.py — 10% Bandwidth Optimisation, Efficiency Metrics, Baseline Comparison
"""
import streamlit as st
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from styles import (inject_global_css, page_header, metric_row,
WCO_GOLD, WCO_BLUE, WCO_GREEN, WCO_RED,
WCO_CARD_BG, WCO_BORDER, WCO_MUTED)
from simulation_engine import RISK_AREAS
def efficiency_comparison_chart(hybrid: dict, baseline: dict):
    """Build a grouped bar chart comparing baseline and hybrid metrics.

    Args:
        hybrid: Metrics of the hybrid model, keyed by ``selection_rate``,
            ``detection_rate``, ``precision`` and ``efficiency_index``.
        baseline: Metrics of the static baseline, using the same keys.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar trace per model.

    A metric missing from either mapping is plotted as ``0.0``. ``show()``
    falls back to empty dicts when no efficiency results are stored in the
    session, so indexing the keys directly would raise ``KeyError`` there.
    """
    # (axis label, metric key) pairs; the order fixes the bar order on the x axis.
    metrics = (
        ("Selection Rate", "selection_rate"),
        ("Detection Rate", "detection_rate"),
        ("Precision", "precision"),
        ("Efficiency Index", "efficiency_index"),
    )
    categories = [label for label, _ in metrics]
    b_vals = [baseline.get(key, 0.0) for _, key in metrics]
    h_vals = [hybrid.get(key, 0.0) for _, key in metrics]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        name="π Static Baseline (DATE only)",
        x=categories, y=b_vals,
        marker_color=WCO_RED, opacity=0.80,
        text=[f"{v:.3f}" for v in b_vals],
        textposition="outside", textfont=dict(color="#D0DCF0"),
    ))
    fig.add_trace(go.Bar(
        name="π No-CelH Hybrid Model",
        x=categories, y=h_vals,
        marker_color=WCO_GREEN, opacity=0.85,
        text=[f"{v:.3f}" for v in h_vals],
        textposition="outside", textfont=dict(color="#D0DCF0"),
    ))
    fig.update_layout(
        paper_bgcolor="#070E1C", plot_bgcolor="#0B1220", barmode="group",
        font=dict(family="IBM Plex Sans", color="#D0DCF0", size=12), height=400,
        title=dict(text="<b>Efficiency Metrics: Static Baseline vs Hybrid Self-Learning Model</b>",
                   font=dict(color=WCO_GOLD, size=15, family="Playfair Display"), x=0.5),
        xaxis=dict(gridcolor="#1E3A6E"),
        yaxis=dict(gridcolor="#1E3A6E", title="Metric Value", range=[0, 1.1]),
        legend=dict(bgcolor="#0F1C35", bordercolor=WCO_BORDER, font=dict(size=12)),
        margin=dict(l=55, r=20, t=60, b=50),
    )
    return fig
def bandwidth_optimisation_chart(df):
    """Plot selection, detection, precision and efficiency per score threshold.

    For each of 30 evenly spaced thresholds between 0.1 and 0.9, bills whose
    ``fraud_score`` is at or above the threshold count as selected, and four
    quantities are derived:

    * selection rate = selected bills / all bills
    * detection rate = selected illicit bills / all illicit bills
    * precision      = selected illicit bills / selected bills
    * efficiency     = detection rate / selection rate (clipped to [0, 2]
      for plotting)

    Each ratio falls back to 0 when its denominator is 0. A dashed vertical
    line marks the threshold whose selection rate is closest to 10%.

    Args:
        df: DataFrame with a ``fraud_score`` column and an ``is_illicit``
            column in which ``1`` marks an illicit bill.

    Returns:
        A ``plotly.graph_objects.Figure`` with one line trace per quantity.
    """
    n = len(df)
    scores = df["fraud_score"]
    # Loop-invariant: the illicit mask and total do not depend on the threshold.
    illicit = df["is_illicit"] == 1
    all_fraud = int(illicit.sum())

    rows = []
    for t in np.linspace(0.1, 0.9, 30):
        selected = scores >= t
        n_sel = int(selected.sum())
        n_fraud = int((selected & illicit).sum())
        sel_rate = n_sel / n if n else 0
        det_rate = n_fraud / all_fraud if all_fraud else 0
        precision = n_fraud / n_sel if n_sel else 0
        eff = det_rate / sel_rate if sel_rate > 0 else 0
        rows.append(dict(threshold=round(t, 2), sel_rate=sel_rate,
                         det_rate=det_rate, precision=precision, efficiency=eff))
    opt_df = pd.DataFrame(rows)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=opt_df["threshold"], y=opt_df["sel_rate"],
                             name="Selection Rate", line=dict(color=WCO_BLUE, width=2)))
    fig.add_trace(go.Scatter(x=opt_df["threshold"], y=opt_df["det_rate"],
                             name="Detection Rate", line=dict(color=WCO_GREEN, width=2)))
    fig.add_trace(go.Scatter(x=opt_df["threshold"], y=opt_df["precision"],
                             name="Precision", line=dict(color=WCO_GOLD, width=2)))
    fig.add_trace(go.Scatter(x=opt_df["threshold"], y=opt_df["efficiency"].clip(0, 2),
                             name="Efficiency Index",
                             line=dict(color=WCO_RED, width=2.5, dash="dot")))

    # 10% bandwidth marker: idxmin picks the first threshold with the smallest
    # distance to a 10% selection rate, without sorting the whole column.
    nearest = (opt_df["sel_rate"] - 0.10).abs().idxmin()
    fig.add_vline(x=opt_df.loc[nearest, "threshold"],
                  line=dict(color=WCO_GOLD, dash="dash", width=1.5),
                  annotation_text="10% Bandwidth β", annotation_font_color=WCO_GOLD)

    fig.update_layout(
        paper_bgcolor="#070E1C", plot_bgcolor="#0B1220",
        font=dict(family="IBM Plex Sans", color="#D0DCF0", size=11), height=380,
        title=dict(text="<b>Bandwidth Optimisation Curve β Risk Score Threshold Analysis</b>",
                   font=dict(color=WCO_GOLD, size=14, family="Playfair Display"), x=0.5),
        xaxis=dict(title="Risk Score Threshold", gridcolor="#1E3A6E"),
        yaxis=dict(title="Rate / Index", gridcolor="#1E3A6E", range=[0, 2.1]),
        legend=dict(bgcolor="#0F1C35", bordercolor=WCO_BORDER),
        margin=dict(l=55, r=20, t=55, b=45),
    )
    return fig
def exploration_ratio_chart():
    """Plot hard-coded efficiency curves against the exploration ratio.

    The three series are fixed illustrative values (per the original comment,
    inspired by the findings of Kim et al. 2022); nothing is computed from
    simulation data. The x axis shows the exploration ratio in percent and a
    dashed vertical line marks the 10% default.

    Returns:
        A ``plotly.graph_objects.Figure`` with three line+marker traces.
    """
    exploration_pct = [r * 100 for r in
                       [0.01, 0.02, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.40, 0.50]]

    # One entry per trace: (y values, legend name, line style, marker style).
    # NOTE(review): the first series peaks at 2% (0.66), not at the 1% its
    # legend label states - confirm which of the two is intended.
    trace_specs = (
        ([0.65, 0.66, 0.64, 0.62, 0.60, 0.58, 0.55, 0.52, 0.47, 0.41],
         "Low-drift trade (best@1%)",
         dict(color=WCO_BLUE, width=2),
         dict(size=7)),
        ([0.30, 0.35, 0.42, 0.57, 0.63, 0.67, 0.71, 0.74, 0.72, 0.68],
         "High-drift trade (best@30%)",
         dict(color=WCO_GREEN, width=2),
         dict(size=7)),
        ([0.55, 0.58, 0.63, 0.70, 0.72, 0.71, 0.69, 0.68, 0.63, 0.55],
         "This simulation",
         dict(color=WCO_GOLD, width=2.5, dash="dot"),
         dict(size=9, symbol="star")),
    )

    fig = go.Figure()
    for y_values, legend_name, line_style, marker_style in trace_specs:
        fig.add_trace(go.Scatter(
            x=list(exploration_pct),
            y=y_values,
            name=legend_name,
            line=line_style,
            mode="lines+markers",
            marker=marker_style,
        ))

    fig.add_vline(x=10, line=dict(color=WCO_GOLD, dash="dash", width=1.5),
                  annotation_text="Default 10% β", annotation_font_color=WCO_GOLD)

    layout = dict(
        paper_bgcolor="#070E1C",
        plot_bgcolor="#0B1220",
        font=dict(family="IBM Plex Sans", color="#D0DCF0", size=11),
        height=360,
        title=dict(
            text="<b>Efficiency Index vs Exploration Ratio Ξ΅ (inspired by Kim et al. 2022)</b>",
            font=dict(color=WCO_GOLD, size=14, family="Playfair Display"),
            x=0.5,
        ),
        xaxis=dict(title="Exploration Ratio Ξ΅ (%)", gridcolor="#1E3A6E"),
        yaxis=dict(title="Norm-Rev@10% (Efficiency)", gridcolor="#1E3A6E",
                   range=[0.25, 0.85]),
        legend=dict(bgcolor="#0F1C35", bordercolor=WCO_BORDER),
        margin=dict(l=55, r=20, t=55, b=45),
    )
    fig.update_layout(**layout)
    return fig
def risk_score_threshold_table(df):
    """Summarise channel assignment and outcomes per 20-point risk-score band.

    Bills are bucketed by ``fraud_score`` into five bands between 0 and 1.
    Bands that contain no bills are omitted from the result.

    Args:
        df: DataFrame with the columns ``fraud_score``, ``bill_id``,
            ``channel`` ("RED" / "YELLOW" / "GREEN"), ``inspection_outcome``,
            ``detected_revenue`` and ``is_illicit``.

    Returns:
        A DataFrame with one row per populated band and the columns
        "Risk Score Band", "Bills", "RED", "YELLOW", "GREEN",
        "Fraud Detected", "Avg Revenue ($)", "True Illicit" and
        "Detection Rate (%)" (fraud detected as a percentage of the bills
        in the band).
    """
    # include_lowest=True closes the first interval on the left, so a score of
    # exactly 0 lands in "0-20%" instead of becoming NaN and being dropped
    # from the table by the groupby.
    bins = pd.cut(df["fraud_score"], bins=[0, .2, .4, .6, .8, 1.0],
                  labels=["0-20%", "20-40%", "40-60%", "60-80%", "80-100%"],
                  include_lowest=True)
    result = df.groupby(bins, observed=True).agg(
        Bills=("bill_id", "count"),
        Red=("channel", lambda x: (x == "RED").sum()),
        Yellow=("channel", lambda x: (x == "YELLOW").sum()),
        Green=("channel", lambda x: (x == "GREEN").sum()),
        Fraud_Detected=("inspection_outcome", lambda x: (x == "FRAUD_DETECTED").sum()),
        Avg_Revenue=("detected_revenue", "mean"),
        Illicit_Count=("is_illicit", "sum"),
    ).reset_index()
    result.columns = ["Risk Score Band", "Bills", "RED", "YELLOW", "GREEN",
                      "Fraud Detected", "Avg Revenue ($)", "True Illicit"]
    result["Avg Revenue ($)"] = result["Avg Revenue ($)"].round(2)
    # replace(0, 1) guards the division should a band ever report zero bills.
    result["Detection Rate (%)"] = (
        100 * result["Fraud Detected"] / result["Bills"].replace(0, 1)
    ).round(1)
    return result
def show():
    """Render the bandwidth-optimisation and efficiency report page.

    Reads ``sim_df`` (the simulation DataFrame) and ``sim_efficiency`` (a dict
    with ``hybrid``, ``baseline`` and ``improvement_pct`` entries) from
    ``st.session_state``. When ``sim_df`` is absent, a "No Simulation Data
    Found" notice is rendered and the function returns early. Otherwise it
    renders, in order: the KPI strip, the efficiency scorecard table, the
    three charts, the risk-score band chart and table, the summary cards and
    the references footer.

    Returns:
        None. All output goes through Streamlit calls.

    NOTE(review): several user-facing literals in this function look
    mis-encoded (for example "β", "π", "Β·"); they are kept exactly as found
    and should be restored from a correctly encoded copy of the file.
    """
    inject_global_css()
    page_header("π", "Bandwidth Optimisation & Efficiency Report",
                "10% INTERDICTION BANDWIDTH Β· ACCURACY Β· ROI Β· HYBRID vs STATIC COMPARISON")

    # Guard clause: nothing can be reported before a simulation has been run.
    if "sim_df" not in st.session_state:
        st.markdown("""
<div style="background:#0F1C35;border:2px dashed #1E3A6E;border-radius:14px;
padding:60px;text-align:center;margin-top:30px;">
<div style="font-size:40px;">β οΈ</div>
<div style="color:#F5A800;font-size:18px;font-family:'Playfair Display',serif;">
No Simulation Data Found
</div>
<div style="color:#6B85AA;margin-top:10px;">
Please run the simulation on <b>Page 3</b> first.
</div>
</div>""", unsafe_allow_html=True)
        return

    df = st.session_state.sim_df
    efficiency = st.session_state.get("sim_efficiency", {})
    hybrid = efficiency.get("hybrid", {})
    baseline = efficiency.get("baseline", {})
    improve = efficiency.get("improvement_pct", 0)

    # Sign-aware label: a negative improvement renders as "-5%" rather than
    # "+-5%". Values that reject a sign format spec keep the plain "+" prefix.
    try:
        improve_label = f"{improve:+}%"
    except (TypeError, ValueError):
        improve_label = f"+{improve}%"

    # --- KPI headline strip ---
    metric_row([
        (f"{hybrid.get('efficiency_index',0):.3f}", "Hybrid Efficiency Index", WCO_GREEN),
        (f"{baseline.get('efficiency_index',0):.2f}", "Baseline Efficiency", WCO_RED),
        (improve_label, "Efficiency Improvement", WCO_GOLD),
        (f"{hybrid.get('precision',0)*100:.1f}%", "Hybrid Precision", WCO_BLUE),
        (f"{hybrid.get('detection_rate',0)*100:.1f}%", "Detection Rate", WCO_GREEN),
    ])

    # --- WCO efficiency scorecard ---
    st.markdown('<div class="section-title">π WCO Efficiency Scorecard</div>',
                unsafe_allow_html=True)
    # NOTE(review): the scorecard and the summary cards fall back to
    # hard-coded sample values (e.g. 0.82) when a metric is missing, whereas
    # the KPI strip above falls back to 0 - confirm which is intended.
    sc_data = [
        ("Static Baseline", "DATE Only (No Learning)", "10%",
         f"{baseline.get('detection_rate',0.041)*100:.1f}%",
         f"{baseline.get('efficiency_index',0.41):.2f}", WCO_RED, "β Degrades over time"),
        ("Naive Hybrid", "DATE 90% + Random 10%", "10%",
         "5.6%", "0.56", "#F5A800", "β οΈ Improvement but random exploration"),
        ("No-CelH Hybrid", "DATE 90% + gATE/bATE 10%", "10%",
         f"{hybrid.get('detection_rate',0.082)*100:.1f}%",
         f"{hybrid.get('efficiency_index',0.82):.3f}", WCO_GREEN, "β Best β self-learning"),
    ]
    rows_html = "".join(
        f"""
<tr>
<td><b style="color:{color};">{model}</b></td>
<td style="color:#8BAAD4;font-size:12px;">{strategy}</td>
<td style="text-align:center;">{sel}</td>
<td style="text-align:center;">{det}</td>
<td style="text-align:center;">
<b style="color:{color};font-size:15px;">{eff}</b></td>
<td style="font-size:12px;">{verdict}</td>
</tr>"""
        for model, strategy, sel, det, eff, color, verdict in sc_data
    )
    st.markdown(f"""
<table class="wco-table">
<thead><tr>
<th>Model</th><th>Strategy</th><th>Selection Rate</th>
<th>Detection Rate</th><th>Efficiency Index</th><th>Verdict</th>
</tr></thead>
<tbody>{rows_html}</tbody>
</table>""", unsafe_allow_html=True)
    st.markdown("<br/>", unsafe_allow_html=True)

    # --- Charts ---
    st.plotly_chart(efficiency_comparison_chart(hybrid, baseline), use_container_width=True)
    c1, c2 = st.columns(2)
    with c1:
        st.plotly_chart(bandwidth_optimisation_chart(df), use_container_width=True)
    with c2:
        st.plotly_chart(exploration_ratio_chart(), use_container_width=True)

    # --- Risk score threshold table ---
    st.markdown('<div class="section-title">π Risk Score Band β Channel Assignment</div>',
                unsafe_allow_html=True)
    st.markdown("""<div class="alert-gold">
Shows how the 10% bandwidth is allocated across risk score bands.
High-scoring bills (60β100%) dominate the RED channel, confirming the DATE optimisation.
</div>""", unsafe_allow_html=True)
    df_thr = risk_score_threshold_table(df)

    # Stacked bar chart: one trace per channel, coloured to match the channel.
    fig_band = go.Figure()
    for ch, color in [("RED", "#C8102E"), ("YELLOW", "#F5A800"), ("GREEN", "#00843D")]:
        fig_band.add_trace(go.Bar(
            x=df_thr["Risk Score Band"], y=df_thr[ch],
            name=ch, marker_color=color, opacity=0.85,
        ))
    fig_band.update_layout(
        paper_bgcolor="#070E1C", plot_bgcolor="#0B1220", barmode="stack",
        font=dict(color="#D0DCF0", size=11), height=320,
        title=dict(text="<b>Channel Assignment by Risk Score Band</b>",
                   font=dict(color=WCO_GOLD, size=13), x=0.5),
        xaxis=dict(gridcolor="#1E3A6E", title="Risk Score Band"),
        yaxis=dict(gridcolor="#1E3A6E", title="Bills"),
        legend=dict(bgcolor="#0F1C35", bordercolor=WCO_BORDER),
        margin=dict(l=50, r=20, t=45, b=45),
    )
    st.plotly_chart(fig_band, use_container_width=True)
    st.dataframe(
        df_thr.style.set_properties(**{"background-color": WCO_CARD_BG,
                                       "color": "#D0DCF0", "font-size": "12px"}),
        use_container_width=True,
    )

    # --- Optimisation summary cards ---
    st.markdown('<div class="section-title">π₯ Optimisation Summary</div>', unsafe_allow_html=True)
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown(f"""
<div style="background:#071A10;border:2px solid #00843D;border-radius:12px;
padding:24px;text-align:center;">
<div style="color:#00843D;font-size:13px;margin-bottom:8px;">HYBRID EFFICIENCY INDEX</div>
<div style="color:#44CC88;font-family:'Playfair Display',serif;font-size:48px;
font-weight:700;">{hybrid.get('efficiency_index',0.82):.3f}</div>
<div style="color:#6B85AA;font-size:12px;margin-top:8px;">vs Static: {baseline.get('efficiency_index',0.41):.2f}</div>
<div style="color:#44CC88;font-size:13px;font-weight:600;margin-top:4px;">{improve_label} improvement</div>
</div>""", unsafe_allow_html=True)
    with col2:
        revenue_uplift = hybrid.get('revenue', 0) - baseline.get('revenue', 0)
        st.markdown(f"""
<div style="background:#1A1000;border:2px solid #C8A951;border-radius:12px;
padding:24px;text-align:center;">
<div style="color:#C8A951;font-size:13px;margin-bottom:8px;">REVENUE UPLIFT vs BASELINE</div>
<div style="color:#FFD700;font-family:'Playfair Display',serif;font-size:42px;
font-weight:700;">${revenue_uplift:,.0f}</div>
<div style="color:#6B85AA;font-size:12px;margin-top:8px;">Additional recovery vs static model</div>
<div style="color:#C8A951;font-size:13px;font-weight:600;margin-top:4px;">Self-learning dividend</div>
</div>""", unsafe_allow_html=True)
    with col3:
        precision_pct = hybrid.get('precision', 0.41) * 100
        st.markdown(f"""
<div style="background:#0A1832;border:2px solid #0066CC;border-radius:12px;
padding:24px;text-align:center;">
<div style="color:#0066CC;font-size:13px;margin-bottom:8px;">INSPECTION PRECISION</div>
<div style="color:#66AAFF;font-family:'Playfair Display',serif;font-size:48px;
font-weight:700;">{precision_pct:.1f}%</div>
<div style="color:#6B85AA;font-size:12px;margin-top:8px;">Fraud found per bill inspected</div>
<div style="color:#66AAFF;font-size:13px;font-weight:600;margin-top:4px;">Optimised bandwidth ROI</div>
</div>""", unsafe_allow_html=True)

    # --- Footer ---
    st.markdown("<br/>", unsafe_allow_html=True)
    st.markdown("""
<div style="background:#0A1020;border:1px solid #1E3A6E;border-radius:10px;
padding:20px 28px;text-align:center;">
<div style="color:#C8A951;font-family:'Playfair Display',serif;font-size:14px;margin-bottom:8px;">
π Scientific References
</div>
<div style="color:#6B85AA;font-size:12px;line-height:1.9;">
Kim, S. et al. (2022). <i>Active Learning for Human-in-the-Loop Customs Inspection.</i>
IEEE Transactions on Knowledge and Data Engineering. DOI: 10.1109/TKDE.2022.3144299<br/>
World Customs Organization. <i>WCO Risk Management Compendium.</i> Vol. 1β3.<br/>
WCO BACUDA Initiative Β· DATE Model (KDD 2020) Β· gATE/bATE Exploration Strategy
</div>
</div>""", unsafe_allow_html=True)
|