| # Wilson confidence interval monitor for Rust enforcer agreement | |
| from prometheus_client import Gauge | |
| import math | |
# Gauge that exposes the most recently computed lower Wilson bound on the
# agreement rate; it is written by `update()` in this module.
LOWER_BOUND = Gauge(
    name="arf_rust_agreement_lower_bound",
    documentation="Lower 99.9% Wilson bound on agreement rate",
)
def wilson_lower(success: float, total: float, z: float = 3.291) -> float:
    """
    Compute the lower bound of the Wilson score confidence interval
    for a binomial proportion.

    Parameters
    ----------
    success : int or float
        Number of agreed evaluations.
    total : int or float
        Total number of shadow evaluations (agreed + diverged).
    z : float
        Z-score for the desired confidence level (default 3.291 for 99.9%).

    Returns
    -------
    float
        Lower bound of the Wilson interval, clamped to [0, 1].
        Returns 0.0 when ``total`` is zero or negative, because no bound
        can be stated without observations.

    Notes
    -----
    The observed proportion ``success / total`` is clamped to [0, 1] before
    use. Inconsistent counts (``success > total`` or ``success < 0``) would
    otherwise make ``p * (1 - p)`` negative and could raise a math domain
    error inside ``math.sqrt``.
    """
    # No usable sample: report the most pessimistic bound instead of dividing
    # by zero (or by a nonsensical negative count).
    if total <= 0:
        return 0.0

    n = total
    # Keep the proportion inside [0, 1] so the variance term is never negative.
    p = min(1.0, max(0.0, success / n))

    z_sq = z**2
    denom = 1 + z_sq / n
    center = (p + z_sq / (2 * n)) / denom
    margin = z * math.sqrt(p * (1 - p) / n + z_sq / (4 * n**2)) / denom

    # Floating-point rounding can push the difference marginally outside the
    # valid range; clamp on both sides as the documented contract promises.
    return min(1.0, max(0.0, center - margin))
def update(agreed, diverged):
    """
    Recompute the lower-bound gauge from the current agreement counts.

    The counter values are supplied by the caller; this function only
    derives the Wilson lower bound from them and stores it in
    `LOWER_BOUND`. Per the original module notes, it is invoked
    periodically by the background thread started in the API lifespan
    (see `app/main.py`).

    Parameters
    ----------
    agreed : int
        Current value of `arf_rust_agreement_total{result="agreed"}`.
    diverged : int
        Current value of `arf_rust_agreement_total{result="diverged"}`.
    """
    # Every shadow evaluation either agreed or diverged, so the sample size
    # is the sum of the two counters.
    total_evaluations = agreed + diverged
    LOWER_BOUND.set(wilson_lower(agreed, total_evaluations))