Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,16 @@ import sys
|
|
| 16 |
import functools
|
| 17 |
from scipy.stats import beta
|
| 18 |
import plotly.graph_objects as go
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# ----------------------------------------------------------------------
|
| 22 |
# Configuration from environment variables with validation
|
|
@@ -125,12 +134,18 @@ def vacuum_db():
|
|
| 125 |
logger.error(f"Vacuum failed: {e}")
|
| 126 |
|
| 127 |
# ----------------------------------------------------------------------
|
| 128 |
-
# Prometheus metrics
|
| 129 |
# ----------------------------------------------------------------------
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
# ----------------------------------------------------------------------
|
| 136 |
# Thread‑safe history (in‑memory + DB backup)
|
|
@@ -152,8 +167,9 @@ def update_dashboard_data(decision: dict, risk: float):
|
|
| 152 |
risk_history.pop(0)
|
| 153 |
save_decision_to_db(decision, risk)
|
| 154 |
# Update Prometheus metrics
|
| 155 |
-
|
| 156 |
-
|
|
|
|
| 157 |
|
| 158 |
def refresh_history_from_db():
|
| 159 |
"""Load recent history from database (called at startup)."""
|
|
@@ -165,7 +181,8 @@ def refresh_history_from_db():
|
|
| 165 |
for ts, dec, risk in decisions:
|
| 166 |
decision_history.append((ts, dec, risk))
|
| 167 |
risk_history.append((ts, risk))
|
| 168 |
-
|
|
|
|
| 169 |
|
| 170 |
# ----------------------------------------------------------------------
|
| 171 |
# Memory monitoring (daemon thread with graceful stop)
|
|
@@ -293,7 +310,8 @@ def handle_infra_with_governance(fault_type: str, context_window: int, session_s
|
|
| 293 |
}
|
| 294 |
}
|
| 295 |
# Record latency metric
|
| 296 |
-
|
|
|
|
| 297 |
return output, session_state
|
| 298 |
except Exception as e:
|
| 299 |
logger.exception("Error in handle_infra_with_governance")
|
|
@@ -342,7 +360,8 @@ def run_hmc_mcmc(samples: int, warmup: int):
|
|
| 342 |
# Input validation
|
| 343 |
samples = max(500, min(10000, int(samples)))
|
| 344 |
warmup = max(100, min(2000, int(warmup)))
|
| 345 |
-
|
|
|
|
| 346 |
|
| 347 |
# Generate data: 10 observations with mean 0.5, std 0.2
|
| 348 |
np.random.seed(42) # for reproducibility
|
|
@@ -387,7 +406,6 @@ def run_hmc_mcmc(samples: int, warmup: int):
|
|
| 387 |
# ----------------------------------------------------------------------
|
| 388 |
# Dashboard plots (thread‑safe with caching)
|
| 389 |
# ----------------------------------------------------------------------
|
| 390 |
-
# Simple TTL cache decorator
|
| 391 |
class TTLCache:
|
| 392 |
def __init__(self, ttl_seconds=5):
|
| 393 |
self.ttl = ttl_seconds
|
|
@@ -673,9 +691,13 @@ with gr.Blocks(title=f"ARF v{VERSION} – Bayesian Risk Scoring Demo") as demo:
|
|
| 673 |
outputs=[hmc_summary, hmc_trace_plot, hmc_pair_plot]
|
| 674 |
)
|
| 675 |
|
| 676 |
-
#
|
| 677 |
-
|
| 678 |
-
|
| 679 |
if __name__ == "__main__":
    # Script entry point: turn on Gradio's request queue, then start the
    # server with the options below (all interfaces, port 7860).
    launch_options = {
        "theme": "soft",
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.queue()
    demo.launch(**launch_options)
|
|
|
|
| 16 |
import functools
|
| 17 |
from scipy.stats import beta
|
| 18 |
import plotly.graph_objects as go
|
| 19 |
+
|
| 20 |
+
# ----------------------------------------------------------------------
|
| 21 |
+
# Prometheus metrics (optional)
|
| 22 |
+
# ----------------------------------------------------------------------
|
| 23 |
+
try:
    # prometheus-client is an optional dependency: the application must still
    # start when it is missing, so the import is attempted and the outcome is
    # recorded in PROMETHEUS_AVAILABLE for the rest of the module to check.
    from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
    PROMETHEUS_AVAILABLE = True
except ImportError:
    PROMETHEUS_AVAILABLE = False
    # Use a named logger instead of the module-level logging.warning():
    # the module-level helper implicitly calls logging.basicConfig() when the
    # root logger has no handlers, which would turn any logging configuration
    # performed later in this file into a silent no-op.
    logging.getLogger(__name__).warning("prometheus-client not installed, metrics endpoint disabled")
|
| 29 |
|
| 30 |
# ----------------------------------------------------------------------
|
| 31 |
# Configuration from environment variables with validation
|
|
|
|
| 134 |
logger.error(f"Vacuum failed: {e}")
|
| 135 |
|
| 136 |
# ----------------------------------------------------------------------
|
| 137 |
+
# Prometheus metrics (dummy if unavailable)
|
| 138 |
# ----------------------------------------------------------------------
|
| 139 |
+
# Metric handles default to None; they are replaced with real collectors only
# when prometheus-client was importable. Call sites are expected to check
# PROMETHEUS_AVAILABLE before touching any of them.
decisions_total = risk_gauge = decision_latency = mcmc_runs = None

if PROMETHEUS_AVAILABLE:
    # Counter of decisions, labelled by "action".
    decisions_total = Counter(
        'arf_decisions_total',
        'Total decisions made',
        ['action'],
    )
    # Gauge holding the most recently recorded risk score.
    risk_gauge = Gauge('arf_current_risk', 'Current risk score')
    # Histogram of decision evaluation time, in seconds.
    decision_latency = Histogram(
        'arf_decision_latency_seconds',
        'Time to evaluate intent',
    )
    # Counter of MCMC runs.
    mcmc_runs = Counter('arf_mcmc_runs_total', 'Total MCMC runs')
|
| 149 |
|
| 150 |
# ----------------------------------------------------------------------
|
| 151 |
# Thread‑safe history (in‑memory + DB backup)
|
|
|
|
| 167 |
risk_history.pop(0)
|
| 168 |
save_decision_to_db(decision, risk)
|
| 169 |
# Update Prometheus metrics
|
| 170 |
+
if PROMETHEUS_AVAILABLE:
|
| 171 |
+
decisions_total.labels(action=decision.get("risk_level", "unknown")).inc()
|
| 172 |
+
risk_gauge.set(risk)
|
| 173 |
|
| 174 |
def refresh_history_from_db():
|
| 175 |
"""Load recent history from database (called at startup)."""
|
|
|
|
| 181 |
for ts, dec, risk in decisions:
|
| 182 |
decision_history.append((ts, dec, risk))
|
| 183 |
risk_history.append((ts, risk))
|
| 184 |
+
if PROMETHEUS_AVAILABLE:
|
| 185 |
+
risk_gauge.set(risk) # update gauge with latest risk
|
| 186 |
|
| 187 |
# ----------------------------------------------------------------------
|
| 188 |
# Memory monitoring (daemon thread with graceful stop)
|
|
|
|
| 310 |
}
|
| 311 |
}
|
| 312 |
# Record latency metric
|
| 313 |
+
if PROMETHEUS_AVAILABLE:
|
| 314 |
+
decision_latency.observe(time.time() - start_time)
|
| 315 |
return output, session_state
|
| 316 |
except Exception as e:
|
| 317 |
logger.exception("Error in handle_infra_with_governance")
|
|
|
|
| 360 |
# Input validation
|
| 361 |
samples = max(500, min(10000, int(samples)))
|
| 362 |
warmup = max(100, min(2000, int(warmup)))
|
| 363 |
+
if PROMETHEUS_AVAILABLE:
|
| 364 |
+
mcmc_runs.inc() # record metric
|
| 365 |
|
| 366 |
# Generate data: 10 observations with mean 0.5, std 0.2
|
| 367 |
np.random.seed(42) # for reproducibility
|
|
|
|
| 406 |
# ----------------------------------------------------------------------
|
| 407 |
# Dashboard plots (thread‑safe with caching)
|
| 408 |
# ----------------------------------------------------------------------
|
|
|
|
| 409 |
class TTLCache:
|
| 410 |
def __init__(self, ttl_seconds=5):
|
| 411 |
self.ttl = ttl_seconds
|
|
|
|
| 691 |
outputs=[hmc_summary, hmc_trace_plot, hmc_pair_plot]
|
| 692 |
)
|
| 693 |
|
| 694 |
+
# ----------------------------------------------------------------------
|
| 695 |
+
# Launch
|
| 696 |
+
# ----------------------------------------------------------------------
|
| 697 |
if __name__ == "__main__":
    # Script entry point: enable the request queue, expose Prometheus metrics
    # when the optional dependency is installed, then start the server.
    demo.queue()
    if PROMETHEUS_AVAILABLE:
        # Local import keeps this block self-contained; prometheus_client is
        # known to be importable whenever PROMETHEUS_AVAILABLE is true.
        from prometheus_client import make_asgi_app

        # NOTE(review): whether Blocks exposes `.app` before launch(), and
        # whether routes added here survive launch(), depends on the installed
        # Gradio version - confirm against the pinned release.
        web_app = getattr(demo, "app", None)
        if web_app is None:
            # Metrics are optional, so a missing app must not abort startup.
            logger.warning("Gradio app not available before launch; /metrics endpoint disabled")
        else:
            # A FastAPI handler that returns a Flask-style
            # (body, status, headers) tuple is JSON-encoded rather than treated
            # as a response, so scrapers would receive JSON instead of the
            # Prometheus text format. Mount the client library's own ASGI
            # exposition app, which sets the body and content type itself.
            web_app.mount("/metrics", make_asgi_app())
    demo.launch(theme="soft", server_name="0.0.0.0", server_port=7860)
|