petter2025 committed on
Commit a6671a5 · verified · 1 Parent(s): 29094c3

Update app.py

Files changed (1)
  1. app.py +37 -15
app.py CHANGED
@@ -16,7 +16,16 @@ import sys
 import functools
 from scipy.stats import beta
 import plotly.graph_objects as go
-from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
+
+# ----------------------------------------------------------------------
+# Prometheus metrics (optional)
+# ----------------------------------------------------------------------
+try:
+    from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
+    PROMETHEUS_AVAILABLE = True
+except ImportError:
+    PROMETHEUS_AVAILABLE = False
+    logging.warning("prometheus-client not installed, metrics endpoint disabled")
 
 # ----------------------------------------------------------------------
 # Configuration from environment variables with validation
@@ -125,12 +134,18 @@ def vacuum_db():
         logger.error(f"Vacuum failed: {e}")
 
 # ----------------------------------------------------------------------
-# Prometheus metrics
+# Prometheus metrics (dummy if unavailable)
 # ----------------------------------------------------------------------
-decisions_total = Counter('arf_decisions_total', 'Total decisions made', ['action'])
-risk_gauge = Gauge('arf_current_risk', 'Current risk score')
-decision_latency = Histogram('arf_decision_latency_seconds', 'Time to evaluate intent')
-mcmc_runs = Counter('arf_mcmc_runs_total', 'Total MCMC runs')
+if PROMETHEUS_AVAILABLE:
+    decisions_total = Counter('arf_decisions_total', 'Total decisions made', ['action'])
+    risk_gauge = Gauge('arf_current_risk', 'Current risk score')
+    decision_latency = Histogram('arf_decision_latency_seconds', 'Time to evaluate intent')
+    mcmc_runs = Counter('arf_mcmc_runs_total', 'Total MCMC runs')
+else:
+    decisions_total = None
+    risk_gauge = None
+    decision_latency = None
+    mcmc_runs = None
 
 # ----------------------------------------------------------------------
 # Thread‑safe history (in‑memory + DB backup)
@@ -152,8 +167,9 @@ def update_dashboard_data(decision: dict, risk: float):
         risk_history.pop(0)
     save_decision_to_db(decision, risk)
     # Update Prometheus metrics
-    decisions_total.labels(action=decision.get("risk_level", "unknown")).inc()
-    risk_gauge.set(risk)
+    if PROMETHEUS_AVAILABLE:
+        decisions_total.labels(action=decision.get("risk_level", "unknown")).inc()
+        risk_gauge.set(risk)
 
 def refresh_history_from_db():
     """Load recent history from database (called at startup)."""
@@ -165,7 +181,8 @@ def refresh_history_from_db():
     for ts, dec, risk in decisions:
         decision_history.append((ts, dec, risk))
         risk_history.append((ts, risk))
-    risk_gauge.set(risk)  # update gauge with latest risk
+    if PROMETHEUS_AVAILABLE:
+        risk_gauge.set(risk)  # update gauge with latest risk
 
 # ----------------------------------------------------------------------
 # Memory monitoring (daemon thread with graceful stop)
@@ -293,7 +310,8 @@ def handle_infra_with_governance(fault_type: str, context_window: int, session_s
             }
         }
         # Record latency metric
-        decision_latency.observe(time.time() - start_time)
+        if PROMETHEUS_AVAILABLE:
+            decision_latency.observe(time.time() - start_time)
         return output, session_state
     except Exception as e:
         logger.exception("Error in handle_infra_with_governance")
@@ -342,7 +360,8 @@ def run_hmc_mcmc(samples: int, warmup: int):
     # Input validation
     samples = max(500, min(10000, int(samples)))
     warmup = max(100, min(2000, int(warmup)))
-    mcmc_runs.inc()  # record metric
+    if PROMETHEUS_AVAILABLE:
+        mcmc_runs.inc()  # record metric
 
     # Generate data: 10 observations with mean 0.5, std 0.2
     np.random.seed(42)  # for reproducibility
@@ -387,7 +406,6 @@ def run_hmc_mcmc(samples: int, warmup: int):
 # ----------------------------------------------------------------------
 # Dashboard plots (thread‑safe with caching)
 # ----------------------------------------------------------------------
-# Simple TTL cache decorator
 class TTLCache:
     def __init__(self, ttl_seconds=5):
         self.ttl = ttl_seconds
@@ -673,9 +691,13 @@ with gr.Blocks(title=f"ARF v{VERSION} – Bayesian Risk Scoring Demo") as demo:
         outputs=[hmc_summary, hmc_trace_plot, hmc_pair_plot]
     )
 
-# Add Prometheus metrics endpoint
-demo.fastapi_app.add_api_route("/metrics", lambda: (generate_latest(), 200, {"Content-Type": CONTENT_TYPE_LATEST}), methods=["GET"])
-
+# ----------------------------------------------------------------------
+# Launch
+# ----------------------------------------------------------------------
 if __name__ == "__main__":
+    # Enable queue and add metrics route if available
     demo.queue()
+    if PROMETHEUS_AVAILABLE:
+        # Access the underlying FastAPI app after queueing
+        demo.app.add_api_route("/metrics", lambda: (generate_latest(), 200, {"Content-Type": CONTENT_TYPE_LATEST}), methods=["GET"])
     demo.launch(theme="soft", server_name="0.0.0.0", server_port=7860)
 
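Note on the None fallback: binding every metric name to None is why each call site in the diff above needs its own `if PROMETHEUS_AVAILABLE` guard. An alternative worth considering, sketched here and not part of this commit (the `_NoopMetric` class is hypothetical), is a no-op stand-in that mirrors the small slice of the prometheus_client API this app actually calls, so the call sites stay unconditional:

    # Sketch only (not in this commit): no-op stand-ins for Counter/Gauge/Histogram.
    class _NoopMetric:
        def labels(self, *args, **kwargs):
            return self  # chainable, like Counter.labels(...)

        def inc(self, amount=1):
            pass  # Counter.inc()

        def set(self, value):
            pass  # Gauge.set()

        def observe(self, value):
            pass  # Histogram.observe()

    if PROMETHEUS_AVAILABLE:
        decisions_total = Counter('arf_decisions_total', 'Total decisions made', ['action'])
        risk_gauge = Gauge('arf_current_risk', 'Current risk score')
        decision_latency = Histogram('arf_decision_latency_seconds', 'Time to evaluate intent')
        mcmc_runs = Counter('arf_mcmc_runs_total', 'Total MCMC runs')
    else:
        decisions_total = risk_gauge = decision_latency = mcmc_runs = _NoopMetric()

With that shape, `decisions_total.labels(...).inc()`, `risk_gauge.set(...)`, `decision_latency.observe(...)` and `mcmc_runs.inc()` run unchanged whether or not the library is installed.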
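One caveat on the new /metrics route: returning a `(body, status, headers)` tuple from a handler is a Flask idiom. FastAPI does not unpack such tuples; it would attempt to JSON-serialize the whole tuple rather than emit the Prometheus text format. A sketch of the same route returning an explicit Response (the `metrics_endpoint` name is illustrative, and this assumes `demo.app` is the mounted FastAPI instance, which varies by Gradio version; in some versions the app object only exists after launch()):

    # Sketch only: return an explicit Response so body and content type
    # reach the client unmodified.
    from fastapi import Response

    def metrics_endpoint() -> Response:
        # generate_latest() yields bytes in the Prometheus text exposition format.
        return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)

    demo.app.add_api_route("/metrics", metrics_endpoint, methods=["GET"])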
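For a quick smoke test once the app is running locally, a hypothetical check using only the metric names registered in this commit:

    # Assumes the server started by demo.launch(..., server_port=7860) is up
    # and prometheus_client is installed.
    import urllib.request

    body = urllib.request.urlopen("http://localhost:7860/metrics").read().decode()
    assert "arf_decisions_total" in body  # registered metric families appear here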