Spaces:
Build error
Build error
| """ | |
| Claude Opus 4.5 Adapter for ARF | |
| Drop-in replacement for Hugging Face inference | |
| """ | |
| import os | |
| import logging | |
| from typing import Optional | |
| from dataclasses import dataclass | |
| logger = logging.getLogger(__name__) | |
| # Try to import anthropic, with graceful fallback | |
| try: | |
| import anthropic | |
| ANTHROPIC_AVAILABLE = True | |
| except ImportError: | |
| ANTHROPIC_AVAILABLE = False | |
| logger.warning("anthropic package not installed - using mock mode only") | |
@dataclass
class ClaudeConfig:
    """Claude API configuration.

    Declared as a dataclass so the annotated fields below become keyword
    constructor arguments (``ClaudeConfig(api_key=...)``) and the defaults
    apply when a field is omitted. Without the decorator the class has no
    generated ``__init__`` and cannot be constructed with ``api_key=...``.
    """

    # API key passed to the Anthropic client; an empty string means "no key".
    api_key: str
    # Model identifier sent with every request.
    # NOTE(review): the module docstring names "Claude Opus 4.5" - confirm
    # that this identifier is the model actually intended.
    model: str = "claude-opus-4"
    # Upper bound on generated tokens per completion.
    max_tokens: int = 512
    # Sampling temperature forwarded to the API.
    temperature: float = 0.3
class ClaudeAdapter:
    """
    Drop-in replacement for HF inference in ARF agents.

    Features:
    - Automatic fallback to mock mode if no API key
    - Intelligent pre-written responses for demo
    - Same interface as HF inference
    - Built-in error handling

    Attributes:
        config: The ``ClaudeConfig`` in use.
        client: The ``anthropic.Anthropic`` client, or ``None`` in mock mode.
        mock_mode: ``True`` when canned responses are returned instead of
            calling the API.
    """

    # Ordered (keywords, canned response) pairs. The first entry with any
    # keyword contained in the lower-cased prompt wins, so order matters.
    _MOCK_RESPONSES = (
        # Detective Agent Response
        (
            ("detective", "anomaly"),
            "🔍 ANOMALY DETECTED: Payment gateway timeout pattern identified.\n"
            "PATTERN ANALYSIS:\n"
            "• Current error rate: 87% (baseline: <5%)\n"
            "• Latency spike: 8500ms P99 (baseline: ~100ms)\n"
            "• Pattern match: 94% similarity to incident 2024-11-15 (database connection pool exhaustion)\n"
            "CONFIDENCE: HIGH (0.87)\n"
            "CLASSIFICATION: Infrastructure failure - upstream dependency timeout\n"
            "AFFECTED METRICS:\n"
            "Primary: Error rate (+1740% vs baseline)\n"
            "Secondary: Latency (+8400% vs baseline)\n"
            "Tertiary: Throughput degradation\n"
            "RECOMMENDATION: Immediate investigation of upstream payment provider status + connection pool health check required.",
        ),
        # Diagnostician Agent Response
        (
            ("diagnostician", "root cause"),
            "🔬 ROOT CAUSE ANALYSIS:\n"
            "PRIMARY CAUSE:\n"
            "Upstream payment provider latency spike (avg response: 8.5s, normal: <500ms)\n"
            "SECONDARY FACTORS:\n"
            "• Connection pool exhaustion (95% utilized)\n"
            "• Retry storm amplifying load (exponential backoff not engaged)\n"
            "• Circuit breaker threshold not reached (87% < 90% threshold)\n"
            "EVIDENCE CHAIN:\n"
            "1. Error rate spike correlates with provider status page incident (timestamp alignment)\n"
            "2. Connection pool saturation occurred 45 seconds before error spike\n"
            "3. Upstream API latency increased 17x baseline\n"
            "4. Historical pattern match: 94% similarity to Nov 15 incident\n"
            "RECOMMENDED ACTION: REROUTE\n"
            "• Target: gateway-2 (backup payment processor)\n"
            "• Expected recovery: 45±5 seconds\n"
            "• Success probability: 92% (based on historical data)\n"
            "RATIONALE: Rerouting bypasses degraded provider, allows time for upstream recovery.",
        ),
        # Predictive Agent Response
        (
            ("predictive", "forecast"),
            "📈 PREDICTIVE FORECAST ANALYSIS:\n"
            "CURRENT TRAJECTORY:\n"
            "• Error rate: Increasing at 12%/minute (exponential trend)\n"
            "• Latency: Accelerating degradation (quadratic curve)\n"
            "• Resource utilization: CPU 75%, Memory 82% (stable)\n"
            "TIME-TO-FAILURE ESTIMATES:\n"
            "• Critical threshold (>95% error rate): ~8 minutes\n"
            "• Complete service failure: ~12 minutes\n"
            "• Current impact: 1,240 active users affected\n"
            "RISK ASSESSMENT:\n"
            "Risk Score: 0.85 (HIGH)\n"
            "Confidence: 0.79\n"
            "Trend: DETERIORATING\n"
            "BUSINESS IMPACT FORECAST:\n"
            # The backslash before "$" is part of the emitted text (presumably
            # so a Markdown renderer does not treat "$" as a math delimiter -
            # confirm). It is written as "\\$" because a bare "\$" is an
            # invalid escape sequence in a normal string literal.
            "• Current revenue loss: \\$12,000/minute\n"
            "• Projected 15-min loss (no action): \\$180,000\n"
            "• Customer churn risk: MEDIUM (historical correlation: 0.67)\n"
            "• SLA violation: IMMINENT (99.9% target, current: 13% availability)\n"
            "RECOMMENDATIONS:\n"
            "Primary: Execute REROUTE action immediately (Diagnostician recommendation)\n"
            "Secondary: Scale connection pool +50% capacity\n"
            "Tertiary: Enable aggressive circuit breaking (lower threshold to 75%)\n"
            "PREVENTIVE MEASURES:\n"
            "Monitor upstream provider health proactively, implement predictive circuit breaking.",
        ),
    )

    # Generic/Synthesis Response, used when no keyword matches.
    _DEFAULT_MOCK_RESPONSE = (
        "✅ MULTI-AGENT ANALYSIS COMPLETE\n"
        "SYSTEM STATUS: Incident detected and analyzed\n"
        "CONFIDENCE: HIGH (0.85)\n"
        "SYNTHESIS:\n"
        "All agents have completed analysis. The system has identified a critical upstream dependency failure requiring immediate intervention. Recovery action has been selected based on historical success patterns and current system state.\n"
        "Recommended action: REROUTE to backup systems\n"
        "Expected outcome: Service restoration within 45 seconds\n"
        "Continuing autonomous monitoring..."
    )

    def __init__(self, config: Optional["ClaudeConfig"] = None):
        """
        Create the adapter and decide between live and mock mode.

        Args:
            config: Configuration to use. When omitted, a ``ClaudeConfig`` is
                built from the ``ANTHROPIC_API_KEY`` environment variable.

        Mock mode is selected when the ``anthropic`` package is unavailable,
        when the API key is empty, or when constructing the client raises.
        """
        self.config = config or ClaudeConfig(
            api_key=os.environ.get("ANTHROPIC_API_KEY", "")
        )
        # Always define both attributes so they exist in every mode.
        self.client = None
        self.mock_mode = True

        if not ANTHROPIC_AVAILABLE:
            logger.warning("Anthropic package not available - mock mode only")
            return
        if not self.config.api_key:
            logger.warning("No ANTHROPIC_API_KEY found - using mock mode")
            return

        try:
            self.client = anthropic.Anthropic(api_key=self.config.api_key)
        except Exception as e:
            # Best-effort: a broken client must not take the demo down.
            logger.error("Failed to initialize Claude client: %s", e)
            return

        self.mock_mode = False
        logger.info(
            "✅ Claude adapter initialized with model: %s", self.config.model
        )

    def generate_completion(
        self,
        prompt: str,
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate completion using Claude or fallback to mock.

        Args:
            prompt: User prompt
            system_prompt: Optional system context

        Returns:
            Generated text response. A canned mock response is returned in
            mock mode, when the API call raises, or when the reply carries no
            text block.
        """
        if self.mock_mode:
            logger.debug("Using mock mode (no API key or package not available)")
            return self._mock_response(prompt)

        try:
            kwargs = {
                "model": self.config.model,
                "max_tokens": self.config.max_tokens,
                "temperature": self.config.temperature,
                "messages": [{"role": "user", "content": prompt}],
            }
            if system_prompt:
                kwargs["system"] = system_prompt

            response = self.client.messages.create(**kwargs)

            # Return the first content block that actually carries text; a
            # leading non-text block must not discard a usable reply.
            for block in response.content or ():
                text = getattr(block, "text", None)
                if isinstance(text, str):
                    return text
        except Exception as e:
            logger.error("Claude API error: %s - falling back to mock", e)
            return self._mock_response(prompt)

        logger.warning("Empty response from Claude - using mock")
        return self._mock_response(prompt)

    def _mock_response(self, prompt: str) -> str:
        """
        Intelligent fallback mock response for demo.

        Pre-crafted to show system capabilities. The prompt is lower-cased and
        matched against the keyword table in order; the generic synthesis
        response is returned when nothing matches.

        Args:
            prompt: User prompt; ``None`` is treated as an empty prompt so the
                fallback path itself cannot raise.

        Returns:
            The canned response text.
        """
        prompt_lower = (prompt or "").lower()
        for keywords, canned in self._MOCK_RESPONSES:
            if any(keyword in prompt_lower for keyword in keywords):
                return canned
        return self._DEFAULT_MOCK_RESPONSE
# Module-wide adapter instance, created on first request
_claude_adapter: Optional[ClaudeAdapter] = None


def get_claude_adapter() -> ClaudeAdapter:
    """Return the shared ClaudeAdapter, constructing it on the first call."""
    global _claude_adapter
    if _claude_adapter is not None:
        # Already built by an earlier call: hand back the same object.
        return _claude_adapter
    _claude_adapter = ClaudeAdapter()
    return _claude_adapter