| | """
|
| | Smart Fallback Manager with 305+ Free Resources
|
| | NO 404 ERRORS - Always returns data from available sources
|
| | """
|
| |
|
| | import asyncio
|
| | import aiohttp
|
| | import random
|
| | import time
|
| | from typing import List, Dict, Optional, Any
|
| | from dataclasses import dataclass, field
|
| | from enum import Enum
|
| | import logging
|
| | from datetime import datetime, timedelta
|
| |
|
| | logger = logging.getLogger(__name__)
|
| |
|
| |
|
| | class ResourceStatus(Enum):
|
| | """Resource health status"""
|
| | ACTIVE = "active"
|
| | DEGRADED = "degraded"
|
| | FAILED = "failed"
|
| | BLOCKED = "blocked"
|
| | PROXY_NEEDED = "proxy_needed"
|
| |
|
| |
|
| | @dataclass
|
| | class ResourceHealth:
|
| | """Track resource health"""
|
| | resource_id: str
|
| | status: ResourceStatus = ResourceStatus.ACTIVE
|
| | success_count: int = 0
|
| | failure_count: int = 0
|
| | last_success: Optional[datetime] = None
|
| | last_failure: Optional[datetime] = None
|
| | avg_response_time: float = 0.0
|
| | consecutive_failures: int = 0
|
| | needs_proxy: bool = False
|
| |
|
| | def record_success(self, response_time: float):
|
| | """Record successful request"""
|
| | self.success_count += 1
|
| | self.consecutive_failures = 0
|
| | self.last_success = datetime.now()
|
| |
|
| |
|
| | if self.avg_response_time == 0:
|
| | self.avg_response_time = response_time
|
| | else:
|
| | self.avg_response_time = 0.7 * self.avg_response_time + 0.3 * response_time
|
| |
|
| |
|
| | if self.status in [ResourceStatus.FAILED, ResourceStatus.DEGRADED]:
|
| | self.status = ResourceStatus.ACTIVE
|
| |
|
| | def record_failure(self, needs_proxy: bool = False):
|
| | """Record failed request"""
|
| | self.failure_count += 1
|
| | self.consecutive_failures += 1
|
| | self.last_failure = datetime.now()
|
| |
|
| | if needs_proxy:
|
| | self.needs_proxy = True
|
| | self.status = ResourceStatus.PROXY_NEEDED
|
| | elif self.consecutive_failures >= 5:
|
| | self.status = ResourceStatus.FAILED
|
| | elif self.consecutive_failures >= 3:
|
| | self.status = ResourceStatus.DEGRADED
|
| |
|
| | def is_available(self) -> bool:
|
| | """Check if resource is available"""
|
| | return self.status in [ResourceStatus.ACTIVE, ResourceStatus.DEGRADED]
|
| |
|
| | def get_priority_score(self) -> float:
|
| | """Calculate priority score (higher is better)"""
|
| | if self.status == ResourceStatus.FAILED:
|
| | return 0.0
|
| |
|
| | success_rate = self.success_count / max(self.success_count + self.failure_count, 1)
|
| | recency_bonus = 1.0 if self.last_success and \
|
| | (datetime.now() - self.last_success).seconds < 300 else 0.5
|
| | speed_bonus = max(0.5, 1.0 - (self.avg_response_time / 5.0))
|
| |
|
| | return success_rate * recency_bonus * speed_bonus
|
| |
|
| |
|
| | class SmartFallbackManager:
|
| | """
|
| | Intelligent fallback manager using 305+ free resources
|
| | NEVER returns 404 - always finds working source
|
| | """
|
| |
|
| | def __init__(self, resources_json_path: Optional[str] = None):
|
| | """
|
| | resources_json_path:
|
| | - If provided, will be used.
|
| | - Otherwise, tries common repo paths (including `api-resources/crypto_resources_unified_2025-11-11.json`).
|
| | """
|
| | self.resources_json_path = resources_json_path or self._resolve_default_registry_path()
|
| | self.resources: Dict[str, List[Dict]] = {}
|
| | self.health_tracker: Dict[str, ResourceHealth] = {}
|
| | self.proxy_manager = None
|
| |
|
| |
|
| | self._load_resources()
|
| |
|
| | logger.info(f"✅ SmartFallbackManager initialized with {self._count_total_resources()} resources")
|
| |
|
| | def _resolve_default_registry_path(self) -> str:
|
| | """
|
| | Find the best registry JSON path available in this repo / HF Space container.
|
| | """
|
| | import os
|
| | from pathlib import Path
|
| |
|
| | env_path = (os.getenv("CRYPTO_RESOURCES_JSON") or "").strip()
|
| | if env_path:
|
| | return env_path
|
| |
|
| | candidates = [
|
| |
|
| | Path("api-resources") / "crypto_resources_unified_2025-11-11.json",
|
| | Path("crypto_resources_unified_2025-11-11.json"),
|
| |
|
| | Path("api-resources") / "crypto_resources_unified.json",
|
| | Path("crypto_resources_unified.json"),
|
| |
|
| | Path("/workspace/cursor-instructions/consolidated_crypto_resources.json"),
|
| | ]
|
| |
|
| | for p in candidates:
|
| | try:
|
| | if p.exists() and p.is_file():
|
| | return str(p)
|
| | except Exception:
|
| | continue
|
| |
|
| |
|
| | return str(candidates[0])
|
| |
|
| | def _load_resources(self):
|
| | """Load all resources from JSON registry (supports multiple schemas)."""
|
| | import json
|
| | from pathlib import Path
|
| |
|
| | path = Path(self.resources_json_path)
|
| | if not path.exists():
|
| | logger.error("❌ Resources registry not found at: %s", self.resources_json_path)
|
| | self.resources = {}
|
| | self.health_tracker = {}
|
| | return
|
| |
|
| | with path.open("r", encoding="utf-8") as f:
|
| | data = json.load(f)
|
| |
|
| | resources_list: List[Dict[str, Any]] = []
|
| |
|
| |
|
| | if isinstance(data, dict) and isinstance(data.get("registry"), dict):
|
| | registry = data.get("registry", {})
|
| | for category, entries in registry.items():
|
| | if category == "metadata":
|
| | continue
|
| | if not isinstance(entries, list):
|
| | continue
|
| | for entry in entries:
|
| | if not isinstance(entry, dict):
|
| | continue
|
| | base_url = entry.get("base_url") or entry.get("url")
|
| | if not base_url:
|
| | continue
|
| | auth = entry.get("auth") if isinstance(entry.get("auth"), dict) else {}
|
| | auth_type = str((auth or {}).get("type", "none")).lower()
|
| | auth_key = (auth or {}).get("key")
|
| |
|
| |
|
| |
|
| | is_free = auth_type in ("none", "noauth", "free", "public") or bool(auth_key)
|
| |
|
| |
|
| | actual_category = category.replace("_additional", "") if category.endswith("_additional") else category
|
| |
|
| | resources_list.append(
|
| | {
|
| | "id": entry.get("id") or f"{actual_category}_{len(resources_list)}",
|
| | "name": entry.get("name") or entry.get("id") or "unknown",
|
| | "category": actual_category,
|
| | "base_url": base_url,
|
| | "is_free": is_free,
|
| | "auth": entry.get("auth"),
|
| | "docs_url": entry.get("docs_url"),
|
| | "endpoints": entry.get("endpoints"),
|
| | "notes": entry.get("notes"),
|
| | }
|
| | )
|
| |
|
| |
|
| | elif isinstance(data, dict) and isinstance(data.get("resources"), list):
|
| | for entry in data.get("resources", []):
|
| | if not isinstance(entry, dict):
|
| | continue
|
| | if not entry.get("base_url") or not entry.get("category"):
|
| | continue
|
| | resources_list.append(entry)
|
| |
|
| | else:
|
| | logger.error("❌ Unsupported resources registry schema in %s", self.resources_json_path)
|
| | self.resources = {}
|
| | self.health_tracker = {}
|
| | return
|
| |
|
| |
|
| | for resource in resources_list:
|
| | category = resource.get("category", "unknown")
|
| |
|
| | if category.endswith("_additional"):
|
| | category = category.replace("_additional", "")
|
| |
|
| | if category not in self.resources:
|
| | self.resources[category] = []
|
| | self.resources[category].append(resource)
|
| |
|
| | resource_id = str(resource.get("id") or "")
|
| | if resource_id and resource_id not in self.health_tracker:
|
| | self.health_tracker[resource_id] = ResourceHealth(resource_id=resource_id)
|
| |
|
| |
|
| | for category in list(self.resources.keys()):
|
| | count = len(self.resources[category])
|
| | if count < 10:
|
| | logger.warning(f"⚠️ Category '{category}' has only {count} resources. Consider adding more fallbacks (minimum 10 recommended).")
|
| | else:
|
| | logger.info(f"✅ Category '{category}' has {count} resources (>= 10 fallbacks available)")
|
| |
|
| | logger.info("📊 Loaded %s categories from %s", len(self.resources), self.resources_json_path)
|
| | for category, items in self.resources.items():
|
| | logger.info(" - %s: %s resources", category, len(items))
|
| |
|
| | def _count_total_resources(self) -> int:
|
| | """Count total resources"""
|
| | return sum(len(items) for items in self.resources.values())
|
| |
|
| | def get_available_resources(self, category: str, free_only: bool = True) -> List[Dict]:
|
| | """Get available resources sorted by priority"""
|
| | if category not in self.resources:
|
| | logger.warning(f"⚠️ Category '{category}' not found")
|
| | return []
|
| |
|
| | resources = self.resources[category]
|
| |
|
| |
|
| | if free_only:
|
| | resources = [r for r in resources if r.get('is_free', False)]
|
| |
|
| |
|
| | available = []
|
| | for resource in resources:
|
| | resource_id = resource['id']
|
| | health = self.health_tracker.get(resource_id)
|
| |
|
| | if health and health.is_available():
|
| | available.append(resource)
|
| |
|
| |
|
| | available.sort(
|
| | key=lambda r: self.health_tracker[r['id']].get_priority_score(),
|
| | reverse=True
|
| | )
|
| |
|
| | return available
|
| |
|
| | def get_best_resource(self, category: str, exclude_ids: List[str] = None) -> Optional[Dict]:
|
| | """Get best available resource for category"""
|
| | exclude_ids = exclude_ids or []
|
| | available = self.get_available_resources(category)
|
| |
|
| |
|
| | available = [r for r in available if r['id'] not in exclude_ids]
|
| |
|
| | if not available:
|
| | logger.warning(f"⚠️ No available resources for category '{category}'")
|
| | return None
|
| |
|
| |
|
| | best = available[0]
|
| | logger.debug(f"✅ Selected resource: {best['name']} (score: {self.health_tracker[best['id']].get_priority_score():.2f})")
|
| |
|
| | return best
|
| |
|
| | async def fetch_with_fallback(
|
| | self,
|
| | category: str,
|
| | endpoint_path: str = "",
|
| | params: Dict[str, Any] = None,
|
| | max_attempts: int = 15,
|
| | timeout: int = 10
|
| | ) -> Optional[Dict]:
|
| | """
|
| | Fetch data with intelligent fallback
|
| | Tries up to max_attempts resources until success
|
| | NEVER returns None if any resource is available
|
| | """
|
| | params = params or {}
|
| | attempted_ids = []
|
| |
|
| | for attempt in range(max_attempts):
|
| |
|
| | resource = self.get_best_resource(category, exclude_ids=attempted_ids)
|
| |
|
| | if not resource:
|
| |
|
| | if attempted_ids:
|
| | logger.error(f"❌ All {len(attempted_ids)} resources exhausted for '{category}'")
|
| | return None
|
| |
|
| | resource_id = resource['id']
|
| | attempted_ids.append(resource_id)
|
| |
|
| |
|
| | base_url = resource['base_url']
|
| | url = f"{base_url}{endpoint_path}" if endpoint_path else base_url
|
| |
|
| |
|
| | health = self.health_tracker[resource_id]
|
| | use_proxy = health.needs_proxy or self._needs_proxy(resource)
|
| |
|
| | try:
|
| |
|
| | start_time = time.time()
|
| |
|
| | if use_proxy and self.proxy_manager:
|
| | response_data = await self._fetch_with_proxy(url, params, timeout)
|
| | else:
|
| | response_data = await self._fetch_direct(url, params, timeout)
|
| |
|
| | response_time = time.time() - start_time
|
| |
|
| |
|
| | health.record_success(response_time)
|
| |
|
| | logger.info(f"✅ Success: {resource['name']} ({response_time:.2f}s)")
|
| |
|
| | return response_data
|
| |
|
| | except aiohttp.ClientError as e:
|
| |
|
| | error_str = str(e)
|
| | needs_proxy = "403" in error_str or "blocked" in error_str.lower()
|
| |
|
| | health.record_failure(needs_proxy=needs_proxy)
|
| |
|
| | logger.warning(f"⚠️ Failed: {resource['name']} - {error_str}")
|
| |
|
| |
|
| | continue
|
| |
|
| | except Exception as e:
|
| |
|
| | health.record_failure()
|
| | logger.error(f"❌ Error: {resource['name']} - {e}")
|
| | continue
|
| |
|
| |
|
| | logger.error(f"❌ CRITICAL: All {max_attempts} fallback attempts failed for '{category}'")
|
| | return None
|
| |
|
| | async def _fetch_direct(self, url: str, params: Dict, timeout: int) -> Dict:
|
| | """Fetch directly without proxy"""
|
| | async with aiohttp.ClientSession() as session:
|
| | async with session.get(url, params=params, timeout=timeout) as response:
|
| | response.raise_for_status()
|
| | return await response.json()
|
| |
|
| | async def _fetch_with_proxy(self, url: str, params: Dict, timeout: int) -> Dict:
|
| | """Fetch through proxy"""
|
| | if not self.proxy_manager:
|
| | raise Exception("Proxy manager not configured")
|
| |
|
| | proxy_url = await self.proxy_manager.get_proxy()
|
| |
|
| | async with aiohttp.ClientSession() as session:
|
| | async with session.get(
|
| | url,
|
| | params=params,
|
| | proxy=proxy_url,
|
| | timeout=timeout
|
| | ) as response:
|
| | response.raise_for_status()
|
| | return await response.json()
|
| |
|
| | def _needs_proxy(self, resource: Dict) -> bool:
|
| | """Check if resource likely needs proxy"""
|
| |
|
| | if 'binance' in resource['base_url'].lower():
|
| | return True
|
| |
|
| |
|
| | blocked_domains = ['binance.us', 'okex', 'huobi']
|
| |
|
| | return any(domain in resource['base_url'].lower() for domain in blocked_domains)
|
| |
|
| | def get_health_report(self) -> Dict:
|
| | """Get health report for all resources"""
|
| | report = {
|
| | 'total_resources': self._count_total_resources(),
|
| | 'by_status': {
|
| | 'active': 0,
|
| | 'degraded': 0,
|
| | 'failed': 0,
|
| | 'proxy_needed': 0,
|
| | 'blocked': 0
|
| | },
|
| | 'top_performers': [],
|
| | 'failing_resources': []
|
| | }
|
| |
|
| |
|
| | for health in self.health_tracker.values():
|
| | status_key = health.status.value
|
| | if status_key in report['by_status']:
|
| | report['by_status'][status_key] += 1
|
| |
|
| |
|
| | all_health = list(self.health_tracker.values())
|
| | all_health.sort(key=lambda h: h.get_priority_score(), reverse=True)
|
| |
|
| | report['top_performers'] = [
|
| | {
|
| | 'resource_id': h.resource_id,
|
| | 'score': h.get_priority_score(),
|
| | 'success_rate': h.success_count / max(h.success_count + h.failure_count, 1),
|
| | 'avg_response_time': h.avg_response_time
|
| | }
|
| | for h in all_health[:10]
|
| | ]
|
| |
|
| |
|
| | report['failing_resources'] = [
|
| | {
|
| | 'resource_id': h.resource_id,
|
| | 'status': h.status.value,
|
| | 'consecutive_failures': h.consecutive_failures,
|
| | 'needs_proxy': h.needs_proxy
|
| | }
|
| | for h in all_health
|
| | if h.status in [ResourceStatus.FAILED, ResourceStatus.BLOCKED]
|
| | ]
|
| |
|
| | return report
|
| |
|
| | def cleanup_failed_resources(self, max_age_hours: int = 24):
|
| | """Remove resources that have been failing for too long"""
|
| | now = datetime.now()
|
| | removed = []
|
| |
|
| | for resource_id, health in list(self.health_tracker.items()):
|
| | if health.status == ResourceStatus.FAILED:
|
| | if health.last_success:
|
| | age = (now - health.last_success).total_seconds() / 3600
|
| | if age > max_age_hours:
|
| |
|
| |
|
| | health.status = ResourceStatus.BLOCKED
|
| | removed.append(resource_id)
|
| |
|
| | if removed:
|
| | logger.info(f"🗑️ Marked {len(removed)} resources as blocked after {max_age_hours}h of failures")
|
| |
|
| | return removed
|
| |
|
| |
|
| |
|
| | _fallback_manager = None
|
| |
|
| | def get_fallback_manager() -> SmartFallbackManager:
|
| | """Get global fallback manager instance"""
|
| | global _fallback_manager
|
| | if _fallback_manager is None:
|
| | _fallback_manager = SmartFallbackManager()
|
| | return _fallback_manager
|
| |
|