Spaces:

sandeepmudhiraj
/

Meta-API

Running

App Files Files Community

sandeepmudhiraj committed on Apr 1

Commit

a0efa9c

verified ·

1 Parent(s): fdc57c8

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

Dockerfile +22 -0
README.md +6 -4
app.py +200 -0
requirements.txt +5 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+FROM python:3.11-slim
+# Toolchain and development headers, for dependencies that build from source.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git curl build-essential libffi-dev libssl-dev \
+    libyaml-dev libxml2-dev libxslt1-dev zlib1g-dev \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install dependencies before copying the sources so this layer stays cached
+# when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# Pin the account to UID 1000 so it matches the chown and USER lines below
+# instead of relying on useradd's default UID allocation.
+RUN useradd -m -u 1000 appuser || true
+RUN chown -R 1000:1000 /app
+USER 1000
+ENV PORT=7860
+EXPOSE 7860
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,10 +1,12 @@
 ---
 title: Meta API
-emoji: 📈
-colorFrom: yellow
-colorTo: red
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Meta API
+emoji: ⚡
+colorFrom: gray
+colorTo: blue
 sdk: docker
 pinned: false
 ---
+# Meta API
+A lightweight data aggregation microservice.

app.py ADDED Viewed

	@@ -0,0 +1,200 @@

+#!/usr/bin/env python3
+"""
+Meta API - Aggregated web data service.
+Uses multiple public meta-search endpoints for redundancy.
+"""
+import asyncio
+import hashlib
+import json
+import os
+import random
+import time
+from typing import Optional
+import httpx
+import uvicorn
+from fastapi import FastAPI, Query, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+# ASGI application object. docs_url and redoc_url are passed as None, which in
+# FastAPI turns off the interactive documentation routes.
+app = FastAPI(title="Meta API", docs_url=None, redoc_url=None)
+# CORS policy: every origin, HTTP method and request header is allowed.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Base URLs of the public SearXNG instances that queries are forwarded to.
+# "/search" is appended to each base URL when a request is made.
+INSTANCES = [
+    "https://search.bus-hit.me",
+    "https://search.rhscz.eu",
+    "https://searx.tiekoetter.com",
+    "https://search.sapti.me",
+    "https://searx.be",
+    "https://search.ononoki.org",
+    "https://searx.namejeff.xyz",
+    "https://searx.work",
+    "https://search.projectsegfau.lt",
+    "https://searx.oxull.uk",
+    "https://search.hbubli.cc",
+    "https://search.mdosch.de",
+    "https://priv.au",
+    "https://paulgo.io",
+    "https://s.mble.dk",
+    "https://searx.fmhy.net",
+    "https://searxng.online",
+    "https://searx.tuxcloud.net",
+    "https://search.inetol.net",
+    "https://search.im-in.space",
+]
+# Per-instance health bookkeeping, keyed by instance base URL:
+#   instance_failures  - failures counted since the last success or reset
+#   instance_last_fail - time.time() timestamp of the most recent failure
+instance_failures: dict[str, int] = {}
+instance_last_fail: dict[str, float] = {}
+# An instance with this many counted failures is skipped ...
+FAILURE_THRESHOLD = 3
+# ... until this many seconds have passed since its last failure (5 minutes).
+COOLDOWN_SECONDS = 300  # 5 min cooldown after failures
+# In-memory response cache: cache key -> (time stored, upstream JSON payload).
+_cache: dict[str, tuple[float, dict]] = {}
+# Seconds a cached response is served before it is treated as stale.
+CACHE_TTL = 300  # 5 minutes
+def _get_healthy_instances() -> list[str]:
+    """Get instances that haven't exceeded failure threshold."""
+    now = time.time()
+    healthy = []
+    for inst in INSTANCES:
+        fails = instance_failures.get(inst, 0)
+        if fails < FAILURE_THRESHOLD:
+            healthy.append(inst)
+        else:
+            # Check cooldown
+            last_fail = instance_last_fail.get(inst, 0)
+            if now - last_fail > COOLDOWN_SECONDS:
+                instance_failures[inst] = 0
+                healthy.append(inst)
+    return healthy if healthy else INSTANCES  # fallback to all if none healthy
+def _record_success(instance: str):
+    instance_failures[instance] = 0
+def _record_failure(instance: str):
+    instance_failures[instance] = instance_failures.get(instance, 0) + 1
+    instance_last_fail[instance] = time.time()
+def _cache_key(query: str, **kwargs) -> str:
+    raw = json.dumps({"q": query, **kwargs}, sort_keys=True)
+    return hashlib.md5(raw.encode()).hexdigest()
+@app.get("/")
+async def root():
+    return {"status": "ok", "service": "meta-api", "version": "1.0.0"}
+@app.get("/health")
+async def health():
+    healthy = _get_healthy_instances()
+    return {
+        "status": "healthy",
+        "available_sources": len(healthy),
+        "total_sources": len(INSTANCES),
+    }
+@app.get("/search")
+async def search(
+    q: str = Query(..., description="Search query"),
+    format: str = Query("json", description="Output format"),
+    categories: str = Query("general", description="Search categories"),
+    language: str = Query("en", description="Language"),
+    time_range: Optional[str] = Query(None, description="Time range filter"),
+    pageno: int = Query(1, description="Page number"),
+):
+    """Perform aggregated search across multiple sources."""
+    # Check cache
+    ck = _cache_key(q, categories=categories, language=language, pageno=pageno)
+    if ck in _cache:
+        cached_time, cached_data = _cache[ck]
+        if time.time() - cached_time < CACHE_TTL:
+            return JSONResponse(content=cached_data)
+    healthy = _get_healthy_instances()
+    random.shuffle(healthy)
+    params = {
+        "q": q,
+        "format": "json",
+        "categories": categories,
+        "language": language,
+        "pageno": pageno,
+    }
+    if time_range:
+        params["time_range"] = time_range
+    # Try instances with concurrent requests (pick 3 random healthy ones)
+    candidates = healthy[:5]
+    async def try_instance(instance: str) -> Optional[dict]:
+        url = f"{instance.rstrip('/')}/search"
+        try:
+            async with httpx.AsyncClient(timeout=12.0, follow_redirects=True) as client:
+                resp = await client.get(
+                    url,
+                    params=params,
+                    headers={
+                        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
+                        "Accept": "application/json",
+                    },
+                )
+                if resp.status_code == 200:
+                    data = resp.json()
+                    if "results" in data and len(data["results"]) > 0:
+                        _record_success(instance)
+                        return data
+                _record_failure(instance)
+                return None
+        except Exception:
+            _record_failure(instance)
+            return None
+    # Race multiple instances
+    tasks = [try_instance(inst) for inst in candidates]
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+    for result in results:
+        if isinstance(result, dict) and result:
+            # Cache it
+            _cache[ck] = (time.time(), result)
+            # Clean old cache entries
+            now = time.time()
+            expired = [k for k, (t, _) in _cache.items() if now - t > CACHE_TTL]
+            for k in expired:
+                del _cache[k]
+            return JSONResponse(content=result)
+    # If concurrent failed, try remaining instances sequentially
+    for instance in healthy[5:]:
+        result = await try_instance(instance)
+        if result:
+            _cache[ck] = (time.time(), result)
+            return JSONResponse(content=result)
+    return JSONResponse(
+        status_code=503,
+        content={
+            "error": "All sources temporarily unavailable",
+            "query": q,
+            "results": [],
+        },
+    )
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 7860))
+    uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0
+httpx>=0.25.0
+pyyaml>=6.0
+lxml>=4.9.0