# CroxyProxyAPI / app.py — uploaded by MB-IDK ("Update app.py", commit c1ec4b6, verified)
"""
CroxyProxy Rotating Proxy API - HuggingFace Spaces
90 servers: Paris, LA, Dallas, Warsaw, Amsterdam...
GET /health - Status
GET /servers - List 90 servers
POST /proxy/fetch - Rotating proxy
POST /proxy/random - Random server
POST /proxy/batch - Multiple URLs
"""
import json, base64, re, random, time, threading
from datetime import datetime, timezone
from flask import Flask, request, jsonify
from bs4 import BeautifulSoup
import cloudscraper
from html import unescape
import warnings
warnings.filterwarnings("ignore")
BASE = "https://www.croxyproxy.com"  # upstream proxy front-end all requests go through
app = Flask(__name__)
# ── Headers to keep in the response (everything else is discarded) ──
# Compared lowercase against the lowercased header name in filter_headers().
KEEP_HEADERS = {
    "content-type", "content-length", "content-encoding",
    "server", "date", "connection",
    "access-control-allow-origin",
    "access-control-allow-credentials",
    "cache-control", "etag", "last-modified",
    "x-ratelimit-limit", "x-ratelimit-remaining",
    "x-request-id", "location", "retry-after",
}
# ── Headers always excluded (noise injected by the proxy layer) ──
DROP_HEADERS = {
    "set-cookie", "__cph", "__cpc",
    "content-security-policy", "strict-transport-security",
    "referrer-policy", "access-control-allow-headers",
    "x-frame-options", "x-content-type-options",
    "permissions-policy", "cross-origin-opener-policy",
    "cross-origin-embedder-policy",
}
class S:
    """Process-wide mutable state shared by all request handlers."""
    servers = []             # cached list of decoded server dicts (refreshed by fetch_raw)
    idx = 0                  # round-robin cursor into `servers`
    lock = threading.Lock()  # guards idx during round-robin selection
    last = None              # ISO-8601 UTC timestamp of the last server-list refresh
    stats = {"req": 0, "ok": 0, "fail": 0}  # lifetime request counters
def dec(e):
    """Decode one encoded server entry: base64 -> hex text -> JSON.

    CroxyProxy ships each server record as base64(hex(json)). Returns
    the parsed object, or None when any decoding stage fails.
    """
    try:
        hex_text = base64.b64decode(e).decode()
        raw_json = bytes.fromhex(hex_text).decode()
        return json.loads(raw_json)
    except Exception:
        return None
def filter_headers(raw_headers, include_all=False):
    """Reduce a response-header mapping to the allow-listed subset.

    With include_all=True the mapping is returned wholesale (as a dict).
    Otherwise only names present in KEEP_HEADERS survive; DROP_HEADERS
    and any unlisted header are discarded. Original key casing is kept.
    """
    if include_all:
        return dict(raw_headers)
    return {
        name: value
        for name, value in raw_headers.items()
        if (key := name.lower()) not in DROP_HEADERS and key in KEEP_HEADERS
    }
def parse_body(text, content_type=""):
    """Normalize a response body for the JSON API payload.

    Empty bodies become None. JSON-looking bodies are parsed into Python
    objects; HTML bodies are summarized with a 300-char preview; other
    text longer than 2000 chars gets a 500-char preview; anything else
    is passed through untouched.
    """
    if not text:
        return None
    ct = content_type.lower()
    stripped = text.strip()
    # JSON attempt: declared content type or a leading brace/bracket.
    if "json" in ct or stripped.startswith(("{", "[")):
        try:
            return json.loads(text)
        except (json.JSONDecodeError, ValueError):
            pass  # fall through and treat as plain markup/text
    # HTML: hand back a short preview instead of the whole document.
    if "html" in ct or stripped.startswith("<"):
        suffix = "..." if len(text) > 300 else ""
        return {
            "_type": "html",
            "_length": len(text),
            "_preview": text[:300].strip() + suffix,
        }
    # Long plain text: truncate as well.
    if len(text) > 2000:
        return {
            "_type": "text",
            "_length": len(text),
            "_preview": text[:500].strip() + "...",
        }
    return text
def extract_ip(url_str):
    """Return the host part of a proxy-server URL (scheme and path removed)."""
    host = url_str or ""
    for scheme in ("https://", "http://"):
        host = host.replace(scheme, "")
    return host.split("/", 1)[0]
def format_result(raw, include_raw_headers=False):
    """Shape a fetch_raw() result dict into the public API payload.

    Failures collapse to {success, error, server}. Successes carry the
    parsed body, the (filtered or raw) headers and proxy metadata.
    """
    if not raw.get("success"):
        return {
            "success": False,
            "error": raw.get("error"),
            "server": raw.get("server"),
        }
    headers = raw.get("headers") or {}
    content_type = headers.get("Content-Type", headers.get("content-type", ""))
    formatted = {
        "success": True,
        "status": raw.get("status"),
        "url": raw.get("url"),
        "body": parse_body(raw.get("body", ""), content_type),
        "proxy": raw.get("proxy"),
        "servers_available": raw.get("servers_available"),
    }
    if headers:
        formatted["headers"] = filter_headers(headers, include_all=include_raw_headers)
    return formatted
def fetch_raw(url, sid=None):
    """Fetch `url` through CroxyProxy and return a raw result dict.

    Performs the full six-step CroxyProxy handshake:
      1. GET the homepage to obtain a CSRF token.
      2. POST /servers to obtain the server-selector page.
      3. Decode the embedded server list + second CSRF token.
      4. POST /requests with the chosen server id -> expect a 302.
      5. GET the redirect target and extract the base64 `data-r` URL.
      6. GET the final decoded URL — the actual proxied response.

    `sid`, when given, pins that server id; otherwise servers rotate
    round-robin via S.idx. Every failure path returns
    {"success": False, "error": ...}; exceptions are caught and reported
    the same way. Side effects: S.stats counters, S.servers / S.last cache.
    """
    sc = cloudscraper.create_scraper(
        browser={"browser": "chrome", "platform": "windows", "desktop": True}
    )
    S.stats["req"] += 1
    try:
        # 1. GET / -> first CSRF token (hidden form input)
        r1 = sc.get(BASE, timeout=30)
        if r1.status_code != 200:
            S.stats["fail"] += 1
            return {"success": False, "error": f"Homepage {r1.status_code}"}
        s1 = BeautifulSoup(r1.text, "lxml")
        ci = s1.find("input", {"name": "csrf"})
        if not ci:
            S.stats["fail"] += 1
            return {"success": False, "error": "No CSRF"}
        # 2. POST /servers -> server-selector page
        # NOTE(review): "274" appears to be a placeholder server id used
        # only to reach the selector page — confirm against the site.
        r2 = sc.post(
            f"{BASE}/servers",
            data={
                "url": url, "proxyServerId": "274",
                "csrf": ci["value"], "demo": "0",
                "frontOrigin": BASE,
            },
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Origin": BASE,
                "Referer": BASE + "/",
            },
            allow_redirects=True,
            timeout=30,
        )
        if r2.status_code != 200:
            S.stats["fail"] += 1
            return {"success": False, "error": f"Servers {r2.status_code}"}
        s2 = BeautifulSoup(r2.text, "lxml")
        sel = s2.find("script", {"id": "serverSelectorScript"})
        if not sel:
            S.stats["fail"] += 1
            return {"success": False, "error": "No selector"}
        # 3. Parse server list (data-ss is JSON of encoded entries; see dec())
        #    plus the second CSRF token and front-origin value.
        ss = [
            x for x in (dec(i) for i in json.loads(unescape(sel.get("data-ss", ""))))
            if x and x.get("id")
        ]
        csrf2 = unescape(sel.get("data-csrf", "")).strip('"')
        fo = unescape(sel.get("data-fo", "")).strip('"')
        if not ss:
            S.stats["fail"] += 1
            return {"success": False, "error": "No servers"}
        # Refresh the shared server cache
        S.servers = ss
        S.last = datetime.now(timezone.utc).isoformat()
        # Pick the server: explicit sid first, else round-robin under the lock
        ch = None
        if sid:
            ch = next((x for x in ss if x["id"] == sid), None)
        if not ch:
            with S.lock:
                ch = ss[S.idx % len(ss)]
                S.idx += 1
        # 4. POST /requests -> expect a 302 with a Location header
        r3 = sc.post(
            f"{BASE}/requests?fso=",
            data={
                "url": url, "proxyServerId": str(ch["id"]),
                "csrf": csrf2, "demo": "0", "frontOrigin": fo,
            },
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Origin": BASE,
                "Referer": f"{BASE}/servers",
            },
            allow_redirects=False,
            timeout=30,
        )
        loc = r3.headers.get("Location") or r3.headers.get("location")
        if not loc:
            S.stats["fail"] += 1
            return {
                "success": False,
                "error": f"No redirect ({r3.status_code})",
                "server": ch.get("name"),
            }
        # 5. GET redirect target -> scrape the data-r attribute
        r4 = sc.get(loc, timeout=30, allow_redirects=True)
        dr = re.search(r'data-r="([^"]+)"', r4.text)
        if not dr:
            S.stats["fail"] += 1
            return {"success": False, "error": "No data-r", "server": ch.get("name")}
        # 6. data-r is a base64-encoded URL; GET it for the final response
        final = base64.b64decode(dr.group(1)).decode()
        r5 = sc.get(final, timeout=30, allow_redirects=True)
        S.stats["ok"] += 1
        return {
            "success": True,
            "status": r5.status_code,
            "headers": dict(r5.headers),
            "body": r5.text,
            "url": url,
            "proxy": {
                "server_id": ch["id"],
                "server_name": ch.get("name"),
                "ip": extract_ip(ch.get("url", "")),
            },
            "servers_available": len(ss),
        }
    except Exception as e:
        # Network/parse failures are reported, never raised to the route
        S.stats["fail"] += 1
        return {"success": False, "error": str(e)}
# ═══════════════════════════════════════════════
# ROUTES
# ═══════════════════════════════════════════════
@app.route("/")
def index():
    """API root: advertise the available endpoints and usage notes."""
    endpoints = {
        "GET /health": "Status + stats",
        "GET /servers": "List all servers",
        "POST /proxy/fetch": "Rotating proxy {url, server_id?, raw_headers?}",
        "POST /proxy/random": "Random server {url, raw_headers?}",
        "POST /proxy/batch": "Multiple URLs {urls: [...], raw_headers?}",
    }
    notes = {
        "raw_headers": "Set to true to get ALL response headers (default: filtered)",
        "body": "JSON bodies are auto-parsed. HTML is truncated with preview.",
    }
    return jsonify({
        "name": "CroxyProxy Rotating Proxy API",
        "version": "2.0",
        "endpoints": endpoints,
        "notes": notes,
    })
@app.route("/health")
def health():
    """Liveness probe: cached server count, last refresh time, counters."""
    payload = {
        "status": "ready",
        "servers": len(S.servers),
        "last_refresh": S.last,
        "stats": S.stats,
    }
    return jsonify(payload)
@app.route("/servers")
def servers():
    """List the cached proxy servers as {id, name, ip} records."""
    listing = []
    for entry in S.servers:
        listing.append({
            "id": entry.get("id"),
            "name": entry.get("name"),
            "ip": extract_ip(entry.get("url", "")),
        })
    return jsonify({"count": len(listing), "servers": listing})
@app.route("/proxy/fetch", methods=["POST"])
def proxy_fetch():
    """Fetch one URL via the round-robin rotating proxy.

    Body: {"url": str, "server_id"?: any, "raw_headers"?: bool}.
    """
    payload = request.get_json() or {}
    target = payload.get("url")
    if not target:
        return jsonify({"error": "url required"}), 400
    raw = fetch_raw(target, payload.get("server_id"))
    include_raw = payload.get("raw_headers", False)
    return jsonify(format_result(raw, include_raw_headers=include_raw))
@app.route("/proxy/random", methods=["POST"])
def proxy_random():
    """Fetch one URL via a randomly chosen cached server.

    Falls back to rotation when the server cache is still empty.
    """
    payload = request.get_json() or {}
    target = payload.get("url")
    if not target:
        return jsonify({"error": "url required"}), 400
    sid = random.choice(S.servers)["id"] if S.servers else None
    raw = fetch_raw(target, sid)
    include_raw = payload.get("raw_headers", False)
    return jsonify(format_result(raw, include_raw_headers=include_raw))
@app.route("/proxy/batch", methods=["POST"])
def proxy_batch():
    """Fetch several URLs sequentially through the rotating proxy.

    Body: {"urls": [str, ...], "raw_headers"?: bool}.
    Returns {count, success_count, results}. A 0.5s pause is kept
    between consecutive fetches to avoid hammering the proxy — but
    only *between* them (the original also slept after the last URL,
    adding dead time to every batch response).
    """
    d = request.get_json() or {}
    urls = d.get("urls", [])
    if not urls:
        return jsonify({"error": "urls required"}), 400
    include_raw = d.get("raw_headers", False)
    results = []
    for i, u in enumerate(urls):
        if i:  # throttle only between requests
            time.sleep(0.5)
        raw = fetch_raw(u)
        results.append(format_result(raw, include_raw_headers=include_raw))
    return jsonify({
        "count": len(results),
        "success_count": sum(1 for r in results if r.get("success")),
        "results": results,
    })
if __name__ == "__main__":
    # Bind on all interfaces, port 7860 (the port HuggingFace Spaces
    # routes to, per the module docstring).
    app.run(host="0.0.0.0", port=7860)