# HuggingFace Spaces deployment — status banner residue from the web viewer ("Spaces: Running")
| """ | |
| CroxyProxy Rotating Proxy API - HuggingFace Spaces | |
| 90 servers: Paris, LA, Dallas, Warsaw, Amsterdam... | |
| GET /health - Status | |
| GET /servers - List 90 servers | |
| POST /proxy/fetch - Rotating proxy | |
| POST /proxy/random - Random server | |
| POST /proxy/batch - Multiple URLs | |
| """ | |
| import json, base64, re, random, time, threading | |
| from datetime import datetime, timezone | |
| from flask import Flask, request, jsonify | |
| from bs4 import BeautifulSoup | |
| import cloudscraper | |
| from html import unescape | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
BASE = "https://www.croxyproxy.com"  # upstream proxy front-end
app = Flask(__name__)
# -- Response headers worth keeping (everything else is discarded) --
KEEP_HEADERS = {
    "content-type", "content-length", "content-encoding",
    "server", "date", "connection",
    "access-control-allow-origin",
    "access-control-allow-credentials",
    "cache-control", "etag", "last-modified",
    "x-ratelimit-limit", "x-ratelimit-remaining",
    "x-request-id", "location", "retry-after",
}
# -- Headers always excluded (proxy noise) --
DROP_HEADERS = {
    "set-cookie", "__cph", "__cpc",
    "content-security-policy", "strict-transport-security",
    "referrer-policy", "access-control-allow-headers",
    "x-frame-options", "x-content-type-options",
    "permissions-policy", "cross-origin-opener-policy",
    "cross-origin-embedder-policy",
}
class S:
    """Process-wide mutable state shared by all requests.

    Used purely as a namespace (never instantiated). Attribute names are
    referenced throughout the module, so they must not change.
    """
    servers = []  # server list cached from the most recent successful fetch
    idx = 0  # round-robin cursor into `servers`
    lock = threading.Lock()  # guards idx rotation (stats are mutated unlocked elsewhere)
    last = None  # ISO-8601 UTC timestamp of the last server-list refresh
    stats = {"req": 0, "ok": 0, "fail": 0}  # lifetime request counters
def dec(e):
    """Decode one obfuscated CroxyProxy server entry.

    The payload is base64 wrapping a hex string wrapping UTF-8 JSON.
    Returns the parsed object, or None when any stage fails.
    """
    try:
        hex_text = base64.b64decode(e).decode()
        json_text = bytes.fromhex(hex_text).decode()
        return json.loads(json_text)
    except Exception:
        return None
def filter_headers(raw_headers, include_all=False):
    """Return a cleaned copy of *raw_headers*.

    With include_all=True every header is passed through untouched.
    Otherwise a header survives only if it is in KEEP_HEADERS and not in
    DROP_HEADERS (the drop list is checked first, mirroring the intent
    that proxy noise never leaks through).
    """
    if include_all:
        return dict(raw_headers)
    return {
        name: value
        for name, value in raw_headers.items()
        if name.lower() not in DROP_HEADERS and name.lower() in KEEP_HEADERS
    }
def parse_body(text, content_type=""):
    """Best-effort body shaping for API responses.

    JSON-looking bodies are parsed; HTML and very long plain text are
    replaced by a small preview dict; everything else passes through.
    Returns None for an empty body.
    """
    if not text:
        return None
    ct = content_type.lower()
    stripped = text.strip()
    # JSON first: declared by content-type, or shaped like an object/array.
    if "json" in ct or stripped.startswith(("{", "[")):
        try:
            return json.loads(text)
        except ValueError:  # JSONDecodeError is a ValueError subclass
            pass
    # HTML gets a short preview instead of the full document.
    if "html" in ct or stripped.startswith("<"):
        preview = text[:300].strip()
        if len(text) > 300:
            preview += "..."
        return {"_type": "html", "_length": len(text), "_preview": preview}
    # Long plain text is truncated as well.
    if len(text) > 2000:
        return {
            "_type": "text",
            "_length": len(text),
            "_preview": text[:500].strip() + "...",
        }
    return text
def extract_ip(url_str):
    """Strip the scheme and path from a proxy-server URL, leaving host[:port].

    Tolerates None/empty input by returning "".
    """
    host = url_str or ""
    for scheme in ("https://", "http://"):
        host = host.replace(scheme, "")
    return host.split("/")[0]
def format_result(raw, include_raw_headers=False):
    """Shape a fetch_raw() result dict into the public API response."""
    if not raw.get("success"):
        # Failure: surface only the error and the server that was tried.
        return {
            "success": False,
            "error": raw.get("error"),
            "server": raw.get("server"),
        }
    headers = raw.get("headers")
    content_type = ""
    if headers:
        # requests usually title-cases header names; fall back to lowercase.
        content_type = headers.get("Content-Type", headers.get("content-type", ""))
    result = {
        "success": True,
        "status": raw.get("status"),
        "url": raw.get("url"),
        "body": parse_body(raw.get("body", ""), content_type),
        "proxy": raw.get("proxy"),
        "servers_available": raw.get("servers_available"),
    }
    if headers:
        result["headers"] = filter_headers(headers, include_all=include_raw_headers)
    return result
def fetch_raw(url, sid=None):
    """Fetch *url* through CroxyProxy and return the raw response data.

    Performs the full CroxyProxy handshake:
      1. GET the homepage to obtain a CSRF token.
      2. POST /servers to load the server-selector page.
      3. Decode the embedded server list and second CSRF token.
      4. POST /requests with the chosen server id; expect a 302.
      5. Follow the redirect and extract the base64 `data-r` target.
      6. GET the final proxied URL.

    sid: optional server id to pin; otherwise servers rotate round-robin.
    Returns a dict: on success {"success": True, status, headers, body, url,
    proxy, servers_available}; on failure {"success": False, "error": ...}.
    """
    # Fresh scraper per call; cloudscraper handles Cloudflare challenges.
    sc = cloudscraper.create_scraper(
        browser={"browser": "chrome", "platform": "windows", "desktop": True}
    )
    # NOTE(review): stats counters are mutated without holding S.lock —
    # probably acceptable for rough counters, but racy under threads; confirm.
    S.stats["req"] += 1
    try:
        # 1. GET / -> csrf token in a hidden form input
        r1 = sc.get(BASE, timeout=30)
        if r1.status_code != 200:
            S.stats["fail"] += 1
            return {"success": False, "error": f"Homepage {r1.status_code}"}
        s1 = BeautifulSoup(r1.text, "lxml")
        ci = s1.find("input", {"name": "csrf"})
        if not ci:
            S.stats["fail"] += 1
            return {"success": False, "error": "No CSRF"}
        # 2. POST /servers -> server-selector page
        r2 = sc.post(
            f"{BASE}/servers",
            data={
                # "274" appears to be a placeholder id for the selector step;
                # the actual server is chosen later in step 4 — TODO confirm.
                "url": url, "proxyServerId": "274",
                "csrf": ci["value"], "demo": "0",
                "frontOrigin": BASE,
            },
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Origin": BASE,
                "Referer": BASE + "/",
            },
            allow_redirects=True,
            timeout=30,
        )
        if r2.status_code != 200:
            S.stats["fail"] += 1
            return {"success": False, "error": f"Servers {r2.status_code}"}
        s2 = BeautifulSoup(r2.text, "lxml")
        sel = s2.find("script", {"id": "serverSelectorScript"})
        if not sel:
            S.stats["fail"] += 1
            return {"success": False, "error": "No selector"}
        # 3. Parse the obfuscated server list (see dec()) + second csrf token
        ss = [
            x for x in (dec(i) for i in json.loads(unescape(sel.get("data-ss", ""))))
            if x and x.get("id")
        ]
        csrf2 = unescape(sel.get("data-csrf", "")).strip('"')
        fo = unescape(sel.get("data-fo", "")).strip('"')
        if not ss:
            S.stats["fail"] += 1
            return {"success": False, "error": "No servers"}
        # Refresh the shared server cache as a side effect
        S.servers = ss
        S.last = datetime.now(timezone.utc).isoformat()
        # Pick the server: pinned id if given (falls back silently if unknown),
        # otherwise round-robin under the lock
        ch = None
        if sid:
            ch = next((x for x in ss if x["id"] == sid), None)
        if not ch:
            with S.lock:
                ch = ss[S.idx % len(ss)]
                S.idx += 1
        # 4. POST /requests -> expect a 302 pointing at the proxy session
        r3 = sc.post(
            f"{BASE}/requests?fso=",
            data={
                "url": url, "proxyServerId": str(ch["id"]),
                "csrf": csrf2, "demo": "0", "frontOrigin": fo,
            },
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Origin": BASE,
                "Referer": f"{BASE}/servers",
            },
            allow_redirects=False,
            timeout=30,
        )
        loc = r3.headers.get("Location") or r3.headers.get("location")
        if not loc:
            S.stats["fail"] += 1
            return {
                "success": False,
                "error": f"No redirect ({r3.status_code})",
                "server": ch.get("name"),
            }
        # 5. GET the redirect page; the real target is base64 in data-r
        r4 = sc.get(loc, timeout=30, allow_redirects=True)
        dr = re.search(r'data-r="([^"]+)"', r4.text)
        if not dr:
            S.stats["fail"] += 1
            return {"success": False, "error": "No data-r", "server": ch.get("name")}
        # 6. GET the final proxied URL
        final = base64.b64decode(dr.group(1)).decode()
        r5 = sc.get(final, timeout=30, allow_redirects=True)
        S.stats["ok"] += 1
        return {
            "success": True,
            "status": r5.status_code,
            "headers": dict(r5.headers),
            "body": r5.text,
            "url": url,
            "proxy": {
                "server_id": ch["id"],
                "server_name": ch.get("name"),
                "ip": extract_ip(ch.get("url", "")),
            },
            "servers_available": len(ss),
        }
    except Exception as e:
        # Catch-all boundary: any network/parse error becomes a structured failure.
        S.stats["fail"] += 1
        return {"success": False, "error": str(e)}
| # βββββββββββββββββββββββββββββββββββββββββββββββ | |
| # ROUTES | |
| # βββββββββββββββββββββββββββββββββββββββββββββββ | |
@app.route("/")  # FIX: route was never registered — the endpoint 404'd
def index():
    """GET / — self-describing API index (endpoints and usage notes)."""
    return jsonify({
        "name": "CroxyProxy Rotating Proxy API",
        "version": "2.0",
        "endpoints": {
            "GET /health": "Status + stats",
            "GET /servers": "List all servers",
            "POST /proxy/fetch": "Rotating proxy {url, server_id?, raw_headers?}",
            "POST /proxy/random": "Random server {url, raw_headers?}",
            "POST /proxy/batch": "Multiple URLs {urls: [...], raw_headers?}",
        },
        "notes": {
            "raw_headers": "Set to true to get ALL response headers (default: filtered)",
            "body": "JSON bodies are auto-parsed. HTML is truncated with preview.",
        },
    })
@app.route("/health")  # FIX: route was never registered — the endpoint 404'd
def health():
    """GET /health — liveness probe: cached server count, last refresh, counters."""
    return jsonify({
        "status": "ready",
        "servers": len(S.servers),
        "last_refresh": S.last,
        "stats": S.stats,
    })
@app.route("/servers")  # FIX: route was never registered — the endpoint 404'd
def servers():
    """GET /servers — the cached server list (empty until the first fetch)."""
    return jsonify({
        "count": len(S.servers),
        "servers": [
            {
                "id": s.get("id"),
                "name": s.get("name"),
                "ip": extract_ip(s.get("url", "")),
            }
            for s in S.servers
        ],
    })
@app.route("/proxy/fetch", methods=["POST"])  # FIX: route was never registered
def proxy_fetch():
    """POST {url, server_id?, raw_headers?} — fetch via the rotating proxy."""
    # silent=True: malformed/missing JSON yields {} and our own 400 below,
    # instead of Flask raising an unhandled 415/400.
    d = request.get_json(silent=True) or {}
    if not d.get("url"):
        return jsonify({"error": "url required"}), 400
    raw = fetch_raw(d["url"], d.get("server_id"))
    return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
@app.route("/proxy/random", methods=["POST"])  # FIX: route was never registered
def proxy_random():
    """POST {url, raw_headers?} — fetch via a randomly chosen cached server."""
    # silent=True: malformed/missing JSON yields {} and our own 400 below.
    d = request.get_json(silent=True) or {}
    if not d.get("url"):
        return jsonify({"error": "url required"}), 400
    # Random pick only works after the cache is populated; else rotate.
    sid = random.choice(S.servers)["id"] if S.servers else None
    raw = fetch_raw(d["url"], sid)
    return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
@app.route("/proxy/batch", methods=["POST"])  # FIX: route was never registered
def proxy_batch():
    """POST {urls: [...], raw_headers?} — fetch several URLs sequentially."""
    # silent=True: malformed/missing JSON yields {} and our own 400 below.
    d = request.get_json(silent=True) or {}
    urls = d.get("urls", [])
    if not urls:
        return jsonify({"error": "urls required"}), 400
    include_raw = d.get("raw_headers", False)
    results = []
    for i, u in enumerate(urls):
        if i:
            # Pace upstream requests; FIX: no pointless sleep after the last URL.
            time.sleep(0.5)
        raw = fetch_raw(u)
        results.append(format_result(raw, include_raw_headers=include_raw))
    return jsonify({
        "count": len(results),
        "success_count": sum(1 for r in results if r.get("success")),
        "results": results,
    })
if __name__ == "__main__":
    # HuggingFace Spaces routes external traffic to port 7860.
    app.run(host="0.0.0.0", port=7860)