//! HTTP Service — portable production backend with simple challenge detection. //! //! This backend uses `reqwest` + `rustls` for reliable cross-platform builds and //! native-library embedding in C++ apps. It sends browser-like HTTP headers, //! supports HTTP/2, cookies, gzip/brotli/deflate, caching, max response limits, //! and plugin-provided header overrides. //! //! It also detects common anti-bot challenge pages and returns a structured //! `CHALLENGE_REQUIRED` error string. The host app can then decide whether to //! retry with cookies, a platform browser session, an external fetcher, or a //! user-visible WebView/browser flow. use reqwest::Client; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use tokio::sync::RwLock; pub const DEFAULT_BROWSER_UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"; pub fn browser_default_headers() -> Vec<(&'static str, &'static str)> { vec![ ("User-Agent", DEFAULT_BROWSER_UA), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"), ("Accept-Language", "en-US,en;q=0.9"), ("Accept-Encoding", "gzip, deflate, br"), ("Sec-CH-UA", "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\""), ("Sec-CH-UA-Mobile", "?0"), ("Sec-CH-UA-Platform", "\"Windows\""), ("Sec-Fetch-Dest", "document"), ("Sec-Fetch-Mode", "navigate"), ("Sec-Fetch-Site", "none"), ("Sec-Fetch-User", "?1"), ("Upgrade-Insecure-Requests", "1"), ("Connection", "keep-alive"), ("DNT", "1"), ] } #[derive(Clone)] struct CacheEntry { body: Vec, status: u16, headers: HashMap, final_url: String, inserted_at: std::time::Instant, max_age: Duration, } impl CacheEntry { fn is_fresh(&self) -> bool { self.inserted_at.elapsed() < self.max_age } } pub struct HttpHostService { client: Client, cache: Arc>>, } #[derive(Debug, Clone, serde::Serialize)] struct ChallengeInfo { code: &'static str, provider: String, status: u16, url: String, final_url: String, domain: String, hint: &'static str, } fn header_value<'a>(headers: &'a HashMap, name: &str) -> Option<&'a str> { headers .iter() .find(|(k, _)| k.eq_ignore_ascii_case(name)) .map(|(_, v)| v.as_str()) } fn detect_antibot_challenge( status: u16, url: &str, final_url: &str, headers: &HashMap, body: &[u8], ) -> Option { let status_suspicious = matches!(status, 403 | 429 | 503); let body_sample = String::from_utf8_lossy(&body[..body.len().min(64 * 1024)]).to_ascii_lowercase(); let mut provider = None::<&str>; if header_value(headers, "cf-ray").is_some() || header_value(headers, "server").map(|v| v.to_ascii_lowercase().contains("cloudflare")).unwrap_or(false) || body_sample.contains("cf-chl-") || body_sample.contains("checking your browser") || body_sample.contains("just a moment") || body_sample.contains("cloudflare") || final_url.contains("/cdn-cgi/challenge-platform/") { provider = Some("cloudflare"); } else if header_value(headers, "x-datadome").is_some() || body_sample.contains("datadome") { provider = Some("datadome"); } else if body_sample.contains("px-captcha") || body_sample.contains("perimeterx") || header_value(headers, "x-px").is_some() { provider = Some("perimeterx"); } else if body_sample.contains("akamai") && (body_sample.contains("bot") || body_sample.contains("denied")) { provider = Some("akamai"); } else if body_sample.contains("captcha") || body_sample.contains("turnstile") { provider = Some("captcha"); } let provider = provider?; if !status_suspicious && provider == "captcha" { return None; } let domain = url::Url::parse(final_url) .or_else(|_| url::Url::parse(url)) .ok() .and_then(|u| u.host_str().map(|s| s.to_string())) .unwrap_or_default(); Some(ChallengeInfo { code: "CHALLENGE_REQUIRED", provider: provider.to_string(), status, url: url.to_string(), final_url: final_url.to_string(), domain, hint: "Host app should retry with stored cookies, browser-backed fetch, or external fetcher.", }) } impl HttpHostService { pub fn new( _user_agent: &str, timeout_ms: u32, pool_idle_timeout_ms: u64, pool_max_idle_per_host: usize, ) -> Self { let mut headers = reqwest::header::HeaderMap::new(); for (k, v) in browser_default_headers() { if let (Ok(name), Ok(val)) = ( reqwest::header::HeaderName::from_bytes(k.as_bytes()), reqwest::header::HeaderValue::from_str(v), ) { headers.insert(name, val); } } let client = Client::builder() .timeout(Duration::from_millis(timeout_ms as u64)) .redirect(reqwest::redirect::Policy::limited(10)) .default_headers(headers) .pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms)) .pool_max_idle_per_host(pool_max_idle_per_host) .use_rustls_tls() .gzip(true) .brotli(true) .deflate(true) .http2_prior_knowledge(false) .cookie_store(true) .build() .expect("failed to build HTTP client"); Self { client, cache: Arc::new(RwLock::new(HashMap::new())), } } async fn check_cache(&self, url: &str) -> Option<(u16, Vec, HashMap, String)> { let cache = self.cache.read().await; cache.get(url).filter(|entry| entry.is_fresh()).map(|entry| { ( entry.status, entry.body.clone(), entry.headers.clone(), entry.final_url.clone(), ) }) } async fn store_cache( &self, url: &str, status: u16, body: Vec, headers: &HashMap, final_url: &str, ) { if !(200..300).contains(&status) || body.len() > 2 * 1024 * 1024 { return; } let max_age = if let Some(cc) = headers.get("cache-control") { let cc_l = cc.to_ascii_lowercase(); if cc_l.contains("no-store") || cc_l.contains("no-cache") || cc_l.contains("private") { return; } if let Some(pos) = cc_l.find("max-age=") { let rest = &cc_l[pos + 8..]; let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len()); rest[..end] .parse::() .ok() .map(|secs| Duration::from_secs(secs.min(300))) .unwrap_or(Duration::from_secs(60)) } else { Duration::from_secs(60) } } else { Duration::from_secs(60) }; let mut cache = self.cache.write().await; if cache.len() >= 500 { cache.retain(|_, v| v.is_fresh()); } if cache.len() >= 500 { return; } cache.insert( url.to_string(), CacheEntry { body, status, headers: headers.clone(), final_url: final_url.to_string(), inserted_at: std::time::Instant::now(), max_age, }, ); } pub async fn send_request( &self, method: &str, url: &str, headers: Vec<(String, String)>, body: Option>, timeout_ms: Option, ) -> anyhow::Result<(u16, Vec, HashMap, String)> { if method == "GET" { if let Some(cached) = self.check_cache(url).await { return Ok(cached); } } let mut req = match method { "POST" => self.client.post(url), "PUT" => self.client.put(url), "DELETE" => self.client.delete(url), "HEAD" => self.client.head(url), "PATCH" => self.client.patch(url), _ => self.client.get(url), }; for (k, v) in &headers { req = req.header(k.as_str(), v.as_str()); } let has_referer = headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("referer")); if !has_referer { if let Ok(parsed) = url::Url::parse(url) { if let Some(host) = parsed.host_str() { let origin = format!("{}://{}/", parsed.scheme(), host); req = req.header("Referer", &origin); } } } if let Some(b) = body { req = req.body(b); } if let Some(ms) = timeout_ms { req = req.timeout(Duration::from_millis(ms as u64)); } let resp = req.send().await?; let status = resp.status().as_u16(); let final_url = resp.url().to_string(); let resp_headers: HashMap = resp .headers() .iter() .map(|(k, v)| (k.to_string(), v.to_str().unwrap_or("").to_string())) .collect(); let resp_body = resp.bytes().await?.to_vec(); if let Some(challenge) = detect_antibot_challenge(status, url, &final_url, &resp_headers, &resp_body) { let json = serde_json::to_string(&challenge).unwrap_or_else(|_| "{\"code\":\"CHALLENGE_REQUIRED\"}".to_string()); anyhow::bail!(json); } if method == "GET" { self.store_cache(url, status, resp_body.clone(), &resp_headers, &final_url) .await; } Ok((status, resp_body, resp_headers, final_url)) } pub async fn clear_cache(&self) { self.cache.write().await.clear(); } }