Upload crates/bex-core/src/http_service.rs
Browse files
crates/bex-core/src/http_service.rs
CHANGED
|
@@ -4,6 +4,50 @@ use std::sync::Arc;
|
|
| 4 |
use std::time::Duration;
|
| 5 |
use tokio::sync::RwLock;
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
/// Cache entry for HTTP responses.
|
| 8 |
#[derive(Clone)]
|
| 9 |
struct CacheEntry {
|
|
@@ -28,18 +72,40 @@ pub struct HttpHostService {
|
|
| 28 |
|
| 29 |
impl HttpHostService {
|
| 30 |
pub fn new(
|
| 31 |
-
|
| 32 |
timeout_ms: u32,
|
| 33 |
pool_idle_timeout_ms: u64,
|
| 34 |
pool_max_idle_per_host: usize,
|
| 35 |
) -> Self {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
let client = Client::builder()
|
| 37 |
.timeout(Duration::from_millis(timeout_ms as u64))
|
| 38 |
.redirect(reqwest::redirect::Policy::limited(10))
|
| 39 |
-
.
|
| 40 |
.pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms))
|
| 41 |
.pool_max_idle_per_host(pool_max_idle_per_host)
|
| 42 |
.use_rustls_tls()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
.build()
|
| 44 |
.expect("failed to build HTTP client");
|
| 45 |
|
|
@@ -65,7 +131,7 @@ impl HttpHostService {
|
|
| 65 |
None
|
| 66 |
}
|
| 67 |
|
| 68 |
-
/// Store response in cache
|
| 69 |
async fn store_cache(
|
| 70 |
&self,
|
| 71 |
url: &str,
|
|
@@ -74,28 +140,23 @@ impl HttpHostService {
|
|
| 74 |
headers: &HashMap<String, String>,
|
| 75 |
final_url: &str,
|
| 76 |
) {
|
| 77 |
-
// Only cache successful GET responses with cacheable status
|
| 78 |
if status != 200 && status != 301 && status != 302 {
|
| 79 |
return;
|
| 80 |
}
|
| 81 |
-
|
| 82 |
-
if body.len() > 1024 * 1024 {
|
| 83 |
return;
|
| 84 |
}
|
| 85 |
|
| 86 |
-
// Determine max-age from Cache-Control header
|
| 87 |
let max_age = if let Some(cc) = headers.get("cache-control") {
|
| 88 |
if cc.contains("no-store") || cc.contains("no-cache") || cc.contains("private") {
|
| 89 |
-
return;
|
| 90 |
}
|
| 91 |
if let Some(pos) = cc.find("max-age=") {
|
| 92 |
let rest = &cc[pos + 8..];
|
| 93 |
let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
|
| 94 |
-
|
| 95 |
-
Duration::from_secs(secs.min(300))
|
| 96 |
-
|
| 97 |
-
Duration::from_secs(60)
|
| 98 |
-
}
|
| 99 |
} else {
|
| 100 |
Duration::from_secs(60)
|
| 101 |
}
|
|
@@ -103,14 +164,15 @@ impl HttpHostService {
|
|
| 103 |
Duration::from_secs(60)
|
| 104 |
};
|
| 105 |
|
| 106 |
-
let cache = self.cache.
|
| 107 |
-
//
|
| 108 |
if cache.len() >= 500 {
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
| 110 |
}
|
| 111 |
-
drop(cache);
|
| 112 |
|
| 113 |
-
let mut cache = self.cache.write().await;
|
| 114 |
cache.insert(
|
| 115 |
url.to_string(),
|
| 116 |
CacheEntry {
|
|
@@ -132,7 +194,7 @@ impl HttpHostService {
|
|
| 132 |
body: Option<Vec<u8>>,
|
| 133 |
timeout_ms: Option<u32>,
|
| 134 |
) -> anyhow::Result<(u16, Vec<u8>, HashMap<String, String>, String)> {
|
| 135 |
-
//
|
| 136 |
if method == "GET" {
|
| 137 |
if let Some(cached) = self.check_cache(url).await {
|
| 138 |
return Ok(cached);
|
|
@@ -148,8 +210,22 @@ impl HttpHostService {
|
|
| 148 |
_ => self.client.get(url),
|
| 149 |
};
|
| 150 |
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
}
|
| 154 |
|
| 155 |
if let Some(b) = body {
|
|
|
|
| 4 |
use std::time::Duration;
|
| 5 |
use tokio::sync::RwLock;
|
| 6 |
|
| 7 |
+
/// User-Agent string of a real Chrome 137 desktop build (Windows 10, x64), used as the default when a plugin does not provide its own.
|
| 8 |
+
/// This prevents bot detection by mimicking a real browser's header fingerprint.
|
| 9 |
+
pub const DEFAULT_BROWSER_UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36";
|
| 10 |
+
|
| 11 |
+
/// Standard browser headers that MUST be sent with every request to avoid detection.
|
| 12 |
+
/// These are the headers Chrome sends automatically on every navigation/fetch.
|
| 13 |
+
pub fn browser_default_headers() -> Vec<(&'static str, &'static str)> {
|
| 14 |
+
vec![
|
| 15 |
+
("User-Agent", DEFAULT_BROWSER_UA),
|
| 16 |
+
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"),
|
| 17 |
+
("Accept-Language", "en-US,en;q=0.9"),
|
| 18 |
+
("Accept-Encoding", "gzip, deflate, br"),
|
| 19 |
+
("Sec-CH-UA", "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\""),
|
| 20 |
+
("Sec-CH-UA-Mobile", "?0"),
|
| 21 |
+
("Sec-CH-UA-Platform", "\"Windows\""),
|
| 22 |
+
("Sec-Fetch-Dest", "document"),
|
| 23 |
+
("Sec-Fetch-Mode", "navigate"),
|
| 24 |
+
("Sec-Fetch-Site", "none"),
|
| 25 |
+
("Sec-Fetch-User", "?1"),
|
| 26 |
+
("Upgrade-Insecure-Requests", "1"),
|
| 27 |
+
("Connection", "keep-alive"),
|
| 28 |
+
("DNT", "1"),
|
| 29 |
+
]
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
/// Headers for XHR/API requests (Sec-Fetch differs from navigation)
|
| 33 |
+
pub fn browser_xhr_headers(referer: &str) -> Vec<(String, String)> {
|
| 34 |
+
vec![
|
| 35 |
+
("User-Agent".to_string(), DEFAULT_BROWSER_UA.to_string()),
|
| 36 |
+
("Accept".to_string(), "application/json, text/javascript, */*; q=0.01".to_string()),
|
| 37 |
+
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
| 38 |
+
("Accept-Encoding".to_string(), "gzip, deflate, br".to_string()),
|
| 39 |
+
("Sec-CH-UA".to_string(), "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\"".to_string()),
|
| 40 |
+
("Sec-CH-UA-Mobile".to_string(), "?0".to_string()),
|
| 41 |
+
("Sec-CH-UA-Platform".to_string(), "\"Windows\"".to_string()),
|
| 42 |
+
("Sec-Fetch-Dest".to_string(), "empty".to_string()),
|
| 43 |
+
("Sec-Fetch-Mode".to_string(), "cors".to_string()),
|
| 44 |
+
("Sec-Fetch-Site".to_string(), "same-origin".to_string()),
|
| 45 |
+
("X-Requested-With".to_string(), "XMLHttpRequest".to_string()),
|
| 46 |
+
("Referer".to_string(), referer.to_string()),
|
| 47 |
+
("Connection".to_string(), "keep-alive".to_string()),
|
| 48 |
+
]
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
/// Cache entry for HTTP responses.
|
| 52 |
#[derive(Clone)]
|
| 53 |
struct CacheEntry {
|
|
|
|
| 72 |
|
| 73 |
impl HttpHostService {
|
| 74 |
pub fn new(
|
| 75 |
+
_user_agent: &str, // Ignored — we always use real browser UA
|
| 76 |
timeout_ms: u32,
|
| 77 |
pool_idle_timeout_ms: u64,
|
| 78 |
pool_max_idle_per_host: usize,
|
| 79 |
) -> Self {
|
| 80 |
+
// Build client mimicking Chrome as closely as possible:
|
| 81 |
+
// - HTTP/2 whenever the server offers it (negotiated via ALPN, as Chrome does)
|
| 82 |
+
// - Browser-like default headers via reqwest::header::HeaderMap (NOTE: HeaderMap does not guarantee on-the-wire header order)
|
| 83 |
+
// - gzip/br/deflate decompression
|
| 84 |
+
// - Real browser User-Agent (not "BexEngine/6.0")
|
| 85 |
+
let mut headers = reqwest::header::HeaderMap::new();
|
| 86 |
+
for (k, v) in browser_default_headers() {
|
| 87 |
+
if let (Ok(name), Ok(val)) = (
|
| 88 |
+
reqwest::header::HeaderName::from_bytes(k.as_bytes()),
|
| 89 |
+
reqwest::header::HeaderValue::from_str(v),
|
| 90 |
+
) {
|
| 91 |
+
headers.insert(name, val);
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
// Build the shared client with Chrome-like transport settings.
//
// HTTP/2 needs no explicit switch here. `http2_prior_knowledge()` takes no
// argument, so the former `.http2_prior_knowledge(false)` did not compile.
// Calling it at all would force HTTP/2 *without* ALPN negotiation and break
// HTTP/1.1-only servers. Leaving it out keeps the default behaviour: HTTP/2
// is negotiated via ALPN over TLS when reqwest's `http2` feature is enabled,
// with HTTP/1.1 as the fallback.
let client = Client::builder()
    // Overall per-request deadline; u32 -> u64 is a lossless widening.
    .timeout(Duration::from_millis(u64::from(timeout_ms)))
    .redirect(reqwest::redirect::Policy::limited(10))
    // Browser-like defaults; per-request headers set later take precedence.
    .default_headers(headers)
    .pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms))
    .pool_max_idle_per_host(pool_max_idle_per_host)
    .use_rustls_tls()
    // Transparently decode the encodings advertised in Accept-Encoding.
    .gzip(true)
    .brotli(true)
    .deflate(true)
    // Cookie store for session persistence (critical for CF challenges).
    .cookie_store(true)
    .build()
    .expect("failed to build HTTP client");
|
| 111 |
|
|
|
|
| 131 |
None
|
| 132 |
}
|
| 133 |
|
| 134 |
+
/// Store response in cache
|
| 135 |
async fn store_cache(
|
| 136 |
&self,
|
| 137 |
url: &str,
|
|
|
|
| 140 |
headers: &HashMap<String, String>,
|
| 141 |
final_url: &str,
|
| 142 |
) {
|
|
|
|
| 143 |
if status != 200 && status != 301 && status != 302 {
|
| 144 |
return;
|
| 145 |
}
|
| 146 |
+
if body.len() > 2 * 1024 * 1024 {
|
|
|
|
| 147 |
return;
|
| 148 |
}
|
| 149 |
|
|
|
|
| 150 |
let max_age = if let Some(cc) = headers.get("cache-control") {
|
| 151 |
if cc.contains("no-store") || cc.contains("no-cache") || cc.contains("private") {
|
| 152 |
+
return;
|
| 153 |
}
|
| 154 |
if let Some(pos) = cc.find("max-age=") {
|
| 155 |
let rest = &cc[pos + 8..];
|
| 156 |
let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
|
| 157 |
+
rest[..end].parse::<u64>().ok()
|
| 158 |
+
.map(|secs| Duration::from_secs(secs.min(300)))
|
| 159 |
+
.unwrap_or(Duration::from_secs(60))
|
|
|
|
|
|
|
| 160 |
} else {
|
| 161 |
Duration::from_secs(60)
|
| 162 |
}
|
|
|
|
| 164 |
Duration::from_secs(60)
|
| 165 |
};
|
| 166 |
|
| 167 |
+
let mut cache = self.cache.write().await;
|
| 168 |
+
// Evict stale entries before adding
|
| 169 |
if cache.len() >= 500 {
|
| 170 |
+
cache.retain(|_, v| v.is_fresh());
|
| 171 |
+
}
|
| 172 |
+
if cache.len() >= 500 {
|
| 173 |
+
return; // Still full after eviction
|
| 174 |
}
|
|
|
|
| 175 |
|
|
|
|
| 176 |
cache.insert(
|
| 177 |
url.to_string(),
|
| 178 |
CacheEntry {
|
|
|
|
| 194 |
body: Option<Vec<u8>>,
|
| 195 |
timeout_ms: Option<u32>,
|
| 196 |
) -> anyhow::Result<(u16, Vec<u8>, HashMap<String, String>, String)> {
|
| 197 |
+
// Cache check for GET
|
| 198 |
if method == "GET" {
|
| 199 |
if let Some(cached) = self.check_cache(url).await {
|
| 200 |
return Ok(cached);
|
|
|
|
| 210 |
_ => self.client.get(url),
|
| 211 |
};
|
| 212 |
|
| 213 |
+
// CRITICAL: Apply plugin headers AFTER default headers.
|
| 214 |
+
// This lets plugins override defaults (e.g., different Referer).
|
| 215 |
+
// reqwest merges: plugin headers take precedence over defaults.
|
| 216 |
+
for (k, v) in &headers {
|
| 217 |
+
req = req.header(k.as_str(), v.as_str());
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
// If no User-Agent provided by plugin, the default_headers has one.
|
| 221 |
+
// If no Referer provided, add the origin of the URL being requested.
|
| 222 |
+
let has_referer = headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("referer"));
if !has_referer {
    // Synthesize a Referer from the request URL's own origin. `Url::origin`
    // keeps a non-default port (e.g. `https://host:8443`), which the previous
    // `scheme + host_str` formatting silently dropped.
    if let Ok(parsed) = url::Url::parse(url) {
        let origin = parsed.origin();
        // Opaque origins (URLs without a host) serialize to "null"; send no
        // Referer at all rather than a malformed `scheme:///` value.
        if origin.is_tuple() {
            req = req.header("Referer", format!("{}/", origin.ascii_serialization()));
        }
    }
}
|
| 230 |
|
| 231 |
if let Some(b) = body {
|