krystv committed on
Commit
ceb06e0
·
verified ·
1 Parent(s): e24324c

fix: restore portable reqwest HTTP service with browser-like defaults

Browse files
Files changed (1) hide show
  1. crates/bex-core/src/http_service.rs +75 -102
crates/bex-core/src/http_service.rs CHANGED
@@ -1,30 +1,52 @@
1
- //! HTTP Service — Full Chrome 137 Browser Impersonation
2
  //!
3
- //! This module uses `rquest` (BoringSSL-backed) to impersonate Chrome's exact
4
- //! TLS fingerprint (JA3/JA4), HTTP/2 SETTINGS frame, and header ordering.
5
- //!
6
- //! WHY THIS MATTERS:
7
- //! Cloudflare uses 3 fingerprinting layers simultaneously:
8
- //! 1. JA3/JA4 — TLS ClientHello cipher suites, extensions, curves
9
- //! 2. Akamai H2 FP — HTTP/2 SETTINGS values (INITIAL_WINDOW_SIZE, etc.)
10
- //! 3. Header ordering — Chrome sends headers in a specific order
11
  //!
12
- //! `rustls` fails ALL THREE checks. `rquest` with BoringSSL passes all of them
13
- //! because it uses Chrome's actual TLS library (BoringSSL) with the exact same
14
- //! configuration Chrome uses.
 
 
15
  //!
16
- //! The result: Cloudflare sees an identical fingerprint to a real Chrome browser.
17
- //! No JS challenges, no blocks, no CAPTCHAs (except Turnstile which requires
18
- //! actual human interaction).
 
19
 
20
- use rquest::Client;
21
- use rquest::tls::Impersonate;
22
  use std::collections::HashMap;
23
  use std::sync::Arc;
24
  use std::time::Duration;
25
  use tokio::sync::RwLock;
26
 
27
- /// Cache entry for HTTP responses.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  #[derive(Clone)]
29
  struct CacheEntry {
30
  body: Vec<u8>,
@@ -47,49 +69,36 @@ pub struct HttpHostService {
47
  }
48
 
49
  impl HttpHostService {
50
- /// Create a new HTTP service with full Chrome 137 impersonation.
51
- ///
52
- /// This sets up:
53
- /// - TLS ClientHello identical to Chrome 137 (JA3/JA4 match)
54
- /// - HTTP/2 SETTINGS frame identical to Chrome (Akamai FP match)
55
- /// - Header ordering identical to Chrome
56
- /// - GREASE values in TLS extensions (RFC 8701)
57
- /// - Certificate compression (brotli) support
58
- /// - Cookie store for CF session persistence
59
- /// - gzip/br/deflate decompression
60
- ///
61
- /// No manual header configuration needed — rquest handles everything.
62
  pub fn new(
63
- _user_agent: &str, // Ignored — Chrome 137 UA is set by impersonation
64
  timeout_ms: u32,
65
  pool_idle_timeout_ms: u64,
66
  pool_max_idle_per_host: usize,
67
  ) -> Self {
68
- // Build client with Chrome 137 impersonation.
69
- // This single call configures:
70
- // - BoringSSL with Chrome's exact cipher suite order
71
- // - GREASE randomization in TLS extensions
72
- // - Chrome's HTTP/2 SETTINGS frame (INITIAL_WINDOW_SIZE=6291456, etc.)
73
- // - Chrome's header ordering (sec-ch-ua before accept, etc.)
74
- // - Chrome's User-Agent string
75
- // - Certificate compression (brotli)
76
- // - ALPS/ALPN negotiation
 
77
  let client = Client::builder()
78
- .impersonate(Impersonate::Chrome137)
79
  .timeout(Duration::from_millis(timeout_ms as u64))
80
- .redirect(rquest::redirect::Policy::limited(10))
 
81
  .pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms))
82
  .pool_max_idle_per_host(pool_max_idle_per_host)
83
- // Cookie store is CRITICAL for Cloudflare:
84
- // CF sets cookies after passing a challenge; subsequent requests
85
- // must include these cookies or get re-challenged.
86
- .cookie_store(true)
87
- // Content-encoding decompression (browsers always support these)
88
  .gzip(true)
89
  .brotli(true)
90
  .deflate(true)
 
 
91
  .build()
92
- .expect("failed to build HTTP client with Chrome impersonation");
93
 
94
  Self {
95
  client,
@@ -97,23 +106,18 @@ impl HttpHostService {
97
  }
98
  }
99
 
100
- /// Check cache for a fresh entry
101
  async fn check_cache(&self, url: &str) -> Option<(u16, Vec<u8>, HashMap<String, String>, String)> {
102
  let cache = self.cache.read().await;
103
- if let Some(entry) = cache.get(url) {
104
- if entry.is_fresh() {
105
- return Some((
106
- entry.status,
107
- entry.body.clone(),
108
- entry.headers.clone(),
109
- entry.final_url.clone(),
110
- ));
111
- }
112
- }
113
- None
114
  }
115
 
116
- /// Store response in cache (only 2xx, < 2MB, respecting Cache-Control)
117
  async fn store_cache(
118
  &self,
119
  url: &str,
@@ -122,19 +126,17 @@ impl HttpHostService {
122
  headers: &HashMap<String, String>,
123
  final_url: &str,
124
  ) {
125
- if status < 200 || status >= 300 {
126
- return;
127
- }
128
- if body.len() > 2 * 1024 * 1024 {
129
  return;
130
  }
131
 
132
  let max_age = if let Some(cc) = headers.get("cache-control") {
133
- if cc.contains("no-store") || cc.contains("no-cache") || cc.contains("private") {
 
134
  return;
135
  }
136
- if let Some(pos) = cc.find("max-age=") {
137
- let rest = &cc[pos + 8..];
138
  let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
139
  rest[..end]
140
  .parse::<u64>()
@@ -169,21 +171,6 @@ impl HttpHostService {
169
  );
170
  }
171
 
172
- /// Send an HTTP request with full Chrome impersonation.
173
- ///
174
- /// IMPORTANT: Plugin-provided headers are applied AFTER the Chrome defaults.
175
- /// This means plugins CAN override specific headers (e.g., custom Referer)
176
- /// without breaking the fingerprint. The TLS fingerprint and H2 SETTINGS
177
- /// are set at the connection level and cannot be overridden by headers.
178
- ///
179
- /// The only headers plugins should typically set are:
180
- /// - Referer (for same-origin API calls)
181
- /// - X-Requested-With (for XHR detection)
182
- /// - Authorization / API keys
183
- /// - Content-Type (for POST)
184
- ///
185
- /// Do NOT set User-Agent, Sec-CH-UA, Sec-Fetch-* — these are handled
186
- /// by the impersonation layer and setting them manually may break ordering.
187
  pub async fn send_request(
188
  &self,
189
  method: &str,
@@ -192,7 +179,6 @@ impl HttpHostService {
192
  body: Option<Vec<u8>>,
193
  timeout_ms: Option<u32>,
194
  ) -> anyhow::Result<(u16, Vec<u8>, HashMap<String, String>, String)> {
195
- // Cache check for GET
196
  if method == "GET" {
197
  if let Some(cached) = self.check_cache(url).await {
198
  return Ok(cached);
@@ -208,34 +194,23 @@ impl HttpHostService {
208
  _ => self.client.get(url),
209
  };
210
 
211
- // Apply plugin headers.
212
- // IMPORTANT: Only set headers that the plugin explicitly provides.
213
- // The Chrome impersonation already handles UA, Sec-CH-UA, Sec-Fetch-*, etc.
214
- // Plugin headers that conflict with impersonation defaults will override them.
215
  for (k, v) in &headers {
216
  req = req.header(k.as_str(), v.as_str());
217
  }
218
 
219
- // Auto-generate Referer from URL origin if not provided by plugin.
220
- // Chrome automatically sends Referer on same-origin requests.
221
- let has_referer = headers
222
- .iter()
223
- .any(|(k, _)| k.eq_ignore_ascii_case("referer"));
224
  if !has_referer {
225
  if let Ok(parsed) = url::Url::parse(url) {
226
- let origin = format!(
227
- "{}://{}/",
228
- parsed.scheme(),
229
- parsed.host_str().unwrap_or("")
230
- );
231
- req = req.header("Referer", &origin);
232
  }
233
  }
234
 
235
  if let Some(b) = body {
236
  req = req.body(b);
237
  }
238
-
239
  if let Some(ms) = timeout_ms {
240
  req = req.timeout(Duration::from_millis(ms as u64));
241
  }
@@ -250,7 +225,6 @@ impl HttpHostService {
250
  .collect();
251
  let resp_body = resp.bytes().await?.to_vec();
252
 
253
- // Cache GET responses
254
  if method == "GET" {
255
  self.store_cache(url, status, resp_body.clone(), &resp_headers, &final_url)
256
  .await;
@@ -259,7 +233,6 @@ impl HttpHostService {
259
  Ok((status, resp_body, resp_headers, final_url))
260
  }
261
 
262
- /// Clear the HTTP cache
263
  pub async fn clear_cache(&self) {
264
  self.cache.write().await.clear();
265
  }
 
1
+ //! HTTP Service — portable production backend.
2
  //!
3
+ //! This backend uses `reqwest` + `rustls` for reliable cross-platform builds and
4
+ //! native-library embedding in C++ apps. It sends browser-like HTTP headers,
5
+ //! supports HTTP/2, cookies, gzip/brotli/deflate, caching, max response limits,
6
+ //! and plugin-provided header overrides.
 
 
 
 
7
  //!
8
+ //! IMPORTANT LIMITATION:
9
+ //! This default backend does NOT byte-for-byte impersonate Chrome's TLS JA3/JA4
10
+ //! fingerprint. It is production-friendly and portable, but advanced Cloudflare,
11
+ //! DataDome, PerimeterX, or Akamai Bot Manager deployments can still detect that
12
+ //! the TLS stack is rustls, not Chrome/BoringSSL.
13
  //!
14
+ //! For strict anti-bot bypass, add a second optional backend based on a verified
15
+ //! BoringSSL/curl-impersonate client and gate it behind a Cargo feature. Do not
16
+ //! make that the default until it is compiled and tested on every target platform
17
+ //! you intend to ship.
18
 
19
+ use reqwest::Client;
 
20
  use std::collections::HashMap;
21
  use std::sync::Arc;
22
  use std::time::Duration;
23
  use tokio::sync::RwLock;
24
 
25
+ /// Current Chrome-like desktop UA. Kept centralized so plugins and host match.
26
+ pub const DEFAULT_BROWSER_UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36";
27
+
28
+ /// Default browser-ish navigation/fetch headers.
29
+ ///
30
+ /// These help with simple header-based checks. They do not alter TLS JA3/JA4.
31
+ pub fn browser_default_headers() -> Vec<(&'static str, &'static str)> {
32
+ vec![
33
+ ("User-Agent", DEFAULT_BROWSER_UA),
34
+ ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"),
35
+ ("Accept-Language", "en-US,en;q=0.9"),
36
+ ("Accept-Encoding", "gzip, deflate, br"),
37
+ ("Sec-CH-UA", "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\""),
38
+ ("Sec-CH-UA-Mobile", "?0"),
39
+ ("Sec-CH-UA-Platform", "\"Windows\""),
40
+ ("Sec-Fetch-Dest", "document"),
41
+ ("Sec-Fetch-Mode", "navigate"),
42
+ ("Sec-Fetch-Site", "none"),
43
+ ("Sec-Fetch-User", "?1"),
44
+ ("Upgrade-Insecure-Requests", "1"),
45
+ ("Connection", "keep-alive"),
46
+ ("DNT", "1"),
47
+ ]
48
+ }
49
+
50
  #[derive(Clone)]
51
  struct CacheEntry {
52
  body: Vec<u8>,
 
69
  }
70
 
71
  impl HttpHostService {
 
 
 
 
 
 
 
 
 
 
 
 
72
  pub fn new(
73
+ _user_agent: &str,
74
  timeout_ms: u32,
75
  pool_idle_timeout_ms: u64,
76
  pool_max_idle_per_host: usize,
77
  ) -> Self {
78
+ let mut headers = reqwest::header::HeaderMap::new();
79
+ for (k, v) in browser_default_headers() {
80
+ if let (Ok(name), Ok(val)) = (
81
+ reqwest::header::HeaderName::from_bytes(k.as_bytes()),
82
+ reqwest::header::HeaderValue::from_str(v),
83
+ ) {
84
+ headers.insert(name, val);
85
+ }
86
+ }
87
+
88
  let client = Client::builder()
 
89
  .timeout(Duration::from_millis(timeout_ms as u64))
90
+ .redirect(reqwest::redirect::Policy::limited(10))
91
+ .default_headers(headers)
92
  .pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms))
93
  .pool_max_idle_per_host(pool_max_idle_per_host)
94
+ .use_rustls_tls()
 
 
 
 
95
  .gzip(true)
96
  .brotli(true)
97
  .deflate(true)
98
+ .http2_prior_knowledge(false)
99
+ .cookie_store(true)
100
  .build()
101
+ .expect("failed to build HTTP client");
102
 
103
  Self {
104
  client,
 
106
  }
107
  }
108
 
 
109
  async fn check_cache(&self, url: &str) -> Option<(u16, Vec<u8>, HashMap<String, String>, String)> {
110
  let cache = self.cache.read().await;
111
+ cache.get(url).filter(|entry| entry.is_fresh()).map(|entry| {
112
+ (
113
+ entry.status,
114
+ entry.body.clone(),
115
+ entry.headers.clone(),
116
+ entry.final_url.clone(),
117
+ )
118
+ })
 
 
 
119
  }
120
 
 
121
  async fn store_cache(
122
  &self,
123
  url: &str,
 
126
  headers: &HashMap<String, String>,
127
  final_url: &str,
128
  ) {
129
+ if !(200..300).contains(&status) || body.len() > 2 * 1024 * 1024 {
 
 
 
130
  return;
131
  }
132
 
133
  let max_age = if let Some(cc) = headers.get("cache-control") {
134
+ let cc_l = cc.to_ascii_lowercase();
135
+ if cc_l.contains("no-store") || cc_l.contains("no-cache") || cc_l.contains("private") {
136
  return;
137
  }
138
+ if let Some(pos) = cc_l.find("max-age=") {
139
+ let rest = &cc_l[pos + 8..];
140
  let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
141
  rest[..end]
142
  .parse::<u64>()
 
171
  );
172
  }
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  pub async fn send_request(
175
  &self,
176
  method: &str,
 
179
  body: Option<Vec<u8>>,
180
  timeout_ms: Option<u32>,
181
  ) -> anyhow::Result<(u16, Vec<u8>, HashMap<String, String>, String)> {
 
182
  if method == "GET" {
183
  if let Some(cached) = self.check_cache(url).await {
184
  return Ok(cached);
 
194
  _ => self.client.get(url),
195
  };
196
 
 
 
 
 
197
  for (k, v) in &headers {
198
  req = req.header(k.as_str(), v.as_str());
199
  }
200
 
201
+ let has_referer = headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("referer"));
 
 
 
 
202
  if !has_referer {
203
  if let Ok(parsed) = url::Url::parse(url) {
204
+ if let Some(host) = parsed.host_str() {
205
+ let origin = format!("{}://{}/", parsed.scheme(), host);
206
+ req = req.header("Referer", &origin);
207
+ }
 
 
208
  }
209
  }
210
 
211
  if let Some(b) = body {
212
  req = req.body(b);
213
  }
 
214
  if let Some(ms) = timeout_ms {
215
  req = req.timeout(Duration::from_millis(ms as u64));
216
  }
 
225
  .collect();
226
  let resp_body = resp.bytes().await?.to_vec();
227
 
 
228
  if method == "GET" {
229
  self.store_cache(url, status, resp_body.clone(), &resp_headers, &final_url)
230
  .await;
 
233
  Ok((status, resp_body, resp_headers, final_url))
234
  }
235
 
 
236
  pub async fn clear_cache(&self) {
237
  self.cache.write().await.clear();
238
  }