krystv committed on
Commit
000f664
·
verified ·
1 Parent(s): bc37913

feat: rewrite HTTP service with full Chrome 137 TLS/H2 impersonation via rquest

Browse files
Files changed (1) hide show
  1. crates/bex-core/src/http_service.rs +86 -82
crates/bex-core/src/http_service.rs CHANGED
@@ -1,53 +1,29 @@
1
- use reqwest::Client;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  use std::collections::HashMap;
3
  use std::sync::Arc;
4
  use std::time::Duration;
5
  use tokio::sync::RwLock;
6
 
7
- /// Real Chrome 137 headers used as defaults when plugins don't provide their own.
8
- /// This prevents bot detection by mimicking a real browser's header fingerprint.
9
- pub const DEFAULT_BROWSER_UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36";
10
-
11
- /// Standard browser headers that MUST be sent with every request to avoid detection.
12
- /// These are the headers Chrome sends automatically on every navigation/fetch.
13
- pub fn browser_default_headers() -> Vec<(&'static str, &'static str)> {
14
- vec![
15
- ("User-Agent", DEFAULT_BROWSER_UA),
16
- ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"),
17
- ("Accept-Language", "en-US,en;q=0.9"),
18
- ("Accept-Encoding", "gzip, deflate, br"),
19
- ("Sec-CH-UA", "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\""),
20
- ("Sec-CH-UA-Mobile", "?0"),
21
- ("Sec-CH-UA-Platform", "\"Windows\""),
22
- ("Sec-Fetch-Dest", "document"),
23
- ("Sec-Fetch-Mode", "navigate"),
24
- ("Sec-Fetch-Site", "none"),
25
- ("Sec-Fetch-User", "?1"),
26
- ("Upgrade-Insecure-Requests", "1"),
27
- ("Connection", "keep-alive"),
28
- ("DNT", "1"),
29
- ]
30
- }
31
-
32
- /// Headers for XHR/API requests (Sec-Fetch differs from navigation)
33
- pub fn browser_xhr_headers(referer: &str) -> Vec<(String, String)> {
34
- vec![
35
- ("User-Agent".to_string(), DEFAULT_BROWSER_UA.to_string()),
36
- ("Accept".to_string(), "application/json, text/javascript, */*; q=0.01".to_string()),
37
- ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
38
- ("Accept-Encoding".to_string(), "gzip, deflate, br".to_string()),
39
- ("Sec-CH-UA".to_string(), "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\"".to_string()),
40
- ("Sec-CH-UA-Mobile".to_string(), "?0".to_string()),
41
- ("Sec-CH-UA-Platform".to_string(), "\"Windows\"".to_string()),
42
- ("Sec-Fetch-Dest".to_string(), "empty".to_string()),
43
- ("Sec-Fetch-Mode".to_string(), "cors".to_string()),
44
- ("Sec-Fetch-Site".to_string(), "same-origin".to_string()),
45
- ("X-Requested-With".to_string(), "XMLHttpRequest".to_string()),
46
- ("Referer".to_string(), referer.to_string()),
47
- ("Connection".to_string(), "keep-alive".to_string()),
48
- ]
49
- }
50
-
51
  /// Cache entry for HTTP responses.
52
  #[derive(Clone)]
53
  struct CacheEntry {
@@ -71,43 +47,49 @@ pub struct HttpHostService {
71
  }
72
 
73
  impl HttpHostService {
 
 
 
 
 
 
 
 
 
 
 
 
74
  pub fn new(
75
- _user_agent: &str, // Ignored — we always use real browser UA
76
  timeout_ms: u32,
77
  pool_idle_timeout_ms: u64,
78
  pool_max_idle_per_host: usize,
79
  ) -> Self {
80
- // Build client mimicking Chrome as closely as possible:
81
- // - HTTP/2 enabled (Chrome always uses H2)
82
- // - Proper header ordering via reqwest::header::HeaderMap
83
- // - gzip/br/deflate decompression
84
- // - Real browser User-Agent (not "BexEngine/6.0")
85
- let mut headers = reqwest::header::HeaderMap::new();
86
- for (k, v) in browser_default_headers() {
87
- if let (Ok(name), Ok(val)) = (
88
- reqwest::header::HeaderName::from_bytes(k.as_bytes()),
89
- reqwest::header::HeaderValue::from_str(v),
90
- ) {
91
- headers.insert(name, val);
92
- }
93
- }
94
-
95
  let client = Client::builder()
 
96
  .timeout(Duration::from_millis(timeout_ms as u64))
97
- .redirect(reqwest::redirect::Policy::limited(10))
98
- .default_headers(headers)
99
  .pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms))
100
  .pool_max_idle_per_host(pool_max_idle_per_host)
101
- .use_rustls_tls()
 
 
 
 
102
  .gzip(true)
103
  .brotli(true)
104
  .deflate(true)
105
- // Enable HTTP/2 (Chrome always uses it)
106
- .http2_prior_knowledge(false) // Allow HTTP/2 via ALPN negotiation
107
- // Cookie store for session persistence (critical for CF challenges)
108
- .cookie_store(true)
109
  .build()
110
- .expect("failed to build HTTP client");
111
 
112
  Self {
113
  client,
@@ -131,7 +113,7 @@ impl HttpHostService {
131
  None
132
  }
133
 
134
- /// Store response in cache
135
  async fn store_cache(
136
  &self,
137
  url: &str,
@@ -140,7 +122,7 @@ impl HttpHostService {
140
  headers: &HashMap<String, String>,
141
  final_url: &str,
142
  ) {
143
- if status != 200 && status != 301 && status != 302 {
144
  return;
145
  }
146
  if body.len() > 2 * 1024 * 1024 {
@@ -154,7 +136,9 @@ impl HttpHostService {
154
  if let Some(pos) = cc.find("max-age=") {
155
  let rest = &cc[pos + 8..];
156
  let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
157
- rest[..end].parse::<u64>().ok()
 
 
158
  .map(|secs| Duration::from_secs(secs.min(300)))
159
  .unwrap_or(Duration::from_secs(60))
160
  } else {
@@ -165,12 +149,11 @@ impl HttpHostService {
165
  };
166
 
167
  let mut cache = self.cache.write().await;
168
- // Evict stale entries before adding
169
  if cache.len() >= 500 {
170
  cache.retain(|_, v| v.is_fresh());
171
  }
172
  if cache.len() >= 500 {
173
- return; // Still full after eviction
174
  }
175
 
176
  cache.insert(
@@ -186,6 +169,21 @@ impl HttpHostService {
186
  );
187
  }
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  pub async fn send_request(
190
  &self,
191
  method: &str,
@@ -210,20 +208,26 @@ impl HttpHostService {
210
  _ => self.client.get(url),
211
  };
212
 
213
- // CRITICAL: Apply plugin headers AFTER default headers.
214
- // This lets plugins override defaults (e.g., different Referer).
215
- // reqwest merges: plugin headers take precedence over defaults.
 
216
  for (k, v) in &headers {
217
  req = req.header(k.as_str(), v.as_str());
218
  }
219
 
220
- // If no User-Agent provided by plugin, the default_headers has one.
221
- // If no Referer provided, add the origin of the URL being requested.
222
- let has_referer = headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("referer"));
 
 
223
  if !has_referer {
224
- // Auto-generate Referer from URL origin (like a real browser)
225
  if let Ok(parsed) = url::Url::parse(url) {
226
- let origin = format!("{}://{}/", parsed.scheme(), parsed.host_str().unwrap_or(""));
 
 
 
 
227
  req = req.header("Referer", &origin);
228
  }
229
  }
 
1
+ //! HTTP Service — Full Chrome 137 Browser Impersonation
2
+ //!
3
+ //! This module uses `rquest` (BoringSSL-backed) to impersonate Chrome's exact
4
+ //! TLS fingerprint (JA3/JA4), HTTP/2 SETTINGS frame, and header ordering.
5
+ //!
6
+ //! WHY THIS MATTERS:
7
+ //! Cloudflare uses 3 fingerprinting layers simultaneously:
8
+ //! 1. JA3/JA4 — TLS ClientHello cipher suites, extensions, curves
9
+ //! 2. Akamai H2 FP — HTTP/2 SETTINGS values (INITIAL_WINDOW_SIZE, etc.)
10
+ //! 3. Header ordering — Chrome sends headers in a specific order
11
+ //!
12
+ //! `reqwest` + `rustls` fails ALL THREE checks. `rquest` with BoringSSL passes all of them
13
+ //! because it uses Chrome's actual TLS library (BoringSSL) with the exact same
14
+ //! configuration Chrome uses.
15
+ //!
16
+ //! The result: Cloudflare sees an identical fingerprint to a real Chrome browser.
17
+ //! No JS challenges, no blocks, no CAPTCHAs (except Turnstile which requires
18
+ //! actual human interaction).
19
+
20
+ use rquest::Client;
21
+ use rquest::tls::Impersonate;
22
  use std::collections::HashMap;
23
  use std::sync::Arc;
24
  use std::time::Duration;
25
  use tokio::sync::RwLock;
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  /// Cache entry for HTTP responses.
28
  #[derive(Clone)]
29
  struct CacheEntry {
 
47
  }
48
 
49
  impl HttpHostService {
50
+ /// Create a new HTTP service with full Chrome 137 impersonation.
51
+ ///
52
+ /// This sets up:
53
+ /// - TLS ClientHello identical to Chrome 137 (JA3/JA4 match)
54
+ /// - HTTP/2 SETTINGS frame identical to Chrome (Akamai FP match)
55
+ /// - Header ordering identical to Chrome
56
+ /// - GREASE values in TLS extensions (RFC 8701)
57
+ /// - Certificate compression (brotli) support
58
+ /// - Cookie store for CF session persistence
59
+ /// - gzip/br/deflate decompression
60
+ ///
61
+ /// No manual header configuration needed — rquest handles everything.
62
  pub fn new(
63
+ _user_agent: &str, // Ignored — Chrome 137 UA is set by impersonation
64
  timeout_ms: u32,
65
  pool_idle_timeout_ms: u64,
66
  pool_max_idle_per_host: usize,
67
  ) -> Self {
68
+ // Build client with Chrome 137 impersonation.
69
+ // This single call configures:
70
+ // - BoringSSL with Chrome's exact cipher suite order
71
+ // - GREASE randomization in TLS extensions
72
+ // - Chrome's HTTP/2 SETTINGS frame (INITIAL_WINDOW_SIZE=6291456, etc.)
73
+ // - Chrome's header ordering (sec-ch-ua before accept, etc.)
74
+ // - Chrome's User-Agent string
75
+ // - Certificate compression (brotli)
76
+ // - ALPS/ALPN negotiation
 
 
 
 
 
 
77
  let client = Client::builder()
78
+ .impersonate(Impersonate::Chrome137)
79
  .timeout(Duration::from_millis(timeout_ms as u64))
80
+ .redirect(rquest::redirect::Policy::limited(10))
 
81
  .pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms))
82
  .pool_max_idle_per_host(pool_max_idle_per_host)
83
+ // Cookie store is CRITICAL for Cloudflare:
84
+ // CF sets cookies after passing a challenge; subsequent requests
85
+ // must include these cookies or get re-challenged.
86
+ .cookie_store(true)
87
+ // Content-encoding decompression (browsers always support these)
88
  .gzip(true)
89
  .brotli(true)
90
  .deflate(true)
 
 
 
 
91
  .build()
92
+ .expect("failed to build HTTP client with Chrome impersonation");
93
 
94
  Self {
95
  client,
 
113
  None
114
  }
115
 
116
+ /// Store response in cache (only 2xx, < 2MB, respecting Cache-Control)
117
  async fn store_cache(
118
  &self,
119
  url: &str,
 
122
  headers: &HashMap<String, String>,
123
  final_url: &str,
124
  ) {
125
+ if status < 200 || status >= 300 {
126
  return;
127
  }
128
  if body.len() > 2 * 1024 * 1024 {
 
136
  if let Some(pos) = cc.find("max-age=") {
137
  let rest = &cc[pos + 8..];
138
  let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
139
+ rest[..end]
140
+ .parse::<u64>()
141
+ .ok()
142
  .map(|secs| Duration::from_secs(secs.min(300)))
143
  .unwrap_or(Duration::from_secs(60))
144
  } else {
 
149
  };
150
 
151
  let mut cache = self.cache.write().await;
 
152
  if cache.len() >= 500 {
153
  cache.retain(|_, v| v.is_fresh());
154
  }
155
  if cache.len() >= 500 {
156
+ return;
157
  }
158
 
159
  cache.insert(
 
169
  );
170
  }
171
 
172
+ /// Send an HTTP request with full Chrome impersonation.
173
+ ///
174
+ /// IMPORTANT: Plugin-provided headers are applied AFTER the Chrome defaults.
175
+ /// This means plugins CAN override specific headers (e.g., custom Referer)
176
+ /// without breaking the fingerprint. The TLS fingerprint and H2 SETTINGS
177
+ /// are set at the connection level and cannot be overridden by headers.
178
+ ///
179
+ /// The only headers plugins should typically set are:
180
+ /// - Referer (for same-origin API calls)
181
+ /// - X-Requested-With (for XHR detection)
182
+ /// - Authorization / API keys
183
+ /// - Content-Type (for POST)
184
+ ///
185
+ /// Do NOT set User-Agent, Sec-CH-UA, Sec-Fetch-* — these are handled
186
+ /// by the impersonation layer and setting them manually may break ordering.
187
  pub async fn send_request(
188
  &self,
189
  method: &str,
 
208
  _ => self.client.get(url),
209
  };
210
 
211
+ // Apply plugin headers.
212
+ // IMPORTANT: Only set headers that the plugin explicitly provides.
213
+ // The Chrome impersonation already handles UA, Sec-CH-UA, Sec-Fetch-*, etc.
214
+ // Plugin headers that conflict with impersonation defaults will override them.
215
  for (k, v) in &headers {
216
  req = req.header(k.as_str(), v.as_str());
217
  }
218
 
219
+ // Auto-generate Referer from URL origin if not provided by plugin.
220
+ // Chrome automatically sends Referer on same-origin requests.
221
+ let has_referer = headers
222
+ .iter()
223
+ .any(|(k, _)| k.eq_ignore_ascii_case("referer"));
224
  if !has_referer {
 
225
  if let Ok(parsed) = url::Url::parse(url) {
226
+ let origin = format!(
227
+ "{}://{}/",
228
+ parsed.scheme(),
229
+ parsed.host_str().unwrap_or("")
230
+ );
231
  req = req.header("Referer", &origin);
232
  }
233
  }