krystv committed on
Commit
f812c95
·
verified ·
1 Parent(s): 3374e90

Upload crates/bex-core/src/http_service.rs

Browse files
Files changed (1) hide show
  1. crates/bex-core/src/http_service.rs +97 -21
crates/bex-core/src/http_service.rs CHANGED
@@ -4,6 +4,50 @@ use std::sync::Arc;
4
  use std::time::Duration;
5
  use tokio::sync::RwLock;
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  /// Cache entry for HTTP responses.
8
  #[derive(Clone)]
9
  struct CacheEntry {
@@ -28,18 +72,40 @@ pub struct HttpHostService {
28
 
29
  impl HttpHostService {
30
  pub fn new(
31
- user_agent: &str,
32
  timeout_ms: u32,
33
  pool_idle_timeout_ms: u64,
34
  pool_max_idle_per_host: usize,
35
  ) -> Self {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  let client = Client::builder()
37
  .timeout(Duration::from_millis(timeout_ms as u64))
38
  .redirect(reqwest::redirect::Policy::limited(10))
39
- .user_agent(user_agent)
40
  .pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms))
41
  .pool_max_idle_per_host(pool_max_idle_per_host)
42
  .use_rustls_tls()
 
 
 
 
 
 
 
43
  .build()
44
  .expect("failed to build HTTP client");
45
 
@@ -65,7 +131,7 @@ impl HttpHostService {
65
  None
66
  }
67
 
68
- /// Store response in cache if appropriate
69
  async fn store_cache(
70
  &self,
71
  url: &str,
@@ -74,28 +140,23 @@ impl HttpHostService {
74
  headers: &HashMap<String, String>,
75
  final_url: &str,
76
  ) {
77
- // Only cache successful GET responses with cacheable status
78
  if status != 200 && status != 301 && status != 302 {
79
  return;
80
  }
81
- // Don't cache huge responses
82
- if body.len() > 1024 * 1024 {
83
  return;
84
  }
85
 
86
- // Determine max-age from Cache-Control header
87
  let max_age = if let Some(cc) = headers.get("cache-control") {
88
  if cc.contains("no-store") || cc.contains("no-cache") || cc.contains("private") {
89
- return; // Don't cache
90
  }
91
  if let Some(pos) = cc.find("max-age=") {
92
  let rest = &cc[pos + 8..];
93
  let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
94
- if let Ok(secs) = rest[..end].parse::<u64>() {
95
- Duration::from_secs(secs.min(300)) // Cap at 5 minutes
96
- } else {
97
- Duration::from_secs(60)
98
- }
99
  } else {
100
  Duration::from_secs(60)
101
  }
@@ -103,14 +164,15 @@ impl HttpHostService {
103
  Duration::from_secs(60)
104
  };
105
 
106
- let cache = self.cache.read().await;
107
- // Limit cache size to prevent unbounded memory growth
108
  if cache.len() >= 500 {
109
- return;
 
 
 
110
  }
111
- drop(cache);
112
 
113
- let mut cache = self.cache.write().await;
114
  cache.insert(
115
  url.to_string(),
116
  CacheEntry {
@@ -132,7 +194,7 @@ impl HttpHostService {
132
  body: Option<Vec<u8>>,
133
  timeout_ms: Option<u32>,
134
  ) -> anyhow::Result<(u16, Vec<u8>, HashMap<String, String>, String)> {
135
- // Only cache GET requests
136
  if method == "GET" {
137
  if let Some(cached) = self.check_cache(url).await {
138
  return Ok(cached);
@@ -148,8 +210,22 @@ impl HttpHostService {
148
  _ => self.client.get(url),
149
  };
150
 
151
- for (k, v) in headers {
152
- req = req.header(&k, &v);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  }
154
 
155
  if let Some(b) = body {
 
4
  use std::time::Duration;
5
  use tokio::sync::RwLock;
6
 
7
+ /// User-Agent string of a real Chrome 137 build (Windows 10, x64), used as the default when plugins don't provide their own.
8
+ /// Presenting a real browser's User-Agent makes requests less likely to be flagged by bot detection.
9
+ pub const DEFAULT_BROWSER_UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36";
10
+
11
+ /// Standard browser headers that MUST be sent with every request to avoid detection.
12
+ /// These are the headers Chrome sends automatically on every navigation/fetch.
13
+ pub fn browser_default_headers() -> Vec<(&'static str, &'static str)> {
14
+ vec![
15
+ ("User-Agent", DEFAULT_BROWSER_UA),
16
+ ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"),
17
+ ("Accept-Language", "en-US,en;q=0.9"),
18
+ ("Accept-Encoding", "gzip, deflate, br"),
19
+ ("Sec-CH-UA", "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\""),
20
+ ("Sec-CH-UA-Mobile", "?0"),
21
+ ("Sec-CH-UA-Platform", "\"Windows\""),
22
+ ("Sec-Fetch-Dest", "document"),
23
+ ("Sec-Fetch-Mode", "navigate"),
24
+ ("Sec-Fetch-Site", "none"),
25
+ ("Sec-Fetch-User", "?1"),
26
+ ("Upgrade-Insecure-Requests", "1"),
27
+ ("Connection", "keep-alive"),
28
+ ("DNT", "1"),
29
+ ]
30
+ }
31
+
32
+ /// Headers for XHR/API requests (Sec-Fetch differs from navigation)
33
+ pub fn browser_xhr_headers(referer: &str) -> Vec<(String, String)> {
34
+ vec![
35
+ ("User-Agent".to_string(), DEFAULT_BROWSER_UA.to_string()),
36
+ ("Accept".to_string(), "application/json, text/javascript, */*; q=0.01".to_string()),
37
+ ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
38
+ ("Accept-Encoding".to_string(), "gzip, deflate, br".to_string()),
39
+ ("Sec-CH-UA".to_string(), "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\"".to_string()),
40
+ ("Sec-CH-UA-Mobile".to_string(), "?0".to_string()),
41
+ ("Sec-CH-UA-Platform".to_string(), "\"Windows\"".to_string()),
42
+ ("Sec-Fetch-Dest".to_string(), "empty".to_string()),
43
+ ("Sec-Fetch-Mode".to_string(), "cors".to_string()),
44
+ ("Sec-Fetch-Site".to_string(), "same-origin".to_string()),
45
+ ("X-Requested-With".to_string(), "XMLHttpRequest".to_string()),
46
+ ("Referer".to_string(), referer.to_string()),
47
+ ("Connection".to_string(), "keep-alive".to_string()),
48
+ ]
49
+ }
50
+
51
  /// Cache entry for HTTP responses.
52
  #[derive(Clone)]
53
  struct CacheEntry {
 
72
 
73
  impl HttpHostService {
74
  pub fn new(
75
+ _user_agent: &str, // Ignored — we always use real browser UA
76
  timeout_ms: u32,
77
  pool_idle_timeout_ms: u64,
78
  pool_max_idle_per_host: usize,
79
  ) -> Self {
80
+ // Build client mimicking Chrome as closely as possible:
81
+ // - HTTP/2 enabled (Chrome always uses H2)
82
+ // - Proper header ordering via reqwest::header::HeaderMap
83
+ // - gzip/br/deflate decompression
84
+ // - Real browser User-Agent (not "BexEngine/6.0")
85
+ let mut headers = reqwest::header::HeaderMap::new();
86
+ for (k, v) in browser_default_headers() {
87
+ if let (Ok(name), Ok(val)) = (
88
+ reqwest::header::HeaderName::from_bytes(k.as_bytes()),
89
+ reqwest::header::HeaderValue::from_str(v),
90
+ ) {
91
+ headers.insert(name, val);
92
+ }
93
+ }
94
+
95
  let client = Client::builder()
96
  .timeout(Duration::from_millis(timeout_ms as u64))
97
  .redirect(reqwest::redirect::Policy::limited(10))
98
+ .default_headers(headers)
99
  .pool_idle_timeout(Duration::from_millis(pool_idle_timeout_ms))
100
  .pool_max_idle_per_host(pool_max_idle_per_host)
101
  .use_rustls_tls()
102
+ .gzip(true)
103
+ .brotli(true)
104
+ .deflate(true)
105
+ // Enable HTTP/2 (Chrome always uses it)
106
+ .http2_prior_knowledge(false) // Allow HTTP/2 via ALPN negotiation
107
+ // Cookie store for session persistence (critical for CF challenges)
108
+ .cookie_store(true)
109
  .build()
110
  .expect("failed to build HTTP client");
111
 
 
131
  None
132
  }
133
 
134
+ /// Store response in cache
135
  async fn store_cache(
136
  &self,
137
  url: &str,
 
140
  headers: &HashMap<String, String>,
141
  final_url: &str,
142
  ) {
 
143
  if status != 200 && status != 301 && status != 302 {
144
  return;
145
  }
146
+ if body.len() > 2 * 1024 * 1024 {
 
147
  return;
148
  }
149
 
 
150
  let max_age = if let Some(cc) = headers.get("cache-control") {
151
  if cc.contains("no-store") || cc.contains("no-cache") || cc.contains("private") {
152
+ return;
153
  }
154
  if let Some(pos) = cc.find("max-age=") {
155
  let rest = &cc[pos + 8..];
156
  let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
157
+ rest[..end].parse::<u64>().ok()
158
+ .map(|secs| Duration::from_secs(secs.min(300)))
159
+ .unwrap_or(Duration::from_secs(60))
 
 
160
  } else {
161
  Duration::from_secs(60)
162
  }
 
164
  Duration::from_secs(60)
165
  };
166
 
167
+ let mut cache = self.cache.write().await;
168
+ // Evict stale entries before adding
169
  if cache.len() >= 500 {
170
+ cache.retain(|_, v| v.is_fresh());
171
+ }
172
+ if cache.len() >= 500 {
173
+ return; // Still full after eviction
174
  }
 
175
 
 
176
  cache.insert(
177
  url.to_string(),
178
  CacheEntry {
 
194
  body: Option<Vec<u8>>,
195
  timeout_ms: Option<u32>,
196
  ) -> anyhow::Result<(u16, Vec<u8>, HashMap<String, String>, String)> {
197
+ // Cache check for GET
198
  if method == "GET" {
199
  if let Some(cached) = self.check_cache(url).await {
200
  return Ok(cached);
 
210
  _ => self.client.get(url),
211
  };
212
 
213
+ // CRITICAL: Apply plugin headers AFTER default headers.
214
+ // This lets plugins override defaults (e.g., different Referer).
215
+ // reqwest merges: plugin headers take precedence over defaults.
216
+ for (k, v) in &headers {
217
+ req = req.header(k.as_str(), v.as_str());
218
+ }
219
+
220
+ // If no User-Agent provided by plugin, the default_headers has one.
221
+ // If no Referer provided, add the origin of the URL being requested.
222
+ let has_referer = headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("referer"));
223
+ if !has_referer {
224
+ // Auto-generate Referer from URL origin (like a real browser)
225
+ if let Ok(parsed) = url::Url::parse(url) {
226
+ let origin = format!("{}://{}/", parsed.scheme(), parsed.host_str().unwrap_or(""));
227
+ req = req.header("Referer", &origin);
228
+ }
229
  }
230
 
231
  if let Some(b) = body {