import threading
import time

# Sustained budget: how many tokens may be consumed per minute.
TOKENS_PER_MINUTE = 5500

# Refill rate in tokens per second, derived from the per-minute budget.
_tokens_per_second = TOKENS_PER_MINUTE / 60.0
# Upper bound on the stored balance (one minute's worth of tokens).
_capacity = TOKENS_PER_MINUTE

# Guards every read/write of _tokens and _last_timestamp.
_lock = threading.Lock()
# Current balance. Starts full; may be negative while callers are waiting
# out a deficit they have already reserved.
_tokens = _capacity
# time.monotonic() reading taken at the most recent refill.
_last_timestamp = time.monotonic()


def consume(n: int) -> None:
    """Consume *n* tokens, waiting if necessary.

    This function implements a simple thread-safe token bucket to keep
    requests under the configured tokens-per-minute rate.

    The request is booked against the bucket immediately, letting the
    balance go negative when there are not enough tokens, and the caller
    then sleeps for exactly the time the refill rate needs to pay that
    deficit back. Later callers see the negative balance and wait
    correspondingly longer, so the long-run rate is preserved.

    Booking the request up front (instead of discarding the available
    tokens and retrying) is what guarantees termination: a retry loop never
    finishes when the bucket is partially filled, nor when *n* exceeds the
    bucket capacity, because the refill is capped at the capacity.

    Args:
        n: Number of tokens to consume. Values <= 0 are a no-op.
    """
    global _tokens, _last_timestamp

    if n <= 0:
        return

    with _lock:
        now = time.monotonic()
        elapsed = now - _last_timestamp
        # Refill for the time that passed, never storing more than capacity.
        _tokens = min(_capacity, _tokens + elapsed * _tokens_per_second)
        _last_timestamp = now

        # Reserve the tokens unconditionally; a negative balance is debt
        # that this caller pays off by sleeping below.
        _tokens -= n
        deficit = -_tokens if _tokens < 0 else 0.0

    # Sleep outside the lock so other threads can book their own requests.
    if deficit > 0:
        time.sleep(deficit / _tokens_per_second)