Spaces:
Sleeping
Sleeping
| import asyncio | |
| import time | |
| import re | |
| import os | |
| from typing import Optional, Dict | |
| from ..core.logger import debug_logger | |
| # Conditionally import playwright | |
| try: | |
| from playwright.async_api import async_playwright, BrowserContext, Page | |
| PLAYWRIGHT_AVAILABLE = True | |
| except ImportError: | |
| PLAYWRIGHT_AVAILABLE = False | |
| # ... (保持原来的 parse_proxy_url 和 validate_browser_proxy_url 函数不变) ... | |
def parse_proxy_url(proxy_url: str) -> Optional[Dict[str, str]]:
    """Split a proxy URL into the pieces Playwright's ``proxy`` option expects.

    Accepted form: ``scheme://[username:password@]host:port`` where scheme is
    one of ``socks5``, ``http`` or ``https``. Surrounding whitespace is ignored.

    Args:
        proxy_url: The proxy URL to parse.

    Returns:
        A dict with a ``server`` key (``scheme://host:port``) plus ``username``
        and ``password`` keys when both credentials are present, or ``None``
        when the input is not a string or does not match the accepted form.
    """
    if not isinstance(proxy_url, str):
        return None

    # The host must not contain '@', '/' or whitespace; otherwise inputs such
    # as "http://user@host:80" would be parsed with "user@host" as the host.
    proxy_pattern = (
        r'(socks5|http|https)://'
        r'(?:([^:]+):([^@]+)@)?'
        r'([^:@/\s]+):(\d+)'
    )
    match = re.fullmatch(proxy_pattern, proxy_url.strip())
    if match is None:
        return None

    protocol, username, password, host, port = match.groups()
    if not 0 < int(port) <= 65535:
        return None

    proxy_config = {'server': f'{protocol}://{host}:{port}'}
    if username and password:
        proxy_config['username'] = username
        proxy_config['password'] = password
    return proxy_config
class BrowserCaptchaService:
    """Obtain reCAPTCHA tokens through a persistent, headed Chromium profile.

    The service launches Chromium via Playwright's persistent context so that
    cookies stored under ``user_data_dir`` are reused between runs. Each token
    request opens a new tab in that context and closes only the tab afterwards.
    """

    _instance: Optional['BrowserCaptchaService'] = None
    _lock = asyncio.Lock()

    def __init__(self, db=None):
        """Set up service state without starting the browser.

        Args:
            db: Optional object exposing an async ``get_captcha_config()``;
                used by ``initialize`` to read the browser proxy settings.
        """
        # Headed mode: the browser window is visible.
        self.headless = False
        self.playwright = None
        # In persistent mode we hold a BrowserContext rather than a Browser.
        self.context: Optional[BrowserContext] = None
        self._initialized = False
        # Site key passed to the reCAPTCHA script and to grecaptcha.execute.
        self.website_key = "6LdsFiUsAAAAAIjVDZcuLhaHiDn5nnHVXVRQGeMV"
        self.db = db
        # Profile directory created under the current working directory.
        self.user_data_dir = os.path.join(os.getcwd(), "browser_data")

    @classmethod
    async def get_instance(cls, db=None) -> 'BrowserCaptchaService':
        """Return the shared service instance, creating it on first use.

        The browser is not started here; it is launched lazily by
        ``get_token`` (or explicitly through ``initialize``).

        Args:
            db: Passed to the constructor only when the instance is created.

        Returns:
            The shared ``BrowserCaptchaService`` instance.
        """
        if cls._instance is None:
            async with cls._lock:
                # Re-check under the lock so only one instance is created.
                if cls._instance is None:
                    cls._instance = cls(db)
        return cls._instance

    async def _release_browser(self) -> bool:
        """Close the context and stop Playwright, then reset all state.

        Each step is attempted independently so a failure while closing the
        context does not leave the Playwright driver running.

        Returns:
            True when every step succeeded, False if any step raised.
        """
        clean = True
        if self.context:
            try:
                await self.context.close()
            except Exception as e:
                clean = False
                debug_logger.log_error(f"[BrowserCaptcha] 关闭异常: {str(e)}")
        self.context = None
        if self.playwright:
            try:
                await self.playwright.stop()
            except Exception as e:
                clean = False
                debug_logger.log_error(f"[BrowserCaptcha] 关闭异常: {str(e)}")
        self.playwright = None
        self._initialized = False
        return clean

    async def initialize(self):
        """Launch the persistent browser context if it is not already running.

        Raises:
            ImportError: If Playwright is not installed.
            Exception: Any error raised while reading the proxy configuration
                or launching the browser is logged and re-raised after
                partially started resources have been released.
        """
        if self._initialized and self.context:
            return

        try:
            if not PLAYWRIGHT_AVAILABLE:
                debug_logger.log_error("[BrowserCaptcha] ❌ Playwright 不可用,请使用 YesCaptcha 服务")
                raise ImportError("Playwright 未安装,请使用 YesCaptcha 服务")

            proxy_url = None
            if self.db:
                captcha_config = await self.db.get_captcha_config()
                if captcha_config.browser_proxy_enabled and captcha_config.browser_proxy_url:
                    proxy_url = captcha_config.browser_proxy_url

            debug_logger.log_info(f"[BrowserCaptcha] 正在启动浏览器 (用户数据目录: {self.user_data_dir})...")
            self.playwright = await async_playwright().start()

            launch_options = {
                'headless': self.headless,
                'user_data_dir': self.user_data_dir,
                'viewport': {'width': 1280, 'height': 720},
                'args': [
                    '--disable-blink-features=AutomationControlled',
                    '--disable-infobars',
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                ],
            }

            if proxy_url:
                proxy_config = parse_proxy_url(proxy_url)
                if proxy_config:
                    launch_options['proxy'] = proxy_config
                    debug_logger.log_info(f"[BrowserCaptcha] 使用代理: {proxy_config['server']}")
                else:
                    # Do not log the URL itself: it may contain credentials.
                    debug_logger.log_warning("[BrowserCaptcha] 代理地址格式无效,已忽略代理配置")

            # launch_persistent_context returns a BrowserContext backed by the
            # profile in user_data_dir.
            self.context = await self.playwright.chromium.launch_persistent_context(**launch_options)
            self.context.set_default_timeout(30000)
            self._initialized = True
            debug_logger.log_info(f"[BrowserCaptcha] ✅ 浏览器已启动 (Profile: {self.user_data_dir})")
        except Exception as e:
            debug_logger.log_error(f"[BrowserCaptcha] ❌ 浏览器启动失败: {str(e)}")
            # Release a started Playwright driver / half-open context so a
            # later retry starts from a clean state.
            await self._release_browser()
            raise

    async def get_token(self, project_id: str) -> Optional[str]:
        """Open the project page in a new tab and request a reCAPTCHA token.

        Args:
            project_id: Identifier interpolated into the Flow project URL.

        Returns:
            The token string, or ``None`` when the token could not be obtained
            or an error occurred while driving the page.

        Raises:
            Exception: Errors from ``initialize`` propagate, because the
                browser is started before the guarded section.
        """
        if not self._initialized or not self.context:
            await self.initialize()

        page: Optional[Page] = None
        try:
            # A new tab in the existing context reuses its stored cookies.
            page = await self.context.new_page()
            website_url = f"https://labs.google/fx/tools/flow/project/{project_id}"
            debug_logger.log_info(f"[BrowserCaptcha] 访问页面: {website_url}")

            # A navigation error is only a warning; the token is still attempted.
            try:
                await page.goto(website_url, wait_until="domcontentloaded")
            except Exception as e:
                debug_logger.log_warning(f"[BrowserCaptcha] 页面加载警告: {str(e)}")

            # NOTE(review): the source marks the injection/wait logic below as
            # a shortened placeholder for a fuller original implementation.
            script_loaded = await page.evaluate(
                "() => { return !!(window.grecaptcha && window.grecaptcha.execute); }"
            )
            if not script_loaded:
                await page.evaluate(f"""
                    () => {{
                        const script = document.createElement('script');
                        script.src = 'https://www.google.com/recaptcha/api.js?render={self.website_key}';
                        script.async = true; script.defer = true;
                        document.head.appendChild(script);
                    }}
                """)
                # Fixed delay to give the injected script time to load.
                await page.wait_for_timeout(2000)

            token = await page.evaluate(f"""
                async () => {{
                    try {{
                        return await window.grecaptcha.execute('{self.website_key}', {{ action: 'FLOW_GENERATION' }});
                    }} catch (e) {{ return null; }}
                }}
            """)

            if token:
                debug_logger.log_info(f"[BrowserCaptcha] ✅ Token获取成功")
                return token
            debug_logger.log_error("[BrowserCaptcha] Token获取失败")
            return None
        except Exception as e:
            debug_logger.log_error(f"[BrowserCaptcha] 异常: {str(e)}")
            return None
        finally:
            # Close only the tab; the context (browser window) stays open.
            if page:
                try:
                    await page.close()
                except Exception as e:
                    # Best effort, but never swallow cancellation/interrupts.
                    debug_logger.log_warning(f"[BrowserCaptcha] 关闭标签页失败: {str(e)}")

    async def close(self):
        """Shut the browser down completely and reset the service state.

        Errors during shutdown are logged rather than raised.
        """
        if await self._release_browser():
            debug_logger.log_info("[BrowserCaptcha] 浏览器服务已关闭")

    async def open_login_window(self):
        """Open the Google sign-in page in the persistent browser for manual login.

        The tab is left open so the user can sign in; the resulting session is
        kept in the persistent profile.
        """
        await self.initialize()
        page = await self.context.new_page()
        await page.goto("https://accounts.google.com/")
        print("请在打开的浏览器中登录账号。登录完成后,无需关闭浏览器,脚本下次运行时会自动使用此状态。")