| |
| """ |
| Playwright初始化脚本 |
| 确保浏览器正确安装和配置 |
| 针对Hugging Face Space环境优化 |
| """ |
|
|
| import subprocess |
| import sys |
| import os |
| from pathlib import Path |
|
|
def setup_environment():
    """Create the cache directories and export the environment for Playwright.

    The cache root is ``<cwd>/.cache`` when the working directory is ``/app``
    or lives below it, otherwise ``<home>/.cache``. If the home directory
    cannot be resolved, the working directory is used instead.

    Inside the cache root the ``ms-playwright`` and ``tmp`` sub-directories
    are created, and the following variables are written to ``os.environ``
    (each assignment is echoed to stdout): ``PLAYWRIGHT_BROWSERS_PATH``,
    ``PLAYWRIGHT_SKIP_BROWSER_GC``, ``XDG_CACHE_HOME``, ``HOME``, ``TMPDIR``,
    ``TMP`` and ``TEMP``.

    Returns:
        str: The cache root directory.

    Raises:
        OSError: If one of the directories cannot be created.
    """
    current_dir = Path.cwd()
    app_root = Path("/app")

    # Compare path components rather than raw strings: a plain
    # startswith("/app") would also match unrelated siblings such as
    # "/application" or "/app-data".
    inside_app = current_dir == app_root or app_root in current_dir.parents

    if inside_app:
        cache_dir = current_dir / ".cache"
    else:
        try:
            cache_dir = Path.home() / ".cache"
        except (RuntimeError, KeyError):
            # Path.home() fails when the home directory cannot be resolved
            # (HOME unset and no passwd entry for the current UID); fall back
            # to a cache below the working directory.
            cache_dir = current_dir / ".cache"

    playwright_cache = cache_dir / "ms-playwright"
    tmp_dir = cache_dir / "tmp"

    # Create every directory before it is advertised through the environment.
    for directory in (cache_dir, playwright_cache, tmp_dir):
        directory.mkdir(parents=True, exist_ok=True, mode=0o755)

    env_vars = {
        "PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
        "PLAYWRIGHT_SKIP_BROWSER_GC": "1",
        "XDG_CACHE_HOME": str(cache_dir),
        # HOME is redirected to the parent of the cache root.
        "HOME": str(cache_dir.parent),
        "TMPDIR": str(tmp_dir),
        "TMP": str(tmp_dir),
        "TEMP": str(tmp_dir),
    }

    for key, value in env_vars.items():
        os.environ[key] = value
        print(f"🔧 设置环境变量: {key}={value}")

    print(f"📁 缓存目录: {cache_dir}")
    print(f"🎭 Playwright缓存: {playwright_cache}")
    print(f"📂 当前工作目录: {current_dir}")

    return str(cache_dir)
|
|
def check_playwright_installation():
    """Verify that Playwright can be imported and can drive headless Chromium.

    The check imports ``playwright.sync_api``, launches Chromium in headless
    mode and closes it again, printing a progress line after each step.

    Returns:
        bool: True when every step succeeded; False when any step raised.
        The exception message and its type name are printed in that case.
    """
    passed = False
    try:
        print("🔍 开始检查 Playwright 安装...")

        # Imported lazily so that a missing package is reported as a failed
        # check instead of breaking the import of this module.
        from playwright.sync_api import sync_playwright
        print("✅ Playwright 模块导入成功")

        print("🌐 尝试启动 Chromium 浏览器...")
        with sync_playwright() as playwright:
            chromium = playwright.chromium.launch(headless=True)
            print("✅ 浏览器启动成功")
            chromium.close()
            print("✅ 浏览器关闭成功")

        print("✅ Playwright 浏览器检查通过!")
        passed = True
    except Exception as exc:
        # Best-effort probe: any failure just means "not usable yet".
        error_kind = type(exc).__name__
        print(f"❌ Playwright 浏览器检查失败: {exc}")
        print(f"🔍 错误类型: {error_kind}")
    return passed
|
|
def install_browsers():
    """Install the Chromium browser through ``python -m playwright install``.

    The relevant environment variables and the command line are echoed first.
    The command runs with the current interpreter and a copy of the current
    environment; its return code, stdout and stderr are printed afterwards.

    Returns:
        bool: True when the command exits with status 0. False when it exits
        with a non-zero status or could not be started at all.
    """
    try:
        print("🔄 正在安装 Playwright 浏览器...")

        print("🔍 当前环境变量:")
        for key in ("PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"):
            print(f" {key}: {os.environ.get(key, 'NOT SET')}")

        cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
        print(f"🚀 执行命令: {' '.join(cmd)}")

        # check=True is deliberately not used: a non-zero exit status is
        # reported through result.returncode below, so CalledProcessError is
        # never raised and needs no dedicated handler.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            env=os.environ.copy(),
        )

        print(f"📤 命令返回码: {result.returncode}")
        if result.stdout:
            print(f"📜 标准输出:\n{result.stdout}")
        if result.stderr:
            print(f"⚠️ 错误输出:\n{result.stderr}")

        if result.returncode != 0:
            print("❌ 安装命令执行失败")
            return False

        print("✅ Playwright 浏览器安装完成!")
        return True

    except Exception as e:
        # Best-effort boundary: failing to launch the command (for example a
        # missing interpreter) is reported as a failed installation.
        print(f"❌ 安装过程中出现异常: {e}")
        return False
|
|
def main():
    """Run the Playwright bootstrap sequence.

    Steps: configure the environment, probe the browser, and, if the probe
    fails, install the browser and probe once more.

    Returns:
        bool: True when a browser probe succeeds, False otherwise. The
        "all attempts failed" line is printed only when the installation
        itself fails, not when the probe after a successful install fails.
    """
    print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")

    # Called for its side effects; the returned cache path is not needed here.
    setup_environment()

    print("🔍 第一次检查...")
    if check_playwright_installation():
        return True

    print("🔧 检测到问题,正在重新安装浏览器...")
    if not install_browsers():
        print("😞 所有尝试都失败了")
        return False

    print("🔍 安装后重新检查...")
    return check_playwright_installation()
|
|
if __name__ == "__main__":
    # Script entry point: exit with status 1 and print debugging hints when
    # initialisation fails, otherwise report success.
    if main():
        print("🎉 Playwright 初始化成功!")
    else:
        print("❌ Playwright 初始化失败!")
        print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
        print("🔍 调试信息:")
        # os.getuid does not exist on every platform.
        get_uid = getattr(os, "getuid", None)
        current_uid = get_uid() if get_uid is not None else 'N/A'
        print(f" - 当前用户: {current_uid}")
        print(f" - 工作目录: {Path.cwd()}")
        print(f" - 环境变量 HOME: {os.environ.get('HOME', 'NOT SET')}")
        sys.exit(1)