Spaces:
Running
Running
feat: PDF backend enhancements — 8 MB HTML size limit, dynamic timeout with cubic formula, protocolTimeout=0, temp file method for large HTML
Browse files
server.js
CHANGED
|
@@ -28,6 +28,7 @@ const cors = require('cors');
|
|
| 28 |
const { getHighlighter } = require('shiki');
|
| 29 |
const fs = require('fs');
|
| 30 |
const path = require('path');
|
|
|
|
| 31 |
|
| 32 |
let ChartJSNodeCanvas = null;
|
| 33 |
try {
|
|
@@ -188,12 +189,23 @@ app.post('/api/generate_pdf', async (req, res) => {
|
|
| 188 |
}
|
| 189 |
|
| 190 |
const brandText = showWatermark !== false ? 'Powered by XWX AI Chat Exporter' : '';
|
| 191 |
-
const
|
|
|
|
| 192 |
const imgCount = imageCount || 0;
|
| 193 |
const imgSizeMB = totalImageSizeMB || 0;
|
| 194 |
|
| 195 |
console.log(`[PDF-GEN] [${getElapsed()}] 解析请求完成: HTML ${htmlSizeMB} MB, 图片 ${imgCount} 张 (${imgSizeMB} MB)`);
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
// 移除 <script> 标签防止 Puppeteer 执行阻塞(如 alert() 弹窗会导致 setContent 内部 CDP 调用超时)
|
| 198 |
htmlToProcess = htmlToProcess.replace(/<script[\s\S]*?<\/script>/gi, '');
|
| 199 |
console.log(`[PDF-GEN] [${getElapsed()}] 已移除 <script> 标签`);
|
|
@@ -211,8 +223,16 @@ app.post('/api/generate_pdf', async (req, res) => {
|
|
| 211 |
const sizeWaitTime = imgSizeMB > 0 ? Math.min(imgSizeMB * 100, 3000) : 0;
|
| 212 |
const totalWaitTime = Math.min(baseWaitTime + sizeWaitTime, 8000);
|
| 213 |
|
| 214 |
-
// 动态计算
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
const baseTimeout = 60000;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
let extraMs = 0;
|
| 217 |
if (imgCount <= 30) {
|
| 218 |
extraMs = imgCount * 3000;
|
|
@@ -224,18 +244,29 @@ app.post('/api/generate_pdf', async (req, res) => {
|
|
| 224 |
extraMs = 640000 + (imgCount - 100) * 5600;
|
| 225 |
}
|
| 226 |
const sizeExtraMs = (imgSizeMB || 0) * 3000;
|
| 227 |
-
const networkTimeout = Math.min(baseTimeout + extraMs + sizeExtraMs, 1200000);
|
| 228 |
|
| 229 |
// 动态计算 setContent 超时上限(setContent 需要解析 HTML + 加载资源)
|
| 230 |
-
const setContentTimeout = Math.min(60000 + extraMs + sizeExtraMs,
|
| 231 |
|
| 232 |
// 动态计算截图超时上限
|
| 233 |
-
const screenshotTimeout = Math.min(30000 + (imgCount > 0 ? extraMs / 10 : 0), 120000);
|
| 234 |
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
console.log(`[PDF-GEN] [${getElapsed()}] 正在启动浏览器...`);
|
|
|
|
|
|
|
|
|
|
| 237 |
browser = await puppeteer.launch({
|
| 238 |
executablePath: '/usr/bin/chromium',
|
|
|
|
| 239 |
args: [
|
| 240 |
'--no-sandbox',
|
| 241 |
'--disable-setuid-sandbox',
|
|
@@ -254,11 +285,25 @@ app.post('/api/generate_pdf', async (req, res) => {
|
|
| 254 |
await page.setViewport({ width: 1200, height: 800 });
|
| 255 |
console.log(`[PDF-GEN] [${getElapsed()}] Viewport: 1200x800`);
|
| 256 |
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
await page.waitForNetworkIdle({ idleTime: 500, timeout: networkTimeout });
|
| 263 |
console.log(`[PDF-GEN] [${getElapsed()}] 页面内容加载完成`);
|
| 264 |
|
|
@@ -378,11 +423,10 @@ app.post('/api/generate_pdf', async (req, res) => {
|
|
| 378 |
|
| 379 |
console.log(`[PDF-GEN] [${getElapsed()}] 正在生成 PDF 二进制流...`);
|
| 380 |
|
| 381 |
-
//
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
const pdfBuffer = await page.pdf({
|
| 386 |
format: 'A4',
|
| 387 |
printBackground: true,
|
| 388 |
preferCSSPageSize: true,
|
|
@@ -399,9 +443,22 @@ app.post('/api/generate_pdf', async (req, res) => {
|
|
| 399 |
bottom: '20mm',
|
| 400 |
left: '10mm',
|
| 401 |
right: '10mm'
|
| 402 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
});
|
| 404 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
const pdfSizeMB = (pdfBuffer.length / 1024 / 1024).toFixed(2);
|
| 406 |
console.log(`[PDF-GEN] [${getElapsed()}] PDF 生成成功 (${pdfSizeMB} MB),正在关闭浏览器...`);
|
| 407 |
|
|
@@ -419,6 +476,10 @@ app.post('/api/generate_pdf', async (req, res) => {
|
|
| 419 |
if (browser) {
|
| 420 |
try { await browser.close(); } catch (e) {}
|
| 421 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
res.status(500).json({ error: 'Internal Server Error', details: error.message });
|
| 423 |
}
|
| 424 |
});
|
|
|
|
| 28 |
const { getHighlighter } = require('shiki');
|
| 29 |
const fs = require('fs');
|
| 30 |
const path = require('path');
|
| 31 |
+
const os = require('os');
|
| 32 |
|
| 33 |
let ChartJSNodeCanvas = null;
|
| 34 |
try {
|
|
|
|
| 189 |
}
|
| 190 |
|
| 191 |
const brandText = showWatermark !== false ? 'Powered by XWX AI Chat Exporter' : '';
|
| 192 |
+
const htmlSizeMBNum = Buffer.byteLength(html, 'utf8') / (1024 * 1024);
|
| 193 |
+
const htmlSizeMB = htmlSizeMBNum.toFixed(2);
|
| 194 |
const imgCount = imageCount || 0;
|
| 195 |
const imgSizeMB = totalImageSizeMB || 0;
|
| 196 |
|
| 197 |
console.log(`[PDF-GEN] [${getElapsed()}] 解析请求完成: HTML ${htmlSizeMB} MB, 图片 ${imgCount} 张 (${imgSizeMB} MB)`);
|
| 198 |
|
| 199 |
+
// HTML 大小硬性上限:超过 8 MB 时 Chromium PDF 引擎会崩溃
|
| 200 |
+
// Benchmark 验证:7.01 MB 需要 31 分钟,已超过用户可接受等待时间
|
| 201 |
+
// 建议用户减少内容量(如精简代码块、移除冗余样式)后重试
|
| 202 |
+
const MAX_HTML_SIZE_MB = 8;
|
| 203 |
+
if (htmlSizeMBNum > MAX_HTML_SIZE_MB) {
|
| 204 |
+
const errorMsg = `HTML 内容过大 (${htmlSizeMB} MB,超过 ${MAX_HTML_SIZE_MB} MB 上限),无法生成 PDF。建议:1) 精简对话内容;2) 移除大型代码块;3) 分批导出。`;
|
| 205 |
+
console.log(`[PDF-GEN] [${getElapsed()}] ${errorMsg}`);
|
| 206 |
+
return res.status(413).json({ error: errorMsg });
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
// 移除 <script> 标签防止 Puppeteer 执行阻塞(如 alert() 弹窗会导致 setContent 内部 CDP 调用超时)
|
| 210 |
htmlToProcess = htmlToProcess.replace(/<script[\s\S]*?<\/script>/gi, '');
|
| 211 |
console.log(`[PDF-GEN] [${getElapsed()}] 已移除 <script> 标签`);
|
|
|
|
| 223 |
const sizeWaitTime = imgSizeMB > 0 ? Math.min(imgSizeMB * 100, 3000) : 0;
|
| 224 |
const totalWaitTime = Math.min(baseWaitTime + sizeWaitTime, 8000);
|
| 225 |
|
| 226 |
+
// 动态计算超时上限
|
| 227 |
+
// Benchmark 数据揭示 PDF 渲染时间呈三次方增长:
|
| 228 |
+
// 0.82MB→8s, 1.64MB→16s, 3.28MB→85s, 4.92MB→>600s, 7.01MB→>1183s
|
| 229 |
+
// 拟合公式 (size 为 HTML 大小, 单位 MB): 渲染时间(秒) ≈ 10*size³ + 10*size² + 2*size;代码中 pdfRenderMs 为该值 ×1000 后的毫秒数
|
| 230 |
+
// 超时 = 2× 安全系数; 各阶段上限: waitForNetworkIdle 1200s (20 min), setContent / pdf 3600s (60 min), 截图 120s
|
| 231 |
const baseTimeout = 60000;
|
| 232 |
+
const s = htmlSizeMBNum;
|
| 233 |
+
const pdfRenderMs = (10000 * s * s * s + 10000 * s * s + 2000 * s); // 预估 PDF 实际渲染时间 (ms)
|
| 234 |
+
const htmlExtraMs = Math.ceil(pdfRenderMs * 2); // 2x 安全系数
|
| 235 |
+
|
| 236 |
let extraMs = 0;
|
| 237 |
if (imgCount <= 30) {
|
| 238 |
extraMs = imgCount * 3000;
|
|
|
|
| 244 |
extraMs = 640000 + (imgCount - 100) * 5600;
|
| 245 |
}
|
| 246 |
const sizeExtraMs = (imgSizeMB || 0) * 3000;
|
| 247 |
+
const networkTimeout = Math.min(baseTimeout + extraMs + sizeExtraMs + htmlExtraMs, 1200000);
|
| 248 |
|
| 249 |
// 动态计算 setContent 超时上限(setContent 需要解析 HTML + 加载资源)
|
| 250 |
+
const setContentTimeout = Math.min(60000 + extraMs + sizeExtraMs + htmlExtraMs, 3600000);
|
| 251 |
|
| 252 |
// 动态计算截图超时上限
|
| 253 |
+
const screenshotTimeout = Math.min(30000 + (imgCount > 0 ? extraMs / 10 : 0) + htmlExtraMs / 10, 120000);
|
| 254 |
|
| 255 |
+
// 动态计算 PDF 生成超时上限
|
| 256 |
+
const pdfTimeout = Math.min(120000 + extraMs / 5 + sizeExtraMs / 5 + htmlExtraMs, 3600000);
|
| 257 |
+
|
| 258 |
+
console.log(`[PDF-GEN] [${getElapsed()}] 动态超时: setContent=${(setContentTimeout / 1000).toFixed(0)}s, waitForNetworkIdle=${(networkTimeout / 1000).toFixed(0)}s, pdf=${(pdfTimeout / 1000).toFixed(0)}s (HTML=${htmlSizeMB}MB, 图片=${imgCount}张)`);
|
| 259 |
+
if (htmlSizeMBNum > 4) {
|
| 260 |
+
const estMinutes = (pdfTimeout / 60000).toFixed(0);
|
| 261 |
+
console.log(`[PDF-GEN] [${getElapsed()}] ⚠️ 大文件预警: ${htmlSizeMB} MB HTML 预计需要 ${estMinutes} 分钟`);
|
| 262 |
+
}
|
| 263 |
console.log(`[PDF-GEN] [${getElapsed()}] 正在启动浏览器...`);
|
| 264 |
+
// protocolTimeout: 0 = 禁用 CDP 协议层超时
|
| 265 |
+
// 参考: https://github.com/puppeteer/puppeteer/issues/9927
|
| 266 |
+
// PDF 超时时由应用层 Promise.race 控制,不依赖协议层超时
|
| 267 |
browser = await puppeteer.launch({
|
| 268 |
executablePath: '/usr/bin/chromium',
|
| 269 |
+
protocolTimeout: 0,
|
| 270 |
args: [
|
| 271 |
'--no-sandbox',
|
| 272 |
'--disable-setuid-sandbox',
|
|
|
|
| 285 |
await page.setViewport({ width: 1200, height: 800 });
|
| 286 |
console.log(`[PDF-GEN] [${getElapsed()}] Viewport: 1200x800`);
|
| 287 |
|
| 288 |
+
// 大 HTML (> 5 MB) 使用临时文件法,避免 CDP WebSocket 传输限制
|
| 289 |
+
// 参考: https://danindu.medium.com/optimizing-puppeteer-for-pdf-generation-8b7777edbeca
|
| 290 |
+
const isLargeHtml = htmlSizeMBNum > 5;
|
| 291 |
+
let tempFilePath = null;
|
| 292 |
+
|
| 293 |
+
console.log(`[PDF-GEN] [${getElapsed()}] 正在${isLargeHtml ? '通过临时文件加载' : '填充'}页面内容...`);
|
| 294 |
+
if (isLargeHtml) {
|
| 295 |
+
tempFilePath = path.join(os.tmpdir(), `xwx-pdf-${Date.now()}.html`);
|
| 296 |
+
fs.writeFileSync(tempFilePath, htmlToUse, 'utf8');
|
| 297 |
+
await page.goto(`file://${tempFilePath}`, {
|
| 298 |
+
waitUntil: ['load', 'networkidle0'],
|
| 299 |
+
timeout: setContentTimeout
|
| 300 |
+
});
|
| 301 |
+
} else {
|
| 302 |
+
await page.setContent(htmlToUse, {
|
| 303 |
+
waitUntil: ['load', 'networkidle0'],
|
| 304 |
+
timeout: setContentTimeout
|
| 305 |
+
});
|
| 306 |
+
}
|
| 307 |
await page.waitForNetworkIdle({ idleTime: 500, timeout: networkTimeout });
|
| 308 |
console.log(`[PDF-GEN] [${getElapsed()}] 页面内容加载完成`);
|
| 309 |
|
|
|
|
| 423 |
|
| 424 |
console.log(`[PDF-GEN] [${getElapsed()}] 正在生成 PDF 二进制流...`);
|
| 425 |
|
| 426 |
+
// timeout: 0 = 禁用 page.pdf() 内部超时(默认 30s)
|
| 427 |
+
// 参考: https://stackoverflow.com/questions/69436420
|
| 428 |
+
// 应用层用 Promise.race 做超时控制
|
| 429 |
+
const pdfPromise = page.pdf({
|
|
|
|
| 430 |
format: 'A4',
|
| 431 |
printBackground: true,
|
| 432 |
preferCSSPageSize: true,
|
|
|
|
| 443 |
bottom: '20mm',
|
| 444 |
left: '10mm',
|
| 445 |
right: '10mm'
|
| 446 |
+
},
|
| 447 |
+
timeout: 0
|
| 448 |
+
});
|
| 449 |
+
|
| 450 |
+
const timeoutPromise = new Promise((_, reject) => {
|
| 451 |
+
setTimeout(() => reject(new Error(`PDF 生成超时 (${(pdfTimeout / 1000).toFixed(0)}s)`)), pdfTimeout);
|
| 452 |
});
|
| 453 |
|
| 454 |
+
const pdfBuffer = await Promise.race([pdfPromise, timeoutPromise]);
|
| 455 |
+
|
| 456 |
+
// 清理临时文件
|
| 457 |
+
if (tempFilePath) {
|
| 458 |
+
try { fs.unlinkSync(tempFilePath); } catch {}
|
| 459 |
+
console.log(`[PDF-GEN] [${getElapsed()}] 已清理临时文件`);
|
| 460 |
+
}
|
| 461 |
+
|
| 462 |
const pdfSizeMB = (pdfBuffer.length / 1024 / 1024).toFixed(2);
|
| 463 |
console.log(`[PDF-GEN] [${getElapsed()}] PDF 生成成功 (${pdfSizeMB} MB),正在关闭浏览器...`);
|
| 464 |
|
|
|
|
| 476 |
if (browser) {
|
| 477 |
try { await browser.close(); } catch (e) {}
|
| 478 |
}
|
| 479 |
+
// 清理临时文件
|
| 480 |
+
if (tempFilePath) {
|
| 481 |
+
try { fs.unlinkSync(tempFilePath); } catch {}
|
| 482 |
+
}
|
| 483 |
res.status(500).json({ error: 'Internal Server Error', details: error.message });
|
| 484 |
}
|
| 485 |
});
|