/** * PDF Generation Server for XWX AI Chat Exporter * * Key Optimizations (2026-02-13): * * 1. headless: 'shell' mode (CRITICAL) * - Issue: Puppeteer's new headless mode ('new') causes severe PDF file size inflation * (e.g., 1.27MB images → 9.83MB PDF, nearly 8x larger) * - Solution: Use 'shell' mode instead of 'new' mode * - Result: PDF size reduced to normal (1.27MB images → ~1.5MB PDF) * - Reference: https://github.com/puppeteer/puppeteer/issues/458 * * 2. Extended image loading timeout (8 seconds) * - Issue: Base64 images need time to decode and render in Chromium * 2-second timeout caused 30% of images to fail loading * - Solution: Increased timeout from 2s to 8s for reliable base64 image rendering * - Result: 100% image loading success rate * * 3. waitForNetworkIdle after setContent * - Issue: page.setContent() doesn't wait for all resources to stabilize * - Solution: Added waitForNetworkIdle({ idleTime: 500 }) after setContent * - Result: Ensures all base64 images are fully decoded before PDF generation */ const express = require('express'); const puppeteer = require('puppeteer'); const cors = require('cors'); const { getHighlighter } = require('shiki'); const fs = require('fs'); const path = require('path'); const os = require('os'); let ChartJSNodeCanvas = null; try { ChartJSNodeCanvas = require('chartjs-node-canvas').ChartJSNodeCanvas; } catch (e) { console.log('[WIDGET] chartjs-node-canvas not yet installed, will retry on demand'); } // ─── Shiki Highlighter Initialization ───────────────── // Maps frontend codeTheme settings to Shiki theme names const THEME_MAP = { 'github': 'github-light', 'monokai': 'monokai', 'oneDark': 'one-dark-pro', }; let shikiHighlighter = null; async function initShiki() { try { console.log('[Shiki] Initializing highlighter with bundled languages...'); shikiHighlighter = await getHighlighter({ themes: ['github-light', 'monokai', 'one-dark-pro'], langs: [ // Core languages (all included in shiki default bundle) 'javascript', 
'typescript', 'python', 'java', 'c', 'cpp', 'csharp', 'go', 'rust', 'ruby', 'php', 'swift', 'kotlin', 'sql', 'bash', 'shell', 'yaml', 'json', 'html', 'xml', 'css', 'scss', 'less', 'markdown', 'diff', 'dockerfile', 'lua', 'r', 'dart', 'scala', // Additional languages 'perl', 'haskell', 'erlang', 'elixir', 'clojure', 'groovy', 'objective-c', 'asm', 'powershell', 'makefile', 'cmake', 'protobuf', 'graphql', 'toml', 'ini', 'git-rebase', // Rare languages (verified in shiki default bundle) 'abap', 'cobol', 'pascal', 'racket', 'latex', 'tex', 'viml', 'nginx', 'apache', 'vue', 'svelte', 'zig', 'matlab', 'julia', 'astro', ], }); const langs = shikiHighlighter.getLoadedLanguages(); console.log(`[Shiki] Highlighter ready with ${langs.length} languages: ${langs.slice(0, 20).join(', ')}...`); } catch (e) { console.error('[Shiki] Failed to initialize:', e.message); shikiHighlighter = null; } } // Pre-highlight code blocks in HTML using Shiki (inline styles) function highlightHtmlWithShiki(html, themeName = 'github-light') { if (!shikiHighlighter) return html; const shikiTheme = THEME_MAP[themeName] || 'github-light'; return html.replace(/
]*)>]*>([\s\S]*?)<\/code><\/pre>/gi, (match, preAttrs, codeText) => {
// Skip if already has syntax spans (Shiki-style or hljs-style)
if (/').replace(/&/g, '&').replace(/"/g, '"'),
{ lang, theme: shikiTheme }
);
// Restore data-language attribute for CSS language label display
highlighted = highlighted.replace(/ {
console.log('[Shiki] Startup initialization complete.');
}).catch(() => {});
// ─── Express application setup ─────────────────

// Listening port: the PORT environment variable when it is set to a
// non-empty value, otherwise 7860.
const port = process.env.PORT || 7860;

// True only when NODE_ENV is exactly 'test'; used to label output as
// test vs. production.
const isTest = process.env.NODE_ENV === 'test';

/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
const delay = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

const app = express();

// Register the cors middleware with its default options, then the JSON body
// parser with a 50mb payload limit.
app.use(cors());
app.use(express.json({ limit: '50mb' }));

// Root endpoint: plain-text status line that includes the environment label.
app.get('/', (_req, res) => {
  const envLabel = isTest ? '测试环境' : '生产环境';
  res.send(`Puppeteer PDF Server Running (${envLabel})`);
});
app.post('/api/generate_pdf', async (req, res) => {
const startTime = Date.now();
const envText = isTest ? '【测试环境】' : '【生产环境】';
const { platform, exportCount, language, exportPdf, exportMd, exportTxt, exportDocx, exportJson, exportClipboard, exportNotion, extensionVersion, imageCount, totalImageSizeMB, messageCount, isPro } = req.body;
// PRO status with text emphasis (no ANSI colors for Docker compatibility)
const userTag = isPro ? '★★★PRO★★★' : '◆FREE◆';
const platformStr = platform || 'unknown';
const versionStr = extensionVersion || '-';
const langStr = language || '-';
// Format counts
const fmtParts = [];
if (exportPdf != null) fmtParts.push(`PDF:${exportPdf}`);
if (exportMd != null) fmtParts.push(`MD:${exportMd}`);
if (exportTxt != null) fmtParts.push(`TXT:${exportTxt}`);
if (exportDocx != null) fmtParts.push(`DOCX:${exportDocx}`);
if (exportJson != null) fmtParts.push(`JSON:${exportJson}`);
if (exportClipboard != null) fmtParts.push(`CLIP:${exportClipboard}`);
if (exportNotion != null) fmtParts.push(`NOTION:${exportNotion}`);
const fmtStr = fmtParts.join(', ') || '-';
// Two-line log for readability
console.log(
`---------------[PDF-GEN] ${envText} ${userTag} 收到请求 | 平台: ${platformStr} | 版本: ${versionStr} | 语言: ${langStr}`
);
console.log(
` 导出: ${exportCount ?? '-'}次 | 格式: ${fmtStr} | 消息: ${messageCount ?? '-'}条 | 图片: ${imageCount ?? '-'}张 (${totalImageSizeMB ?? '-'}MB)`
);
console.log(`---------------`);
const getElapsed = () => ((Date.now() - startTime) / 1000).toFixed(2) + 's';
let browser = null;
try {
const { html, showWatermark, imageCount, totalImageSizeMB, messageCount, codeTheme, textOnlySizeMB } = req.body;
if (!html) {
return res.status(400).json({ error: 'Missing html content' });
}
// ─── Syntax Highlighting Path Selection ───
// Priority: codeTheme param > HTML detection > Shiki default
// 1. If codeTheme is sent (new plugin) → always use Shiki
// 2. If no codeTheme → check for highlight.js script in HTML
// - Found → legacy highlight.js path (old plugin)
// - Not found → Shiki default (no highlighting in HTML)
const hasHighlightJsScript = html.includes('highlight.min.js') || html.includes('highlight.full.min.js') || html.includes('hljs.highlightAll');
let htmlToProcess = html;
if (codeTheme) {
console.log(`[PDF-GEN] [${getElapsed()}] codeTheme provided (${codeTheme}), using Shiki`);
htmlToProcess = highlightHtmlWithShiki(html, codeTheme);
} else if (hasHighlightJsScript && shikiHighlighter) {
console.log(`[PDF-GEN] [${getElapsed()}] No codeTheme, detected highlight.js in HTML, using legacy path`);
} else if (shikiHighlighter) {
console.log(`[PDF-GEN] [${getElapsed()}] No codeTheme, no highlight.js, using Shiki default`);
htmlToProcess = highlightHtmlWithShiki(html, 'github');
}
const brandText = showWatermark !== false ? 'Powered by XWX AI Chat Exporter' : '';
const htmlSizeMBNum = Buffer.byteLength(html, 'utf8') / (1024 * 1024);
const htmlSizeMB = htmlSizeMBNum.toFixed(2);
const imgCount = imageCount || 0;
const imgSizeMB = totalImageSizeMB || 0;
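// Use the text-only HTML size sent by the frontend (base64 images excluded) for time estimation.
// Base64 images do not add rendering complexity for Chromium's PDF engine, and the benchmark formula was calibrated on text-only content.
// If the frontend does not provide it (older plugin versions), fall back to the total HTML size (backward compatible).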
const effectiveSizeMB = textOnlySizeMB != null ? textOnlySizeMB : htmlSizeMBNum;
console.log(`[PDF-GEN] [${getElapsed()}] 解析请求完成: HTML ${htmlSizeMB} MB (纯文本=${effectiveSizeMB.toFixed(2)} MB), 消息 ${messageCount || 0} 条, 图片 ${imgCount} 张 (${imgSizeMB} MB)`);
// DEBUG: Count actual images in HTML received
const htmlImgTagRegex = /
]+src=["']data:image\/[^"']+/gi;
const htmlImgTags = htmlToProcess.match(htmlImgTagRegex);
const htmlImageCount = htmlImgTags ? htmlImgTags.length : 0;
console.log(`[PDF-IMAGE] HTML contains ${htmlImageCount}
tags with data: URLs (frontend reported ${imgCount})`);
if (htmlImgTags && htmlImgTags.length > 0) {
// Log src type distribution
const pngCount = htmlImgTags.filter(t => t.includes('data:image/png')).length;
const jpegCount = htmlImgTags.filter(t => t.includes('data:image/jpeg') || t.includes('data:image/jpg')).length;
const gifCount = htmlImgTags.filter(t => t.includes('data:image/gif')).length;
const svgCount = htmlImgTags.filter(t => t.includes('data:image/svg')).length;
const webpCount = htmlImgTags.filter(t => t.includes('data:image/webp')).length;
console.log(`[PDF-IMAGE] Format breakdown: PNG=${pngCount}, JPEG=${jpegCount}, GIF=${gifCount}, SVG=${svgCount}, WebP=${webpCount}`);
}
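// HTML size warning: above 10 MB the Chromium PDF engine may crash, but processing is not blocked.
// The frontend already shows a warning to the user; here we only write a log entry.
// Benchmark result: 7.01 MB took 31 minutes, so large files may time out.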
const MAX_RECOMMENDED_TEXT_HTML_SIZE_MB = 10;
if (effectiveSizeMB > MAX_RECOMMENDED_TEXT_HTML_SIZE_MB) {
console.log(`[PDF-GEN] [${getElapsed()}] ⚠️ 大文件预警: 纯文本=${effectiveSizeMB.toFixed(2)} MB,超过推荐上限 ${MAX_RECOMMENDED_TEXT_HTML_SIZE_MB} MB,Chromium PDF 引擎可能超时或崩溃,继续尝试处理`);
}
// 移除