XWX-AI committed on
Commit
7ff2703
·
1 Parent(s): d1f4de0

fix: improve PDF image rendering - remove lazy loading, handle base64 images, add screenshot workaround

Browse files
Files changed (1) hide show
  1. server.js +112 -25
server.js CHANGED
@@ -61,6 +61,15 @@ app.post('/api/generate_pdf', async (req, res) => {
61
 
62
  console.log(`[PDF-GEN] [${getElapsed()}] 解析请求完成: HTML ${htmlSizeMB} MB, 图片 ${imgCount} 张 (${imgSizeMB} MB)`);
63
 
 
 
 
 
 
 
 
 
 
64
  const baseWaitTime = imgCount > 0 ? Math.min(500 + imgCount * 400, 6000) : 500;
65
  const sizeWaitTime = imgSizeMB > 0 ? Math.min(imgSizeMB * 100, 3000) : 0;
66
  const totalWaitTime = Math.min(baseWaitTime + sizeWaitTime, 8000);
@@ -85,7 +94,7 @@ app.post('/api/generate_pdf', async (req, res) => {
85
  await page.setViewport({ width: 1200, height: 800 });
86
 
87
  console.log(`[PDF-GEN] [${getElapsed()}] 正在填充页面内容...`);
88
- await page.setContent(html, {
89
  waitUntil: ['load', 'networkidle0'],
90
  timeout: 120000
91
  });
@@ -95,39 +104,117 @@ app.post('/api/generate_pdf', async (req, res) => {
95
  // 等待 base64 图片完全渲染(检测实际加载状态)
96
  if (imgCount > 0) {
97
  console.log(`[PDF-GEN] [${getElapsed()}] 正在检测 ${imgCount} 张图片加载状态...`);
 
 
 
 
 
 
 
98
  const loadedImages = await page.evaluate(async () => {
99
  const images = document.querySelectorAll('img');
100
- let loaded = 0;
101
- let failed = 0;
102
- let pending = 0;
103
- await Promise.all(Array.from(images).map(img => {
104
- if (img.complete && img.naturalWidth > 0) {
105
- loaded++;
106
- return Promise.resolve();
107
- }
108
- if (img.complete && img.naturalWidth === 0) {
109
- failed++;
110
- return Promise.resolve();
111
- }
112
- pending++;
113
- return new Promise(resolve => {
114
- img.onload = () => { loaded++; resolve(); };
115
- img.onerror = () => { failed++; resolve(); };
116
- setTimeout(() => { if (!img.complete) { pending--; failed++; resolve(); } }, 8000);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  });
118
- }));
119
- return { total: images.length, loaded, failed, pending };
 
 
 
 
 
 
 
 
 
 
 
120
  });
121
- console.log(`[PDF-GEN] [${getElapsed()}] 图片加载统计: 总共=${loadedImages.total}, 成功=${loadedImages.loaded}, 失败=${loadedImages.failed}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  } else {
123
  console.log(`[PDF-GEN] [${getElapsed()}] 无图片,等待 DOM 稳定...`);
124
  await delay(200);
125
  }
126
 
127
- await page.addStyleTag({
128
- content: `body { -webkit-print-color-adjust: exact; }`
129
- });
130
-
131
  console.log(`[PDF-GEN] [${getElapsed()}] 正在生成 PDF 二进制流...`);
132
  const pdfBuffer = await page.pdf({
133
  format: 'A4',
 
61
 
62
  console.log(`[PDF-GEN] [${getElapsed()}] 解析请求完成: HTML ${htmlSizeMB} MB, 图片 ${imgCount} 张 (${imgSizeMB} MB)`);
63
 
64
+ // 关键修复:移除所有图片的 loading="lazy" 属性
65
+ // 问题:loading="lazy" 导致视口外的图片在 PDF 中不渲染
66
+ // 解决方案:https://stackoverflow.com/questions/79156691/puppeteersharp-fails-to-display-base64-encoded-images-in-pdf-output
67
+ const htmlWithoutLazy = html.replace(/loading=["']lazy["']/gi, '');
68
+ console.log(`[PDF-GEN] [${getElapsed()}] 已移除 loading="lazy" 属性`);
69
+
70
+ // 使用移除 lazy 后的 HTML
71
+ const htmlToUse = htmlWithoutLazy;
72
+
73
  const baseWaitTime = imgCount > 0 ? Math.min(500 + imgCount * 400, 6000) : 500;
74
  const sizeWaitTime = imgSizeMB > 0 ? Math.min(imgSizeMB * 100, 3000) : 0;
75
  const totalWaitTime = Math.min(baseWaitTime + sizeWaitTime, 8000);
 
94
  await page.setViewport({ width: 1200, height: 800 });
95
 
96
  console.log(`[PDF-GEN] [${getElapsed()}] 正在填充页面内容...`);
97
+ await page.setContent(htmlToUse, {
98
  waitUntil: ['load', 'networkidle0'],
99
  timeout: 120000
100
  });
 
104
  // 等待 base64 图片完全渲染(检测实际加载状态)
105
  if (imgCount > 0) {
106
  console.log(`[PDF-GEN] [${getElapsed()}] 正在检测 ${imgCount} 张图片加载状态...`);
107
+
108
+ // 只保留基本的打印颜色调整,不注入可能影响公式的CSS
109
+ await page.addStyleTag({
110
+ content: `body { -webkit-print-color-adjust: exact; }`
111
+ });
112
+ console.log(`[PDF-GEN] [${getElapsed()}] CSS 注入完成`);
113
+
114
  const loadedImages = await page.evaluate(async () => {
115
  const images = document.querySelectorAll('img');
116
+ const results = [];
117
+ for (const img of images) {
118
+ const src = img.getAttribute('src') || '';
119
+ const srcPreview = src.substring(0, 80);
120
+ const isBase64 = src.startsWith('data:image/');
121
+ // 对于base64图片,如果src存在且是base64格式,认为已加载(headless模式下complete可能不准确)
122
+ const isLoaded = isBase64 && src.length > 100;
123
+ const status = isLoaded ? (img.naturalWidth > 0 ? 'loaded' : 'loaded-base64') : (img.complete ? (img.naturalWidth > 0 ? 'loaded' : 'error') : 'pending');
124
+ results.push({ src: srcPreview, status, width: img.naturalWidth, isBase64 });
125
+ }
126
+
127
+ // 对于base64图片,跳过等待onload(headless模式不准确)
128
+ const base64Images = Array.from(images).filter(img => {
129
+ const src = img.getAttribute('src') || '';
130
+ return src.startsWith('data:image/') && src.length > 100;
131
+ });
132
+
133
+ if (base64Images.length > 0) {
134
+ console.log(` 检测到 ${base64Images.length} 张base64图片,跳过onload等待(headless模式不准确)`);
135
+ } else {
136
+ await Promise.all(Array.from(images).map(img => {
137
+ if (img.complete && img.naturalWidth > 0) {
138
+ return Promise.resolve();
139
+ }
140
+ return new Promise(resolve => {
141
+ img.onload = () => { resolve(); };
142
+ img.onerror = () => { resolve(); };
143
+ setTimeout(() => { resolve(); }, 15000);
144
+ });
145
+ }));
146
+ }
147
+
148
+ const finalResults = [];
149
+ for (const img of images) {
150
+ const src = img.getAttribute('src') || '';
151
+ const isBase64 = src.startsWith('data:image/') && src.length > 100;
152
+ finalResults.push({
153
+ src: src.substring(0, 80),
154
+ complete: isBase64 || img.complete, // base64图片认为已加载
155
+ width: img.naturalWidth,
156
+ height: img.naturalHeight,
157
+ isBase64
158
  });
159
+ }
160
+
161
+ return {
162
+ initial: results,
163
+ final: finalResults,
164
+ total: images.length
165
+ };
166
+ });
167
+ console.log(`[PDF-GEN] [${getElapsed()}] 图片加载结果:`);
168
+ loadedImages.final.forEach((r, i) => {
169
+ const sizeInfo = r.width > 0 ? `${r.width}x${r.height}` : 'pending';
170
+ const statusInfo = r.complete ? (r.width > 0 ? 'OK' : 'OK(base64)') : 'NOT_COMPLETE';
171
+ console.log(` Image ${i}: ${statusInfo}, ${sizeInfo}${r.isBase64 ? ' (base64)' : ''}`);
172
  });
173
+
174
+ // 只对非base64图片检查失败
175
+ const failedImages = loadedImages.final.filter(r => !r.isBase64 && (r.width === 0 || !r.complete));
176
+ if (failedImages.length > 0) {
177
+ console.log(`[PDF-GEN] [${getElapsed()}] ⚠️ 警告: ${failedImages.length} 张非base64图片加载失败`);
178
+ }
179
+
180
+ // 尝试使用 img.decode() 强制解码验证(根据 GitHub Issue #13726 方案)
181
+ console.log(`[PDF-GEN] [${getElapsed()}] 尝试 img.decode() 强制解码验证...`);
182
+ const decodeResults = await page.evaluate(async () => {
183
+ const images = Array.from(document.querySelectorAll('img'));
184
+ const results = [];
185
+
186
+ for (const img of images) {
187
+ const src = img.getAttribute('src') || '';
188
+ const isBase64 = src.startsWith('data:image/') && src.length > 100;
189
+
190
+ if (isBase64 && img.naturalWidth > 0) {
191
+ try {
192
+ await img.decode();
193
+ results.push({ index: results.length, success: true, msg: 'decode OK' });
194
+ } catch (e) {
195
+ results.push({ index: results.length, success: false, msg: 'decode failed: ' + e.message });
196
+ }
197
+ } else {
198
+ results.push({ index: results.length, success: isBase64, msg: isBase64 ? 'no naturalWidth' : 'not base64' });
199
+ }
200
+ }
201
+
202
+ return results;
203
+ });
204
+
205
+ decodeResults.forEach((r, i) => {
206
+ console.log(` Image ${i}: decode=${r.success ? 'OK' : 'FAIL'} (${r.msg})`);
207
+ });
208
+
209
+ // 根据 GitHub Issue #10341:截图可以强制触发渲染流水线
210
+ console.log(`[PDF-GEN] [${getElapsed()}] 截图强制渲染(Issue #10341 workaround)...`);
211
+ await page.screenshot({ type: 'png' });
212
+ console.log(`[PDF-GEN] [${getElapsed()}] 截图完成`);
213
  } else {
214
  console.log(`[PDF-GEN] [${getElapsed()}] 无图片,等待 DOM 稳定...`);
215
  await delay(200);
216
  }
217
 
 
 
 
 
218
  console.log(`[PDF-GEN] [${getElapsed()}] 正在生成 PDF 二进制流...`);
219
  const pdfBuffer = await page.pdf({
220
  format: 'A4',