XWX-AI committed on
Commit
8617367
·
1 Parent(s): c008055

Switch to Node.js + Puppeteer for 100% fidelity PDF

Browse files
Files changed (5) hide show
  1. Dockerfile +12 -18
  2. app.py +0 -58
  3. package.json +15 -0
  4. requirements.txt +0 -4
  5. server.js +82 -0
Dockerfile CHANGED
@@ -1,33 +1,27 @@
1
 
2
- # Use Python 3.9 as base (stable for WeasyPrint)
3
- FROM python:3.9-slim
4
 
5
- # 1. Install System Dependencies (The "Construction Team")
6
- # We install Pango, Cairo, and fonts for CJK support
7
  RUN apt-get update && apt-get install -y \
8
- libpango-1.0-0 \
9
- libpangoft2-1.0-0 \
10
- libharfbuzz-subset0 \
11
- libjpeg-dev \
12
- libopenjp2-7-dev \
13
- libxcb1 \
14
- fontconfig \
15
- fonts-noto-cjk \
16
  && rm -rf /var/lib/apt/lists/*
17
 
18
  # 2. Set working directory
19
  WORKDIR /app
20
 
21
- # 3. Install Python Dependencies
22
- COPY requirements.txt .
23
- RUN pip install --no-cache-dir -r requirements.txt
24
 
25
  # 4. Copy Application Code
26
  COPY . .
27
 
28
- # 5. Expose Port (Hugging Face expects port 7860)
29
  EXPOSE 7860
30
 
31
  # 6. Run the Application
32
- # Using Gunicorn for production-grade performance
33
- CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
 
1
 
2
+ # Use Node.js 20 on Debian Bullseye (Stable for Puppeteer)
3
+ FROM node:20-bullseye-slim
4
 
5
+ # 1. Install Chrome Dependencies
6
+ # Puppeteer requires a lot of system libraries to run Chrome in Docker
7
  RUN apt-get update && apt-get install -y \
8
+ chromium \
9
+ fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 \
10
+ --no-install-recommends \
 
 
 
 
 
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
  # 2. Set working directory
14
  WORKDIR /app
15
 
16
+ # 3. Install Node.js Dependencies
17
+ COPY package.json .
18
+ RUN npm install
19
 
20
  # 4. Copy Application Code
21
  COPY . .
22
 
23
+ # 5. Expose Port (Hugging Face expects 7860)
24
  EXPOSE 7860
25
 
26
  # 6. Run the Application
27
+ CMD ["node", "server.js"]
 
app.py DELETED
@@ -1,58 +0,0 @@
1
-
2
- from flask import Flask, request, send_file
3
- from weasyprint import HTML, CSS
4
- from weasyprint.text.fonts import FontConfiguration
5
- import io
6
-
7
- app = Flask(__name__)
8
-
9
- # Basic Health Check
10
- @app.route('/')
11
- def home():
12
- return "Backend Service Running"
13
-
14
- @app.route('/api/generate_pdf', methods=['POST'])
15
- def generate_pdf():
16
- try:
17
- data = request.json
18
- if not data or 'html' not in data:
19
- return {"error": "Missing 'html' field"}, 400
20
-
21
- html_content = data['html']
22
-
23
- # Prepare buffer
24
- pdf_buffer = io.BytesIO()
25
-
26
- # Font Config (We rely on system fonts installed via Dockerfile)
27
- # fonts-noto-cjk provides "Noto Sans CJK SC"
28
- font_config = FontConfiguration()
29
-
30
- # CSS to enforce fonts
31
- css = CSS(string='''
32
- @page { margin: 20mm; }
33
- body { font-family: "Noto Sans CJK SC", sans-serif !important; }
34
- img { max-width: 100%; height: auto; }
35
- pre { white-space: pre-wrap; background: #f5f5f5; padding: 10px; }
36
- ''')
37
-
38
- HTML(string=html_content).write_pdf(
39
- pdf_buffer,
40
- stylesheets=[css],
41
- font_config=font_config
42
- )
43
-
44
- pdf_buffer.seek(0)
45
-
46
- return send_file(
47
- pdf_buffer,
48
- mimetype='application/pdf',
49
- as_attachment=True,
50
- download_name='export.pdf'
51
- )
52
-
53
- except Exception as e:
54
- print(f"Error: {e}")
55
- return {"error": str(e)}, 500
56
-
57
- if __name__ == '__main__':
58
- app.run(host='0.0.0.0', port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
package.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "pdf-server",
4
+ "version": "1.0.0",
5
+ "description": "Puppeteer PDF Generator for Hugging Face Spaces",
6
+ "main": "server.js",
7
+ "dependencies": {
8
+ "express": "^4.18.2",
9
+ "puppeteer": "^22.0.0",
10
+ "cors": "^2.8.5"
11
+ },
12
+ "scripts": {
13
+ "start": "node server.js"
14
+ }
15
+ }
requirements.txt DELETED
@@ -1,4 +0,0 @@
1
-
2
- Flask==3.0.0
3
- WeasyPrint==60.1
4
- gunicorn==21.2.0
 
 
 
 
 
server.js ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ const express = require('express');
3
+ const puppeteer = require('puppeteer');
4
+ const cors = require('cors');
5
+
6
+ const app = express();
7
+ const port = 7860;
8
+
9
+ // Enable CORS for all origins (or restrict to gemini.google.com)
10
+ app.use(cors());
11
+
12
+ // Increase payload limit for large HTML
13
+ app.use(express.json({ limit: '50mb' }));
14
+
15
+ app.get('/', (req, res) => {
16
+ res.send('Puppeteer PDF Server Running');
17
+ });
18
+
19
+ app.post('/api/generate_pdf', async (req, res) => {
20
+ let browser = null;
21
+ try {
22
+ const { html } = req.body;
23
+ if (!html) {
24
+ return res.status(400).json({ error: 'Missing html content' });
25
+ }
26
+
27
+ // Launch Chrome
28
+ // We use the installed 'chromium' from apt-get
29
+ browser = await puppeteer.launch({
30
+ executablePath: '/usr/bin/chromium',
31
+ args: [
32
+ '--no-sandbox',
33
+ '--disable-setuid-sandbox',
34
+ '--disable-dev-shm-usage', // Critical for Docker
35
+ '--font-render-hinting=none' // Better font rendering
36
+ ],
37
+ headless: 'new'
38
+ });
39
+
40
+ const page = await browser.newPage();
41
+
42
+ // Set content
43
+ await page.setContent(html, { waitUntil: 'networkidle0' });
44
+
45
+ // Inject print styles: A4 page size with 20mm page margins, and exact color reproduction
46
+ await page.addStyleTag({
47
+ content: `
48
+ @page { margin: 20mm; size: A4; }
49
+ body { -webkit-print-color-adjust: exact; }
50
+ `
51
+ });
52
+
53
+ // Generate PDF
54
+ const pdfBuffer = await page.pdf({
55
+ format: 'A4',
56
+ printBackground: true,
57
+ margin: {
58
+ top: '20mm',
59
+ bottom: '20mm',
60
+ left: '10mm',
61
+ right: '10mm'
62
+ }
63
+ });
64
+
65
+ await browser.close();
66
+ browser = null;
67
+
68
+ // Send response
69
+ res.setHeader('Content-Type', 'application/pdf');
70
+ res.setHeader('Content-Disposition', 'attachment; filename=export.pdf');
71
+ res.send(pdfBuffer);
72
+
73
+ } catch (error) {
74
+ console.error('PDF Generation Error:', error);
75
+ if (browser) await browser.close();
76
+ res.status(500).json({ error: 'Internal Server Error', details: error.message });
77
+ }
78
+ });
79
+
80
+ app.listen(port, () => {
81
+ console.log(`Server listening at http://localhost:${port}`);
82
+ });