internationalscholarsprogram committed on
Commit
2deab8c
·
verified ·
1 Parent(s): 197ee59

Initial deploy: ISP Handbook PDF engine

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .dockerignore +18 -0
  2. .env.example +35 -0
  3. .gitattributes +7 -35
  4. .gitignore +11 -0
  5. Dockerfile +57 -0
  6. README.md +300 -5
  7. app/__init__.py +0 -0
  8. app/api/__init__.py +0 -0
  9. app/api/routes.py +214 -0
  10. app/core/__init__.py +0 -0
  11. app/core/config.py +57 -0
  12. app/core/fonts.py +105 -0
  13. app/core/logging.py +10 -0
  14. app/core/theme.py +169 -0
  15. app/main.py +64 -0
  16. app/models/__init__.py +0 -0
  17. app/repositories/__init__.py +0 -0
  18. app/schemas/__init__.py +0 -0
  19. app/schemas/handbook.py +90 -0
  20. app/services/__init__.py +0 -0
  21. app/services/data_fetcher.py +242 -0
  22. app/services/html_builder.py +650 -0
  23. app/services/normalizer.py +945 -0
  24. app/services/pdf_renderer.py +326 -0
  25. app/services/pdf_service.py +213 -0
  26. app/services/renderers.py +1097 -0
  27. app/services/utils.py +259 -0
  28. app/static/css/print.css +1344 -0
  29. app/templates/handbook.html +126 -0
  30. app/templates/partials/blocks/bullet_list.html +22 -0
  31. app/templates/partials/blocks/enrollment_steps.html +39 -0
  32. app/templates/partials/blocks/heading.html +6 -0
  33. app/templates/partials/blocks/note.html +34 -0
  34. app/templates/partials/blocks/paragraph.html +6 -0
  35. app/templates/partials/blocks/render_block.html +18 -0
  36. app/templates/partials/blocks/school_profile.html +122 -0
  37. app/templates/partials/blocks/table.html +91 -0
  38. app/templates/partials/blocks/university_summary.html +6 -0
  39. app/templates/partials/cover.html +4 -0
  40. app/templates/partials/section.html +6 -0
  41. app/templates/partials/toc.html +17 -0
  42. app/templates/partials/university.html +126 -0
  43. fonts/GOTHIC.TTF +3 -0
  44. fonts/GOTHICB.TTF +3 -0
  45. fonts/GOTHICBI.TTF +3 -0
  46. fonts/GOTHICI.TTF +3 -0
  47. images/ISP Handbook_Global-60-66_page-0001.jpg +3 -0
  48. images/ISP Handbook_Global-60-66_page-0002.jpg +3 -0
  49. images/IUP.webp +3 -0
  50. images/LOPY-61-65_page-0003.jpg +3 -0
.dockerignore ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .env
5
+ .venv/
6
+ venv/
7
+ dist/
8
+ *.egg-info/
9
+ .pytest_cache/
10
+ .mypy_cache/
11
+ tests/
12
+ *.md
13
+ *.pdf
14
+ app/handbook_pdf/
15
+ app/__pycache__/
16
+ images/*.pdf
17
+ _*.py
18
+ _*.html
.env.example ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── App ──
2
+ APP_NAME=ISP Handbook Service
3
+ APP_VERSION=1.0.0
4
+ DEBUG=false
5
+ PORT=7860
6
+
7
+ # ── Database (MySQL) ──
8
+ DB_HOST=localhost
9
+ DB_PORT=3306
10
+ DB_USER=root
11
+ DB_PASSWORD=
12
+ DB_NAME=handbook
13
+ DB_CHARSET=utf8mb4
14
+
15
+ # ── External API endpoints (source-of-truth JSON APIs) ──
16
+ # If set, these override the computed URLs from API_BASE_URL + paths.
17
+ HANDBOOK_GENERAL_ENDPOINT=
18
+ UNIVERSITY_HANDBOOK_ENDPOINT=
19
+
20
+ # Base URL of the PHP server hosting the JSON APIs
21
+ API_BASE_URL=https://finsapdev.qhtestingserver.com
22
+ GENERAL_SECTIONS_PATH=/MODEL_APIS/handbook_general_sections.php
23
+ UNIVERSITY_SECTIONS_PATH=/MODEL_APIS/university_handbook.php
24
+
25
+ # ── Images directory ──
26
+ IMAGES_DIR=./images
27
+
28
+ # ── Fonts directory ──
29
+ FONT_DIR=./fonts
30
+
31
+ # ── CORS allowed origins (comma-separated) ──
32
+ CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173,https://finsapdev.qhtestingserver.com
33
+
34
+ # ── HTTP timeout for upstream API calls (seconds) ──
35
+ HTTP_TIMEOUT=25
.gitattributes CHANGED
@@ -1,35 +1,7 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
2
+ *.webp filter=lfs diff=lfs merge=lfs -text
3
+ *.pdf filter=lfs diff=lfs merge=lfs -text
4
+ *.ttf filter=lfs diff=lfs merge=lfs -text
5
+ *.TTF filter=lfs diff=lfs merge=lfs -text
6
+ *.png filter=lfs diff=lfs merge=lfs -text
7
+ *.jpg filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .env
5
+ .venv/
6
+ venv/
7
+ dist/
8
+ *.egg-info/
9
+ .pytest_cache/
10
+ .mypy_cache/
11
+ app/handbook_pdf/
Dockerfile ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ # Playwright/Chromium system dependencies
4
+ RUN apt-get update && apt-get install -y --no-install-recommends \
5
+ # Chromium dependencies
6
+ libnss3 \
7
+ libnspr4 \
8
+ libatk1.0-0 \
9
+ libatk-bridge2.0-0 \
10
+ libcups2 \
11
+ libdrm2 \
12
+ libxkbcommon0 \
13
+ libxcomposite1 \
14
+ libxdamage1 \
15
+ libxrandr2 \
16
+ libgbm1 \
17
+ libpango-1.0-0 \
18
+ libcairo2 \
19
+ libasound2 \
20
+ libatspi2.0-0 \
21
+ libxshmfence1 \
22
+ # Font rendering
23
+ fonts-liberation \
24
+ fontconfig \
25
+ # General utilities
26
+ wget \
27
+ && rm -rf /var/lib/apt/lists/*
28
+
29
+ WORKDIR /app
30
+
31
+ # Install Python dependencies
32
+ COPY requirements.txt .
33
+ RUN pip install --no-cache-dir -r requirements.txt
34
+
35
+ # Set browser path BEFORE install so Playwright puts browsers here
36
+ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
37
+
38
+ # Install Playwright Chromium browser + all required system deps
39
+ RUN playwright install --with-deps chromium
40
+
41
+ # Copy application code
42
+ COPY app/ ./app/
43
+
44
+ # Copy static assets (fonts & images used for PDF rendering)
45
+ COPY fonts/ ./fonts/
46
+ COPY images/ ./images/
47
+
48
+ # Copy env example as fallback
49
+ COPY .env.example .env.example
50
+
51
+ # Cloud Run injects PORT; HF Spaces uses 7860
52
+ ENV PORT=7860
53
+ EXPOSE 7860
54
+
55
+ # Single worker — Playwright+Chromium is memory-heavy.
56
+ # timeout-keep-alive=300 keeps the connection open during long PDF renders.
57
+ CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7860} --workers 1 --timeout-keep-alive 300"]
README.md CHANGED
@@ -1,10 +1,305 @@
1
  ---
2
- title: Handbook Engine
3
- emoji: 🐒
4
- colorFrom: green
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: ISP Handbook Engine
3
+ emoji: 📘
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
+ # ISP Handbook Service — Python Migration
11
+
12
+ A Python/FastAPI service that generates the ISP (International Scholars Program) Handbook as PDF or HTML. This is a drop-in replacement for the PHP handbook generation pipeline, designed to be called over HTTP from the existing PHP application.
13
+
14
+ ## Architecture
15
+
16
+ ```
17
+ python_service/
18
+ ├── app/
19
+ │   ├── main.py # FastAPI entry point
20
+ │   ├── api/
21
+ │   │   └── routes.py # REST endpoints
22
+ │   ├── core/
23
+ │   │   ├── config.py # Environment-based settings
24
+ │   │   ├── database.py # SQLAlchemy engine (MySQL)
25
+ │   │   ├── fonts.py # Century Gothic font management
26
+ │   │   └── logging.py # Logging setup
27
+ │   ├── models/ # SQLAlchemy models (if needed)
28
+ │   ├── repositories/
29
+ │   │   └── handbook_repo.py # Direct DB access (fallback)
30
+ │   ├── schemas/
31
+ │   │   └── handbook.py # Pydantic request/response models
32
+ │   └── services/
33
+ │       ├── data_fetcher.py # Fetch data from external JSON APIs
34
+ │       ├── html_builder.py # Build full handbook HTML
35
+ │       ├── pdf_service.py # HTML -> PDF via WeasyPrint
36
+ │       ├── renderers.py # TOC, sections, university renderers
37
+ │       └── utils.py # Shared helpers (h, money format, etc.)
38
+ ├── tests/
39
+ │   ├── test_api.py
40
+ │   └── test_renderers.py
41
+ ├── fonts/ # Century Gothic TTF files
42
+ ├── images/ # Handbook images (cover, header, etc.)
43
+ ├── css/ # Base stylesheet
44
+ ├── Dockerfile
45
+ ├── requirements.txt
46
+ ├── .env.example
47
+ └── README.md
48
+ ```
49
+
50
+ ## API Endpoints
51
+
52
+ | Method | Path | Description |
53
+ |--------|------|-------------|
54
+ | `GET` | `/health` | Health check |
55
+ | `GET` | `/diagnostics/fonts` | Font file diagnostics |
56
+ | `GET` | `/api/v1/sections/global?catalog_id=0` | Fetch normalised global sections |
57
+ | `GET` | `/api/v1/sections/universities` | Fetch normalised university sections |
58
+ | `GET` | `/api/v1/handbook/pdf?catalog_id=0` | Generate PDF (download) |
59
+ | `POST` | `/api/v1/handbook/pdf` | Generate PDF with JSON body |
60
+ | `GET` | `/api/v1/handbook/html?catalog_id=0` | Generate HTML preview |
61
+ | `POST` | `/api/v1/handbook/render` | Generate PDF or HTML based on `output_format` |
62
+ | `GET` | `/docs` | Swagger UI |
63
+ | `GET` | `/redoc` | ReDoc UI |
64
+
65
+ ## Local Development
66
+
67
+ ### Prerequisites
68
+
69
+ - Python 3.11+
70
+ - MySQL database (existing schema — unchanged)
71
+ - Century Gothic font files in `fonts/` directory
72
+
73
+ ### Setup
74
+
75
+ ```bash
76
+ cd python_service
77
+
78
+ # Create virtualenv
79
+ python -m venv .venv
80
+ .venv\Scripts\activate # Windows
81
+ # source .venv/bin/activate # Linux/Mac
82
+
83
+ # Install dependencies
84
+ pip install -r requirements.txt
85
+
86
+ # Copy and configure environment
87
+ copy .env.example .env
88
+ # Edit .env with your database credentials and API URLs
89
+ ```
90
+
91
+ ### Run
92
+
93
+ ```bash
94
+ uvicorn app.main:app --reload --host 0.0.0.0 --port 7860
95
+ ```
96
+
97
+ Visit http://localhost:7860/docs for the interactive API documentation.
98
+
99
+ ### Run Tests
100
+
101
+ ```bash
102
+ pytest tests/ -v
103
+ ```
104
+
105
+ ## Docker
106
+
107
+ ### Build
108
+
109
+ ```bash
110
+ docker build -t isp-handbook-service .
111
+ ```
112
+
113
+ ### Run
114
+
115
+ ```bash
116
+ docker run -d \
117
+ --name handbook-service \
118
+ -p 7860:7860 \
119
+ -e DB_HOST=host.docker.internal \
120
+ -e DB_USER=root \
121
+ -e DB_PASSWORD=secret \
122
+ -e DB_NAME=handbook \
123
+ -e API_BASE_URL=https://finsapdev.qhtestingserver.com \
124
+ isp-handbook-service
125
+ ```
126
+
127
+ Or with an env file:
128
+
129
+ ```bash
130
+ docker run -d --name handbook-service -p 7860:7860 --env-file .env isp-handbook-service
131
+ ```
132
+
133
+ ## Hugging Face Spaces Deployment
134
+
135
+ 1. Create a new Space on Hugging Face with **Docker** SDK
136
+ 2. Upload/push the `python_service/` directory as the Space root
137
+ 3. Ensure `fonts/`, `images/`, and `css/` directories are included
138
+ 4. Set environment variables (Secrets) in Space settings:
139
+ - `DB_HOST`, `DB_USER`, `DB_PASSWORD`, `DB_NAME`
140
+ - `API_BASE_URL`
141
+ - `PORT=7860` (default for HF Spaces)
142
+ 5. The `Dockerfile` is already configured for HF Spaces (port 7860, `0.0.0.0`)
143
+
144
+ **Important**: Hugging Face Spaces may not allow outbound MySQL connections. If direct DB access is unavailable, rely on the external API endpoint approach (the service fetches data from the PHP JSON APIs over HTTP, not from the database directly).
145
+
146
+ ## PHP Integration Example
147
+
148
+ The PHP application can call this Python service over HTTP using cURL:
149
+
150
+ ```php
151
+ <?php
152
+ /**
153
+ * PHP client for the ISP Handbook Python Service.
154
+ * Replace HANDBOOK_SERVICE_URL with your actual deployment URL.
155
+ */
156
+
157
+ define('HANDBOOK_SERVICE_URL', 'http://localhost:7860');
158
+
159
+ /**
160
+ * Check service health.
161
+ */
162
+ function handbook_health(): array {
163
+ $url = HANDBOOK_SERVICE_URL . '/health';
164
+ $ch = curl_init($url);
165
+ curl_setopt_array($ch, [
166
+ CURLOPT_RETURNTRANSFER => true,
167
+ CURLOPT_TIMEOUT => 5,
168
+ ]);
169
+ $body = curl_exec($ch);
170
+ $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
171
+ curl_close($ch);
172
+
173
+ if ($code !== 200) {
174
+ return ['ok' => false, 'error' => 'Service unreachable', 'http_code' => $code];
175
+ }
176
+ return json_decode($body, true) ?? ['ok' => false, 'error' => 'Invalid response'];
177
+ }
178
+
179
+ /**
180
+ * Generate and download the handbook PDF.
181
+ */
182
+ function handbook_download_pdf(int $catalogId = 0, bool $debug = false): void {
183
+ $params = http_build_query([
184
+ 'catalog_id' => $catalogId,
185
+ 'debug' => $debug ? 'true' : 'false',
186
+ ]);
187
+ $url = HANDBOOK_SERVICE_URL . '/api/v1/handbook/pdf?' . $params;
188
+
189
+ $ch = curl_init($url);
190
+ curl_setopt_array($ch, [
191
+ CURLOPT_RETURNTRANSFER => true,
192
+ CURLOPT_TIMEOUT => 120,
193
+ CURLOPT_FOLLOWLOCATION => true,
194
+ ]);
195
+ $body = curl_exec($ch);
196
+ $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
197
+ $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
198
+ curl_close($ch);
199
+
200
+ if ($code !== 200 || strpos($contentType, 'application/pdf') === false) {
201
+ http_response_code(502);
202
+ header('Content-Type: text/plain');
203
+ echo "PDF generation failed (HTTP $code)";
204
+ return;
205
+ }
206
+
207
+ header('Content-Type: application/pdf');
208
+ header('Content-Disposition: attachment; filename="ISP_Handbook.pdf"');
209
+ header('Content-Length: ' . strlen($body));
210
+ echo $body;
211
+ }
212
+
213
+ /**
214
+ * Fetch global sections via the Python service.
215
+ */
216
+ function handbook_get_sections(int $catalogId = 0): array {
217
+ $url = HANDBOOK_SERVICE_URL . '/api/v1/sections/global?catalog_id=' . $catalogId;
218
+ $ch = curl_init($url);
219
+ curl_setopt_array($ch, [
220
+ CURLOPT_RETURNTRANSFER => true,
221
+ CURLOPT_TIMEOUT => 25,
222
+ ]);
223
+ $body = curl_exec($ch);
224
+ curl_close($ch);
225
+ return json_decode($body, true) ?? [];
226
+ }
227
+
228
+ /**
229
+ * Generate handbook via POST with custom options.
230
+ */
231
+ function handbook_generate(array $options = []): string {
232
+ $url = HANDBOOK_SERVICE_URL . '/api/v1/handbook/render';
233
+ $payload = json_encode(array_merge([
234
+ 'catalog_id' => 0,
235
+ 'include_inactive_programs' => false,
236
+ 'debug' => false,
237
+ 'output_format' => 'pdf',
238
+ ], $options));
239
+
240
+ $ch = curl_init($url);
241
+ curl_setopt_array($ch, [
242
+ CURLOPT_RETURNTRANSFER => true,
243
+ CURLOPT_POST => true,
244
+ CURLOPT_POSTFIELDS => $payload,
245
+ CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
246
+ CURLOPT_TIMEOUT => 120,
247
+ ]);
248
+ $body = curl_exec($ch);
249
+ curl_close($ch);
250
+ return $body;
251
+ }
252
+ ```
253
+
254
+ ### Usage in PHP
255
+
256
+ ```php
257
+ // Health check
258
+ $status = handbook_health();
259
+ if ($status['status'] === 'ok') {
260
+ echo "Service is running\n";
261
+ }
262
+
263
+ // Stream PDF to browser
264
+ handbook_download_pdf(catalogId: 1);
265
+
266
+ // Get sections data
267
+ $sections = handbook_get_sections(catalogId: 1);
268
+ print_r($sections);
269
+ ```
270
+
271
+ ## Migration Notes & Assumptions
272
+
273
+ ### What was migrated
274
+
275
+ | PHP Component | Python Equivalent | Notes |
276
+ |---|---|---|
277
+ | `common.php` (URL builder, HTTP client) | `data_fetcher.py` | Uses `httpx` instead of cURL |
278
+ | `cors.php` | FastAPI CORS middleware | Same origins preserved |
279
+ | `helpers.php` (`h()`, `respondJson()`) | Built into FastAPI + `utils.py` | |
280
+ | `fetchers.php` (global/uni data fetch) | `data_fetcher.py` | Identical normalisation logic |
281
+ | `renderers.php` (TOC, blocks, university) | `renderers.py` | All block types preserved |
282
+ | `html_builder.php` (`buildHandbookHtml`) | `html_builder.py` | Same HTML structure |
283
+ | `pdf.php` (Dompdf render) | `pdf_service.py` | **WeasyPrint** replaces Dompdf |
284
+ | `images.php` (image config) | `pdf_service.py` `_get_images_config()` | |
285
+ | `font_diagnostics.php` | `GET /diagnostics/fonts` | |
286
+ | `db.php` (mysqli) | `database.py` (SQLAlchemy) | Available but not primary path |
287
+
288
+ ### Key differences
289
+
290
+ 1. **PDF engine**: WeasyPrint replaces Dompdf. Layout may differ slightly in edge cases (table widths, page breaks). Both support `@font-face` with base64 TTF and `@page` rules.
291
+
292
+ 2. **TOC page numbers**: The PHP code uses a 2-pass Dompdf render to inject exact TOC page numbers via named destinations. WeasyPrint doesn't expose named destinations the same way. TOC pages are assigned sequentially in the initial migration. Exact page numbers can be added via a post-processing PDF pass if needed.
293
+
294
+ 3. **No auth**: The PHP code has no authentication. The Python service also has none. Add API key middleware if this service is exposed publicly.
295
+
296
+ 4. **Data source**: The service fetches data from the same two PHP JSON APIs over HTTP (not directly from the database). The `repositories/handbook_repo.py` provides a DB fallback if you want to bypass the PHP APIs entirely.
297
+
298
+ 5. **SSL verification**: Disabled for internal API calls (`verify=False` in httpx), matching the PHP behavior (`CURLOPT_SSL_VERIFYPEER => false`).
299
+
300
+ ### Risks
301
+
302
+ - **Font rendering**: Century Gothic rendering may differ slightly between Dompdf (PHP) and WeasyPrint (Python). Test with actual fonts.
303
+ - **Page break behavior**: Dompdf and WeasyPrint handle CSS `page-break-*` properties slightly differently.
304
+ - **Image embedding**: Remote campus images are fetched at generation time. Network issues will result in placeholder cells (same as PHP behavior).
305
+ - **Memory**: Large handbooks with many university images may require significant memory. The Dockerfile doesn't set memory limits — Hugging Face Spaces has its own limits.
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/routes.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """API router — handbook endpoints.
2
+
3
+ Exposes REST endpoints that the PHP application calls over HTTP.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from typing import Any
10
+
11
+ from fastapi import APIRouter, HTTPException, Query
12
+ from fastapi.responses import HTMLResponse, Response
13
+
14
+ from app.schemas.handbook import (
15
+ ErrorResponse,
16
+ FontDiagnosticsResponse,
17
+ GlobalSectionsResponse,
18
+ HandbookRequest,
19
+ HealthResponse,
20
+ SectionItem,
21
+ UniversitySectionsResponse,
22
+ UniversityPayload,
23
+ )
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ router = APIRouter()
28
+
29
+
30
+ # ── Root / HF health probe ──
31
+
32
+ @router.get("/", tags=["system"])
33
+ async def root():
34
+ """Root endpoint — HF Spaces probes this URL for health checks."""
35
+ return {"status": "ok"}
36
+
37
+
38
+ # ── Health check ──
39
+
40
+ @router.get("/health", response_model=HealthResponse, tags=["system"])
41
+ async def health_check():
42
+ """Health check endpoint."""
43
+ from app.core.config import get_settings
44
+ settings = get_settings()
45
+ return HealthResponse(
46
+ status="ok",
47
+ service=settings.app_name,
48
+ version=settings.app_version,
49
+ )
50
+
51
+
52
+ # ── Font diagnostics ──
53
+
54
+ @router.get("/diagnostics/fonts", tags=["system"])
55
+ async def font_diagnostics():
56
+ """Font diagnostics endpoint. Mirrors PHP font_diagnostics.php."""
57
+ from app.core.fonts import font_diagnostics as _diag
58
+ try:
59
+ result = _diag()
60
+ return result
61
+ except Exception as exc:
62
+ raise HTTPException(status_code=500, detail=str(exc))
63
+
64
+
65
+ # ── Global sections (proxy/fetch) ──
66
+
67
+ @router.get("/api/v1/sections/global", tags=["sections"])
68
+ async def get_global_sections(catalog_id: int = Query(0, description="Catalog ID filter")):
69
+ """Fetch global handbook sections from the upstream API.
70
+
71
+ Returns normalised section data identical to what the PHP code produces.
72
+ """
73
+ from app.services.data_fetcher import fetch_global_sections
74
+
75
+ try:
76
+ sections = await fetch_global_sections(catalog_id)
77
+ return {
78
+ "ok": True,
79
+ "general_sections": sections,
80
+ "count": len(sections),
81
+ }
82
+ except Exception as exc:
83
+ logger.exception("Failed to fetch global sections")
84
+ raise HTTPException(status_code=502, detail=str(exc))
85
+
86
+
87
+ # ── University sections (proxy/fetch) ──
88
+
89
+ @router.get("/api/v1/sections/universities", tags=["sections"])
90
+ async def get_university_sections():
91
+ """Fetch university handbook sections from the upstream API."""
92
+ from app.services.data_fetcher import fetch_university_sections
93
+
94
+ try:
95
+ by_uni = await fetch_university_sections()
96
+ return {
97
+ "ok": True,
98
+ "universities": by_uni,
99
+ "count": len(by_uni),
100
+ }
101
+ except Exception as exc:
102
+ logger.exception("Failed to fetch university sections")
103
+ raise HTTPException(status_code=502, detail=str(exc))
104
+
105
+
106
+ # ── Generate handbook (HTML or PDF) ──
107
+
108
+ @router.get("/api/v1/handbook/pdf", tags=["handbook"])
109
+ async def generate_handbook_pdf_get(
110
+ catalog_id: int = Query(0),
111
+ include_inactive_programs: bool = Query(False),
112
+ debug: bool = Query(False),
113
+ ):
114
+ """Generate the ISP Handbook as a PDF download (GET for easy PHP integration)."""
115
+ from app.services.pdf_service import generate_handbook_pdf
116
+
117
+ try:
118
+ pdf_bytes = await generate_handbook_pdf(
119
+ catalog_id=catalog_id,
120
+ include_inactive_programs=include_inactive_programs,
121
+ debug=debug,
122
+ )
123
+ return Response(
124
+ content=pdf_bytes,
125
+ media_type="application/pdf",
126
+ headers={
127
+ "Content-Disposition": 'attachment; filename="ISP_Handbook.pdf"',
128
+ "Cache-Control": "private, max-age=0, must-revalidate",
129
+ },
130
+ )
131
+ except Exception as exc:
132
+ logger.exception("PDF generation failed")
133
+ raise HTTPException(status_code=500, detail=str(exc))
134
+
135
+
136
+ @router.post("/api/v1/handbook/pdf", tags=["handbook"])
137
+ async def generate_handbook_pdf_post(request: HandbookRequest):
138
+ """Generate the ISP Handbook as a PDF download (POST with body)."""
139
+ from app.services.pdf_service import generate_handbook_pdf
140
+
141
+ try:
142
+ pdf_bytes = await generate_handbook_pdf(
143
+ catalog_id=request.catalog_id,
144
+ include_inactive_programs=request.include_inactive_programs,
145
+ debug=request.debug,
146
+ )
147
+ return Response(
148
+ content=pdf_bytes,
149
+ media_type="application/pdf",
150
+ headers={
151
+ "Content-Disposition": 'attachment; filename="ISP_Handbook.pdf"',
152
+ "Cache-Control": "private, max-age=0, must-revalidate",
153
+ },
154
+ )
155
+ except Exception as exc:
156
+ logger.exception("PDF generation failed")
157
+ raise HTTPException(status_code=500, detail=str(exc))
158
+
159
+
160
+ @router.get("/api/v1/handbook/html", tags=["handbook"])
161
+ async def generate_handbook_html_get(
162
+ catalog_id: int = Query(0),
163
+ include_inactive_programs: bool = Query(False),
164
+ debug: bool = Query(False),
165
+ ):
166
+ """Generate the ISP Handbook as raw HTML (useful for preview/debugging)."""
167
+ from app.services.pdf_service import generate_handbook_html
168
+
169
+ try:
170
+ html = await generate_handbook_html(
171
+ catalog_id=catalog_id,
172
+ include_inactive_programs=include_inactive_programs,
173
+ debug=debug,
174
+ )
175
+ return HTMLResponse(content=html)
176
+ except Exception as exc:
177
+ logger.exception("HTML generation failed")
178
+ raise HTTPException(status_code=500, detail=str(exc))
179
+
180
+
181
+ @router.post("/api/v1/handbook/render", tags=["handbook"])
182
+ async def render_handbook(request: HandbookRequest):
183
+ """Generate handbook in the requested format (pdf or html)."""
184
+ if request.output_format == "html":
185
+ from app.services.pdf_service import generate_handbook_html
186
+ try:
187
+ html = await generate_handbook_html(
188
+ catalog_id=request.catalog_id,
189
+ include_inactive_programs=request.include_inactive_programs,
190
+ debug=request.debug,
191
+ )
192
+ return HTMLResponse(content=html)
193
+ except Exception as exc:
194
+ logger.exception("HTML generation failed")
195
+ raise HTTPException(status_code=500, detail=str(exc))
196
+ else:
197
+ from app.services.pdf_service import generate_handbook_pdf
198
+ try:
199
+ pdf_bytes = await generate_handbook_pdf(
200
+ catalog_id=request.catalog_id,
201
+ include_inactive_programs=request.include_inactive_programs,
202
+ debug=request.debug,
203
+ )
204
+ return Response(
205
+ content=pdf_bytes,
206
+ media_type="application/pdf",
207
+ headers={
208
+ "Content-Disposition": 'attachment; filename="ISP_Handbook.pdf"',
209
+ "Cache-Control": "private, max-age=0, must-revalidate",
210
+ },
211
+ )
212
+ except Exception as exc:
213
+ logger.exception("PDF generation failed")
214
+ raise HTTPException(status_code=500, detail=str(exc))
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Application configuration via environment variables."""
2
+
3
+ import os
4
+ from functools import lru_cache
5
+ from pydantic_settings import BaseSettings
6
+
7
+
8
+ class Settings(BaseSettings):
9
+ """All config comes from environment variables or .env file."""
10
+
11
+ # App
12
+ app_name: str = "ISP Handbook Service"
13
+ app_version: str = "1.0.0"
14
+ debug: bool = False
15
+ port: int = 7860 # Hugging Face Spaces default
16
+
17
+ # External API endpoints (the source-of-truth JSON APIs)
18
+ handbook_general_endpoint: str = ""
19
+ university_handbook_endpoint: str = ""
20
+ api_base_url: str = "https://finsapdev.qhtestingserver.com"
21
+ general_sections_path: str = "/MODEL_APIS/handbook_general_sections.php"
22
+ university_sections_path: str = "/MODEL_APIS/university_handbook.php"
23
+
24
+ # Images
25
+ images_dir: str = "./images"
26
+
27
+ # Fonts
28
+ font_dir: str = "./fonts"
29
+
30
+ # CORS
31
+ cors_origins: str = "http://localhost:5173,http://127.0.0.1:5173,https://finsapdev.qhtestingserver.com,https://internationalscholarsdev.qhtestingserver.com"
32
+
33
+ # Request timeouts
34
+ http_timeout: int = 25
35
+
36
+ model_config = {"env_file": ".env", "env_file_encoding": "utf-8", "extra": "ignore"}
37
+
38
+ @property
39
+ def cors_origins_list(self) -> list[str]:
40
+ return [o.strip() for o in self.cors_origins.split(",") if o.strip()]
41
+
42
+ @property
43
+ def general_endpoint_url(self) -> str:
44
+ if self.handbook_general_endpoint:
45
+ return self.handbook_general_endpoint
46
+ return self.api_base_url.rstrip("/") + self.general_sections_path
47
+
48
+ @property
49
+ def university_endpoint_url(self) -> str:
50
+ if self.university_handbook_endpoint:
51
+ return self.university_handbook_endpoint
52
+ return self.api_base_url.rstrip("/") + self.university_sections_path
53
+
54
+
55
+ @lru_cache()
56
+ def get_settings() -> Settings:
57
+ return Settings()
app/core/fonts.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Font file management — Century Gothic only.
2
+
3
+ Mirrors the PHP handbook_select_font_family / handbook_font_face_css logic.
4
+ """
5
+
6
+ import base64
7
+ import os
8
+ from pathlib import Path
9
+
10
+ from app.core.config import get_settings
11
+
12
+
13
+ class FontError(RuntimeError):
14
+ pass
15
+
16
+
17
+ VARIANTS = ("regular", "bold", "italic", "bold_italic")
18
+ FILE_MAP = {
19
+ "regular": "GOTHIC.TTF",
20
+ "bold": "GOTHICB.TTF",
21
+ "italic": "GOTHICI.TTF",
22
+ "bold_italic": "GOTHICBI.TTF",
23
+ }
24
+
25
+
26
+ def _font_dir() -> Path:
27
+ return Path(get_settings().font_dir)
28
+
29
+
30
+ def select_font_family() -> dict:
31
+ """Return font metadata dict. Raises FontError if any file is missing."""
32
+ font_dir = _font_dir()
33
+ paths: dict[str, Path] = {}
34
+ for variant, filename in FILE_MAP.items():
35
+ p = font_dir / filename
36
+ if not p.is_file():
37
+ raise FontError(
38
+ f'Century Gothic font file missing for variant "{variant}": {p}'
39
+ )
40
+ paths[variant] = p
41
+
42
+ return {
43
+ "family": "Century Gothic",
44
+ "regular": str(paths["regular"]),
45
+ "bold": str(paths["bold"]),
46
+ "italic": str(paths["italic"]),
47
+ "bold_italic": str(paths["bold_italic"]),
48
+ "status": "primary",
49
+ }
50
+
51
+
52
+ def font_face_css(font_meta: dict | None = None) -> str:
53
+ """Generate @font-face CSS with base64-embedded TTF data."""
54
+ meta = font_meta or select_font_family()
55
+ family = meta.get("family", "Century Gothic")
56
+
57
+ encoded: dict[str, str] = {}
58
+ for variant in VARIANTS:
59
+ path = meta.get(variant)
60
+ if not path or not os.path.isfile(path):
61
+ raise FontError(
62
+ f'Century Gothic font file missing for variant "{variant}": {path}'
63
+ )
64
+ with open(path, "rb") as f:
65
+ data = base64.b64encode(f.read()).decode("ascii")
66
+ if not data:
67
+ raise FontError(f"Failed to read/encode font file: {path}")
68
+ encoded[variant] = data
69
+
70
+ css_parts = []
71
+ weight_style = {
72
+ "regular": ("400", "normal"),
73
+ "bold": ("700", "normal"),
74
+ "italic": ("400", "italic"),
75
+ "bold_italic": ("700", "italic"),
76
+ }
77
+ for variant, (weight, style) in weight_style.items():
78
+ css_parts.append(
79
+ f"@font-face {{\n"
80
+ f" font-family: '{family}';\n"
81
+ f" src: url('data:font/ttf;base64,{encoded[variant]}') format('truetype');\n"
82
+ f" font-weight: {weight};\n"
83
+ f" font-style: {style};\n"
84
+ f"}}"
85
+ )
86
+
87
+ return "\n".join(css_parts)
88
+
89
+
90
+ def font_diagnostics() -> dict:
91
+ """Return diagnostic info about font availability."""
92
+ font_dir = _font_dir()
93
+ result = {
94
+ "font_dir": str(font_dir),
95
+ "font_dir_exists": font_dir.is_dir(),
96
+ "variants": {},
97
+ }
98
+ for variant, filename in FILE_MAP.items():
99
+ p = font_dir / filename
100
+ result["variants"][variant] = {
101
+ "path": str(p),
102
+ "exists": p.is_file(),
103
+ "size_bytes": p.stat().st_size if p.is_file() else 0,
104
+ }
105
+ return result
app/core/logging.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Centralised logging setup."""
2
+
3
+ import logging
4
+ import sys
5
+
6
+
7
def setup_logging(debug: bool = False) -> None:
    """Configure the root logger to write formatted records to stdout.

    Args:
        debug: When truthy the root level is DEBUG, otherwise INFO.

    ``force=True`` makes ``basicConfig`` replace any handlers that were
    already attached to the root logger, so calling this again reconfigures
    logging instead of being a no-op.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.DEBUG if debug else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        force=True,
    )
app/core/theme.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Centralized handbook visual theme — single source of truth.
2
+
3
+ All colour values, font sizes, spacing, and rendering tokens live here.
4
+ Templates, CSS generation, and renderers reference this module instead
5
+ of hardcoding visual rules.
6
+
7
+ Spec source: ISP Handbook Enhancement Guidelines + sample PDF.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass, field
13
+
14
+
15
+ # ── Colour palette ──────────────────────────────────────────────
16
+
17
@dataclass(frozen=True)
class Colors:
    """Named colour tokens for the handbook; every value is a hex string.

    Field names are flattened into ``--hb-*`` CSS custom properties by
    ``HandbookTheme.css_vars``, so renaming a field renames its variable.
    """

    # Headings, body copy and links
    heading_blue: str = "#0263A3"
    heading_green: str = "#199970"
    body_text: str = "#000000"
    toc_text: str = "#111111"
    note_red: str = "#C00000"
    link_blue: str = "#0263A3"

    # Benefits panel
    benefits_header_bg: str = "#00F600"
    benefits_header_fg: str = "#FFFFFF"
    benefit_item_bg: str = "#00FCFC"
    benefit_item_fg: str = "#000000"

    # School profile and tables
    school_info_green: str = "#199970"
    table_border: str = "#333333"
    table_header_bg: str = "#E6E6E6"
    table_header_fg: str = "#333333"

    # Secondary / chrome
    toc_dots: str = "#777777"
    muted: str = "#666666"
    note_bg: str = "#F7F8FA"
    note_border: str = "#BBBBBB"
    page_bg: str = "#FFFFFF"
40
+
41
+
42
+ # ── Typography ──────────────────────────────────────────────────
43
+
44
@dataclass(frozen=True)
class Typography:
    """Font stack, font sizes and line-heights, all stored as CSS strings.

    NOTE(review): sizes mix ``pt`` and ``px`` units; the values are kept
    exactly as defined — confirm the mix is intentional.
    """

    # Font stack: Century Gothic first, then generic sans-serif fallbacks.
    font_family: str = "'Century Gothic', 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif"

    # Body and headings
    font_size_body: str = "10pt"
    font_size_h1: str = "12pt"
    font_size_h2: str = "12pt"
    font_size_h3: str = "10pt"

    # Table of contents
    font_size_toc_heading: str = "12pt"
    font_size_toc_item: str = "10pt"

    # Tables, lists and notes
    font_size_table: str = "9.5px"
    font_size_programs_table: str = "8.5px"
    font_size_career_list: str = "8.5px"
    font_size_note: str = "9.5px"

    # Benefits panel and school profile
    font_size_benefits_header: str = "10.5px"
    font_size_benefit_item: str = "10px"
    font_size_school_name: str = "12pt"
    font_size_summary_label: str = "10.5px"
    font_size_summary_value: str = "9.5px"
    font_size_qualify: str = "10px"

    # Unitless line-height multipliers
    line_height_body: str = "1.4"
    line_height_heading: str = "1.2"
    line_height_table: str = "1.25"
68
+
69
+
70
+ # ── Spacing / margins ──────────────────────────────────────────
71
+
72
@dataclass(frozen=True)
class Spacing:
    """Page margins and per-element margin/padding values as CSS strings.

    All four page margins default to 2.54cm.
    """

    # Page box
    page_margin_top: str = "2.54cm"
    page_margin_right: str = "2.54cm"
    page_margin_bottom: str = "2.54cm"
    page_margin_left: str = "2.54cm"

    # Text flow (CSS shorthand values)
    paragraph_margin: str = "2px 0 8px"
    heading_margin_h1: str = "12px 0 6px"
    heading_margin_h2: str = "10px 0 4px"
    list_margin: str = "2px 0 8px 18px"

    # Notes, tables and the benefits panel
    note_padding: str = "6px 8px"
    note_margin: str = "6px 0 8px"
    table_margin: str = "6px 0 10px"
    table_cell_padding: str = "5px 6px"
    benefits_margin: str = "4px 0 4px"

    # Two-column split at the top of a school profile (sums to 100%)
    school_top_summary_width: str = "58%"
    school_top_campus_width: str = "42%"
91
+
92
+
93
+ # ── Table column widths ────────────────────────────────────────
94
+
95
@dataclass(frozen=True)
class ProgramTableColumns:
    """Fixed percentage widths for the programs table columns.

    Four widths are defined here and they sum to 100%.

    NOTE(review): the earlier docstring described a "5-column" table, but
    only four columns have widths — confirm whether a fifth is expected.
    """

    program: str = "30%"
    designation: str = "20%"
    entrance_exam: str = "20%"
    funding: str = "30%"
103
+
104
+
105
+ # ── Bullet characters ──────────────────────────────────────────
106
+
107
@dataclass(frozen=True)
class Bullets:
    """Bullet markers for handbook lists.

    ``primary`` and ``benefit`` are literal characters; ``career`` is a CSS
    ``list-style-type`` keyword rather than a character.
    """

    primary: str = "\u27A2"  # ➢ (three-D top-lighted rightwards arrowhead)
    benefit: str = "\u2022"  # • (bullet)
    career: str = "disc"  # CSS list-style-type for career lists
114
+
115
+
116
+ # ── Render-block type registry ──────────────────────────────────
117
+
118
# Names of the render-block types, kept as an ordered, immutable tuple.
BLOCK_TYPES = (
    # Text blocks
    "heading_1",
    "heading_2",
    "paragraph",
    "bullet_list",
    "note",
    # Structured content
    "table",
    "enrollment_steps",
    "school_profile",
    "university_summary",
    # Document-level pages
    "toc",
    "cover",
    "full_page_image",
)
132
+
133
+
134
+ # ── Composed theme object ──────────────────────────────────────
135
+
136
@dataclass(frozen=True)
class HandbookTheme:
    """Bundle of all theme groups — the object handed to renderers/templates.

    Each attribute defaults to a fresh instance of its token dataclass.
    """

    colors: Colors = field(default_factory=Colors)
    typography: Typography = field(default_factory=Typography)
    spacing: Spacing = field(default_factory=Spacing)
    program_columns: ProgramTableColumns = field(default_factory=ProgramTableColumns)
    bullets: Bullets = field(default_factory=Bullets)

    def css_vars(self) -> dict[str, str]:
        """Return the theme as a mapping of ``--hb-*`` CSS custom properties.

        Every colour field is exported (underscores become hyphens). Only a
        subset of typography and spacing tokens is exported: font family,
        body/h1/h2 sizes, body line-height and the four page margins. The
        primary bullet character is exported wrapped in double quotes.
        """
        colour_vars = {
            "--hb-" + name.replace("_", "-"): getattr(self.colors, name)
            for name in Colors.__dataclass_fields__
        }
        type_tokens = self.typography
        page = self.spacing
        return {
            **colour_vars,
            # Typography
            "--hb-font-family": type_tokens.font_family,
            "--hb-font-size-body": type_tokens.font_size_body,
            "--hb-font-size-h1": type_tokens.font_size_h1,
            "--hb-font-size-h2": type_tokens.font_size_h2,
            "--hb-line-height-body": type_tokens.line_height_body,
            # Page margins
            "--hb-page-margin-top": page.page_margin_top,
            "--hb-page-margin-right": page.page_margin_right,
            "--hb-page-margin-bottom": page.page_margin_bottom,
            "--hb-page-margin-left": page.page_margin_left,
            # Bullet character, quoted as a CSS string
            "--hb-bullet-char": f'"{self.bullets.primary}"',
        }
166
+
167
+
168
+ # Module-level singleton
169
+ THEME = HandbookTheme()
app/main.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI application entry point."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from pathlib import Path
7
+
8
+ from fastapi import FastAPI
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.staticfiles import StaticFiles
11
+
12
+ from app.api.routes import router
13
+ from app.core.config import get_settings
14
+ from app.core.logging import setup_logging
15
+
16
# Load settings first: logging level and every FastAPI option below read them.
settings = get_settings()
setup_logging(settings.debug)

logger = logging.getLogger(__name__)

# Application object; the interactive docs and the OpenAPI schema are exposed
# at /docs, /redoc and /openapi.json.
app = FastAPI(
    title=settings.app_name,
    version=settings.app_version,
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
)

# CORS — mirrors PHP cors.php allowed origins.
# Credentials are disabled; only GET/POST/OPTIONS are allowed, and the
# download-related response headers are exposed to browser clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins_list,
    allow_credentials=False,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["Content-Disposition", "Content-Length", "Content-Type"],
)

# Static assets (CSS, images). The HTML builder references this directory
# through a file:// URL for Playwright; the HTTP mount at /static/ is an
# additional way to reach the same files for debugging.
# The mount is skipped when the directory does not exist.
_static_dir = Path(__file__).resolve().parent / "static"
if _static_dir.is_dir():
    app.mount("/static", StaticFiles(directory=str(_static_dir)), name="static")

app.include_router(router)
46
+
47
+
48
@app.on_event("startup")
async def startup_event():
    """Log a one-line banner with name, version, port and debug flag."""
    banner = "%s v%s starting on port %d (debug=%s, renderer=playwright)"
    logger.info(
        banner,
        settings.app_name,
        settings.app_version,
        settings.port,
        settings.debug,
    )
57
+
58
+
59
@app.on_event("shutdown")
async def shutdown_event():
    """Gracefully close the Playwright browser on shutdown."""
    # Imported inside the handler, so the renderer module is only loaded
    # here (if nothing else imported it earlier).
    from app.services.pdf_renderer import shutdown_browser
    await shutdown_browser()
    # Logged only after the browser shutdown coroutine has completed.
    logger.info("Application shutdown complete")
app/models/__init__.py ADDED
File without changes
app/repositories/__init__.py ADDED
File without changes
app/schemas/__init__.py ADDED
File without changes
app/schemas/handbook.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic schemas for request/response validation.
2
+
3
+ These mirror the data shapes used by the PHP code — section_json structures,
4
+ university payloads, and API responses.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+
14
+ # ── Section-level schemas ──
15
+
16
+
17
class SectionItem(BaseModel):
    """A single handbook section (global or university-level)."""

    # Every field has a default, so an empty object still validates.
    section_key: str = ""
    section_title: str = ""
    # Section body: either a JSON object or a JSON array; defaults to {}.
    section_json: dict[str, Any] | list[Any] = Field(default_factory=dict)
    # Optional ordering hint and record id; None when not supplied.
    sort_order: int | None = None
    id: int | None = None
25
+
26
+
27
class UniversityPayload(BaseModel):
    """A university with its sections."""

    # 0 is the default id when none is supplied.
    university_id: int = 0
    university_name: str = ""
    # Universities are treated as active unless stated otherwise.
    is_active: bool = True
    website: str = ""
    # Sections belonging to this university; a new empty list per instance.
    sections: list[SectionItem] = Field(default_factory=list)
    sort_order: int | None = None
36
+
37
+
38
+ # ── API response wrappers ──
39
+
40
+
41
# Response wrapper carrying the global (non-university) sections.
class GlobalSectionsResponse(BaseModel):
    # Success flag; defaults to True.
    ok: bool = True
    general_sections: list[SectionItem] = Field(default_factory=list)
44
+
45
+
46
# Response wrapper carrying the per-university payloads.
class UniversitySectionsResponse(BaseModel):
    # Success flag; defaults to True.
    ok: bool = True
    universities: list[UniversityPayload] = Field(default_factory=list)
49
+
50
+
51
+ # ── Handbook generation request ──
52
+
53
+
54
class HandbookRequest(BaseModel):
    """Request body for handbook generation."""

    # 0 is the default when no catalog id is supplied.
    catalog_id: int = 0
    include_inactive_programs: bool = False
    debug: bool = False
    # Typed as a plain str: this model itself does not restrict the value
    # to the two formats named in the description.
    output_format: str = Field(
        default="pdf", description="'pdf' or 'html'"
    )
63
+
64
+
65
+ # ── Health check ──
66
+
67
+
68
# Health-check payload; all three fields are fixed defaults.
class HealthResponse(BaseModel):
    status: str = "ok"
    service: str = "handbook-service"
    # NOTE(review): hard-coded here rather than read from settings —
    # confirm it is kept in sync with the configured app version.
    version: str = "1.0.0"
72
+
73
+
74
+ # ── Font diagnostics ──
75
+
76
+
77
# Font availability report. font_dir, font_dir_exists and variants use the
# same key names as the dict returned by font_diagnostics().
class FontDiagnosticsResponse(BaseModel):
    library: str = "playwright"
    font_dir: str = ""
    font_dir_exists: bool = False
    # Variant name -> free-form details dict.
    variants: dict[str, dict[str, Any]] = Field(default_factory=dict)
82
+
83
+
84
+ # ── Generic error ──
85
+
86
+
87
# Generic error envelope; `ok` defaults to False, unlike the success wrappers.
class ErrorResponse(BaseModel):
    ok: bool = False
    error: str = ""
    detail: str = ""
app/services/__init__.py ADDED
File without changes
app/services/data_fetcher.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data fetcher service — mirrors PHP fetchers.php.
2
+
3
+ Fetches handbook data from the two external JSON APIs (source of truth),
4
+ normalises the payloads, and returns typed dicts identical to what the
5
+ PHP code produced.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ from typing import Any
13
+
14
+ import httpx
15
+
16
+ from app.core.config import get_settings
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ def _normalize_section_json(raw: Any, context: str, sid: Any = None) -> dict | list:
22
+ """Mirrors PHP handbook_normalize_section_json."""
23
+ if isinstance(raw, dict) or isinstance(raw, list):
24
+ return raw
25
+ if isinstance(raw, str):
26
+ raw = raw.strip()
27
+ if not raw:
28
+ return {}
29
+ try:
30
+ decoded = json.loads(raw)
31
+ if isinstance(decoded, (dict, list)):
32
+ return decoded
33
+ except (json.JSONDecodeError, ValueError):
34
+ logger.warning(
35
+ "section_json parse failed ctx=%s id=%s snippet=%.180s",
36
+ context, sid, raw,
37
+ )
38
+ return {}
39
+ return {}
40
+
41
+
42
+ def _is_truthy(val: Any) -> bool:
43
+ """Mirrors PHP handbook_true."""
44
+ if isinstance(val, bool):
45
+ return val
46
+ if isinstance(val, int):
47
+ return val != 0
48
+ s = str(val).lower().strip()
49
+ return s not in ("0", "false", "")
50
+
51
+
52
+ def _tier_section_rank(section_key: str) -> int:
53
+ """Return sort priority for tier-related section keys.
54
+
55
+ Tier One sections sort before Tier Two; non-tier sections get 99 (neutral).
56
+ """
57
+ k = section_key.lower().replace("-", "_").replace(" ", "_")
58
+ if "tier_one" in k or "non_cosigner" in k:
59
+ return 0
60
+ if "tier_two" in k or k in ("cosigner_schools", "cosigner"):
61
+ return 1
62
+ return 99
63
+
64
+
65
def _sort_sections_stable(sections: list[dict]) -> list[dict]:
    """Sort sections in place and return the same list.

    Mirrors PHP sortHandbookSectionsStable with a tier-aware tiebreaker.
    Ordering, in priority: ``sort_order`` (missing values last), tier rank
    of ``section_key``, ``id`` (missing values last), original position.

    The temporary ``_i`` position marker is added only where absent and is
    removed from every section before returning.
    """
    for position, section in enumerate(sections):
        section.setdefault("_i", position)

    def _nullable(value):
        """Key fragment that places ``None`` after any real value."""
        return (1, 0) if value is None else (0, value)

    sections.sort(
        key=lambda section: (
            _nullable(section.get("sort_order")),
            # Tier One before Tier Two when sort_order ties.
            _tier_section_rank(str(section.get("section_key", ""))),
            _nullable(section.get("id")),
            section.get("_i", 0),
        )
    )

    for section in sections:
        section.pop("_i", None)
    return sections
84
+
85
+
86
async def fetch_global_sections(catalog_id: int = 0) -> list[dict[str, Any]]:
    """Fetch and normalise global handbook sections from the external API.

    Mirrors PHP fetchGlobalSections().

    Args:
        catalog_id: When non-zero it is appended to the endpoint URL as a
            ``catalog_id`` query parameter.

    Returns:
        Sorted list of dicts with keys ``section_key``, ``section_title``,
        ``section_json``, ``sort_order`` and ``id``. An empty list is
        returned when the request fails, the payload is not a JSON object,
        or the API reports ``ok`` as false.
    """
    settings = get_settings()
    url = settings.general_endpoint_url
    if catalog_id:
        sep = "&" if "?" in url else "?"
        url += f"{sep}catalog_id={catalog_id}"

    try:
        # SECURITY NOTE(review): verify=False disables TLS certificate
        # verification for this request. Kept as-is to avoid breaking the
        # existing deployment — confirm whether it is still required.
        async with httpx.AsyncClient(verify=False, timeout=settings.http_timeout) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            payload = resp.json()
    except Exception as exc:
        # Best-effort boundary: any transport/HTTP/decoding failure yields [].
        logger.error("Global sections fetch failed: %s url=%s", exc, url)
        return []

    # A JSON array or scalar has no .get(); treat it like an ok=false reply.
    if not isinstance(payload, dict) or not payload.get("ok"):
        logger.warning("Global sections API returned ok=false: %s", payload)
        return []

    # Accept common shapes
    sections_raw = (
        payload.get("general_sections")
        or payload.get("sections")
        or payload.get("globals")
        or payload.get("data")
        or []
    )
    if not isinstance(sections_raw, list):
        sections_raw = []

    out: list[dict[str, Any]] = []
    for i, s in enumerate(sections_raw):
        if not isinstance(s, dict):
            continue

        k = str(s.get("section_key", ""))
        t = str(s.get("section_title", ""))
        j = _normalize_section_json(s.get("section_json", {}), "global", s.get("id"))

        # Fall back to the camelCase key only when sort_order is absent.
        # (Using `or` here would discard a legitimate sort_order of 0.)
        sort_raw = s.get("sort_order")
        if sort_raw is None:
            sort_raw = s.get("sortOrder")
        sort_val = None
        if sort_raw is not None and str(sort_raw).lstrip("-").isdigit():
            try:
                sort_val = int(sort_raw)
            except (TypeError, ValueError):
                sort_val = None

        # Skip entries that carry no key, no title and no content.
        if not k and not t and not j:
            continue

        # A malformed id must not abort the whole fetch; treat it as missing.
        raw_id = s.get("id")
        try:
            section_id = int(raw_id) if raw_id is not None else None
        except (TypeError, ValueError):
            section_id = None

        out.append({
            "section_key": k,
            "section_title": t,
            "section_json": j,
            "sort_order": sort_val,
            "id": section_id,
            "_i": i,
        })

    out = _sort_sections_stable(out)

    logger.info(
        "Global sections fetched catalog_id=%d count=%d keys=%s",
        catalog_id,
        len(out),
        [s.get("section_key") for s in out],
    )
    return out
154
+
155
+
156
async def fetch_university_sections() -> dict[int, dict[str, Any]]:
    """Fetch and normalise university handbook sections.

    Mirrors PHP fetchUniversitySections().

    Returns:
        Dict keyed by ``university_id``, ordered by tier (Tier One, then
        Tier Two, then untiered), then university name, then id. Each value
        holds ``university_name``, ``sections``, ``is_active``, ``website``,
        ``school_category``, ``tier`` and ``tier_label``. An empty dict is
        returned when the request fails, the payload is not a JSON object,
        or the API reports ``ok`` as false.
    """
    settings = get_settings()
    url = settings.university_endpoint_url

    try:
        # SECURITY NOTE(review): verify=False disables TLS certificate
        # verification for this request. Kept as-is to avoid breaking the
        # existing deployment — confirm whether it is still required.
        async with httpx.AsyncClient(verify=False, timeout=settings.http_timeout) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            payload = resp.json()
    except Exception as exc:
        # Best-effort boundary: any transport/HTTP/decoding failure yields {}.
        logger.error("University sections fetch failed: %s url=%s", exc, url)
        return {}

    # A JSON array or scalar has no .get(); treat it like an ok=false reply.
    if not isinstance(payload, dict) or not payload.get("ok"):
        logger.warning("University sections API returned ok=false")
        return {}

    universities = payload.get("universities", [])
    if not isinstance(universities, list):
        universities = []

    by_uni: dict[int, dict[str, Any]] = {}
    for u in universities:
        if not isinstance(u, dict):
            continue

        # One malformed id must not abort the whole fetch; skip that entry.
        try:
            uid = int(u.get("university_id", 0))
        except (TypeError, ValueError):
            logger.warning(
                "Skipping university with invalid university_id=%r",
                u.get("university_id"),
            )
            continue
        if uid <= 0:
            continue

        # Use the fallback label for a missing, null or empty name (a null
        # name would otherwise become the literal string "None").
        name = str(u.get("university_name") or f"University #{uid}")
        is_active_raw = u.get("is_active", u.get("isActive", 1))
        website = str(u.get("website", u.get("website_url", "")))
        is_active = _is_truthy(is_active_raw)

        sections_raw = u.get("sections", [])
        if not isinstance(sections_raw, list):
            sections_raw = []

        norm_sections: list[dict[str, Any]] = []
        for s in sections_raw:
            if not isinstance(s, dict):
                continue
            k = str(s.get("section_key", ""))
            t = str(s.get("section_title", ""))
            j = _normalize_section_json(s.get("section_json", {}), "university", s.get("id"))
            # Skip entries that carry no key, no title and no content.
            if not k and not t and not j:
                continue
            norm_sections.append({
                "section_key": k,
                "section_title": t,
                "section_json": j,
            })

        # Derive tier from school_category (backward-compatible — older APIs may omit these)
        school_category = str(u.get("school_category", "")).strip()
        tier = u.get("tier")
        tier_label = u.get("tier_label", "")
        if tier is None and school_category:
            # Derive from school_category if tier not explicitly provided
            if school_category == "non_cosigner":
                tier, tier_label = 1, "Tier One"
            elif school_category == "cosigner":
                tier, tier_label = 2, "Tier Two"

        by_uni[uid] = {
            "university_name": name,
            "sections": norm_sections,
            "is_active": is_active,
            "website": website,
            "school_category": school_category,
            "tier": tier,
            "tier_label": tier_label or "",
        }

    # Sort: Tier One (non_cosigner) first, then Tier Two (cosigner), then by name
    def _uni_sort_key(item: tuple[int, dict]) -> tuple:
        uid, data = item
        t = data.get("tier")
        # Non-integer or missing tiers sort after the known tiers.
        tier_rank = t if isinstance(t, int) else 99
        return (tier_rank, data.get("university_name", "").lower(), uid)

    return dict(sorted(by_uni.items(), key=_uni_sort_key))
app/services/html_builder.py ADDED
@@ -0,0 +1,650 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HTML builder — assembles the full ISP Handbook HTML document.
2
+
3
+ Uses Jinja2 templates for HTML generation. Data preparation logic is
4
+ preserved from the original string-concatenation approach. The output
5
+ is a self-contained HTML suitable for Playwright Chromium PDF export.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import base64
11
+ import logging
12
+ import mimetypes
13
+ import os
14
+ import re
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from jinja2 import Environment, FileSystemLoader, select_autoescape
19
+ from markupsafe import Markup
20
+
21
+ from app.core.config import get_settings
22
+ from app.core.fonts import font_face_css, select_font_family
23
+ from app.services.normalizer import normalize_section, normalize_university
24
+ from app.services.renderers import (
25
+ fetch_image_data_uri,
26
+ render_global_blocks,
27
+ sort_toc,
28
+ _extract_university_funding,
29
+ )
30
+ from app.services.utils import (
31
+ format_money_figures,
32
+ get_any,
33
+ h,
34
+ handbook_anchor,
35
+ hb_slug,
36
+ is_truthy,
37
+ sort_sections_stable,
38
+ )
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ # Jinja2 environment — templates live alongside the app package
43
+ _TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "templates"
44
+
45
+
46
def _get_jinja_env() -> Environment:
    """Return a new Jinja2 environment rooted at the templates directory.

    Autoescaping is selected for ``html`` templates, and block tags are
    trimmed (``trim_blocks`` / ``lstrip_blocks``) so they leave no stray
    whitespace in the output. A fresh environment is built on every call.
    """
    return Environment(
        loader=FileSystemLoader(str(_TEMPLATES_DIR)),
        autoescape=select_autoescape(["html"]),
        trim_blocks=True,
        lstrip_blocks=True,
    )
55
+
56
+
57
+ def _static_base_url() -> str:
58
+ """Return absolute file:// URL to the static directory."""
59
+ static_dir = Path(__file__).resolve().parent.parent / "static"
60
+ return static_dir.as_uri()
61
+
62
+
63
+ def _unused_pdf_override_css(font_stack: str) -> str:
64
+ """Legacy inline PDF override CSS β€” kept for reference only.
65
+ All styling now lives in static/css/print.css for Chromium rendering.
66
+ """
67
+ return ""
68
+
69
+
70
+ # Section class map
71
# Maps a section key to the CSS class name associated with that section.
SECTION_CLASS_MAP = {
    # Programme description
    "overview": "sec-overview",
    "how_the_program_works": "sec-how",
    "qualification_requirements": "sec-qualification",
    "enrolment_steps": "sec-steps",
    # Policies
    "withdrawal_refund_policy": "sec-policy",
    "refund_guidelines": "sec-refund",
    # Costs and funding
    "program_contributions": "sec-contributions",
    "program_features_breakdown": "sec-breakdown",
    "funding_options_available": "sec-funding",
    # University summaries
    "summary_of_universities": "sec-summary",
    "summary_of_universities_cosigner": "sec-summary-cosigner",
}

# Section keys flagged for page-break handling. The set currently contains
# exactly the keys of SECTION_CLASS_MAP, but is maintained as its own literal.
PAGE_BREAK_KEYS = {
    "overview",
    "how_the_program_works",
    "qualification_requirements",
    "enrolment_steps",
    "withdrawal_refund_policy",
    "refund_guidelines",
    "program_contributions",
    "program_features_breakdown",
    "funding_options_available",
    "summary_of_universities",
    "summary_of_universities_cosigner",
}
98
+
99
+
100
+ def _collect_program_option_inconsistencies(value: Any, path: str, hits: list[str]) -> None:
101
+ """Collect paths where only REGULAR or PRIME appears."""
102
+ if isinstance(value, dict):
103
+ for k, v in value.items():
104
+ _collect_program_option_inconsistencies(v, f"{path}.{k}" if path else str(k), hits)
105
+ return
106
+ if isinstance(value, list):
107
+ for i, v in enumerate(value):
108
+ _collect_program_option_inconsistencies(v, f"{path}[{i}]", hits)
109
+ return
110
+ if value is None:
111
+ return
112
+
113
+ text = str(value)
114
+ has_regular = bool(re.search(r"\bREGULAR\b", text, flags=re.IGNORECASE))
115
+ has_prime = bool(re.search(r"\bPRIME\b", text, flags=re.IGNORECASE))
116
+ if has_regular ^ has_prime:
117
+ hits.append(path)
118
+
119
+
120
def _prepare_university_data(
    uni_raw: dict[str, Any],
    allow_remote: bool,
    include_inactive_programs: bool,
    debug: bool,
    stats: dict[str, Any],
) -> dict[str, Any]:
    """Prepare a single university's template data.

    Extracts overview, campus image, benefits, programs, and extra sections
    from the raw sections list. This moves the logic that was in
    render_university_section into a data-preparation step so that the
    Jinja2 template handles the HTML.

    Args:
        uni_raw: University record. ``name`` is required; ``sections``,
            ``website``, ``school_category``, ``is_active``, ``anchor``,
            ``sort_order`` and ``_is_first`` are read when present.
        allow_remote: When true, the campus image URL is fetched and embedded.
        include_inactive_programs: When false, programs whose active flag is
            falsy are dropped.
        debug: Passed through to ``render_global_blocks`` for extra sections.
        stats: Counter dict, mutated in place (``universities``,
            ``images_embedded``, ``images_placeholder``, ``university_links``,
            ``website_rows``).

    Returns:
        Dict with keys ``name``, ``anchor``, ``sort_order``, ``website``,
        ``classes``, ``overview``, ``campus_image``, ``campus_caption``,
        ``benefits``, ``funding_heading``, ``funding_items``, ``programs``
        and ``extra_sections``.

    Raises:
        KeyError: If ``uni_raw`` has no ``name`` entry.
    """
    uni_name = uni_raw["name"]
    sections = uni_raw.get("sections", [])
    is_first = uni_raw.get("_is_first", False)

    stats["universities"] = stats.get("universities", 0) + 1

    # Build section map; merge duplicate "programs"
    # For any other duplicated key the last occurrence wins.
    sec_map: dict[str, dict] = {}
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = str(s.get("section_key", ""))
        if not k:
            continue
        if k == "programs" and k in sec_map:
            existing = sec_map["programs"].get("section_json", {})
            incoming = s.get("section_json", {})
            if not isinstance(existing, dict):
                existing = {}
            if not isinstance(incoming, dict):
                incoming = {}
            a = existing.get("programs", [])
            b = incoming.get("programs", [])
            if not isinstance(a, list):
                a = []
            if not isinstance(b, list):
                b = []
            # NOTE: this writes into the first "programs" section's
            # section_json, i.e. it mutates the caller's data in place.
            existing["programs"] = a + b
            sec_map["programs"]["section_json"] = existing
            continue
        sec_map[k] = s

    # Campus image ("campus_image" takes precedence over "image")
    img_section = sec_map.get("campus_image") or sec_map.get("image")
    campus_image = ""
    campus_caption = ""
    if img_section:
        j = img_section.get("section_json", {})
        if isinstance(j, dict):
            campus_url = str(j.get("image_url", "")).strip()
            campus_caption = str(j.get("caption", "")).strip()
            if allow_remote and campus_url:
                embedded = fetch_image_data_uri(campus_url)
                if embedded:
                    campus_image = embedded
                    stats["images_embedded"] = stats.get("images_embedded", 0) + 1
                else:
                    # Fetch returned nothing usable: counted as a placeholder.
                    stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1
            else:
                # Remote fetching disabled or no URL: counted as a placeholder.
                stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1

    # Overview and website
    # The university-level website wins; the overview section is only a fallback.
    resolved_website = (uni_raw.get("website") or "").strip()
    overview_data = None

    if "overview" in sec_map:
        overview_json = sec_map["overview"].get("section_json", {})
        if not isinstance(overview_json, dict):
            overview_json = {}

        site_from_overview = get_any(
            overview_json,
            ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"],
        )
        if not resolved_website and site_from_overview:
            resolved_website = site_from_overview

        # Each value is looked up under several alternative key spellings.
        overview_data = {
            "founded": get_any(overview_json, ["founded", "Founded"]),
            "total_students": get_any(overview_json, ["total_students", "Total Students"]),
            "undergraduates": get_any(overview_json, ["undergraduates", "Undergraduate Students", "undergraduate_students"]),
            "postgraduates": get_any(overview_json, ["postgraduate_students", "Postgraduate Students"]),
            "acceptance_rate": get_any(overview_json, ["acceptance_rate", "Acceptance Rate"]),
            "location": get_any(overview_json, ["location", "Location"]),
            # Empty formatted tuition collapses to None.
            "tuition": format_money_figures(str(get_any(overview_json, [
                "tuition_out_of_state_yearly",
                "Yearly Out of State Tuition Fees",
                "Yearly Out-of-State Tuition Fees",
                "Yearly Tuition Fees",
                "Yearly Out-of-State Tuition Fees:",
            ]) or "")) or None,
        }

    # Both counters are bumped together whenever a website was resolved.
    if resolved_website:
        stats["university_links"] = stats.get("university_links", 0) + 1
        stats["website_rows"] = stats.get("website_rows", 0) + 1

    # Benefits + Funding
    # Defaults below are used when the university has no "benefits" section.
    benefits = []
    funding_heading = "Funding Available"
    funding_items: list[str] = []

    if "benefits" in sec_map:
        j = sec_map["benefits"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}

        raw_benefits = j.get("benefits", [])
        if isinstance(raw_benefits, list):
            # Stringify, trim, and drop entries that end up empty.
            benefits = [str(b).strip() for b in raw_benefits if str(b).strip()]
        else:
            benefits = []

        funding_heading, funding_items = _extract_university_funding(
            j,
            {
                "school_category": uni_raw.get("school_category"),
                "status": "in" if is_truthy(uni_raw.get("is_active", True)) else "out",
            },
        )

    # Programs
    # Stays None when there is no "programs" section at all; becomes a
    # (possibly empty) list when the section exists.
    programs = None
    if "programs" in sec_map:
        j = sec_map["programs"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        programs_raw = j.get("programs", [])
        if not isinstance(programs_raw, list):
            programs_raw = []

        if not include_inactive_programs:
            # The active flag is read from the first present key of
            # program_active / is_active / active; a program with none of
            # them is treated as active.
            programs_raw = [
                p for p in programs_raw
                if isinstance(p, dict) and is_truthy(
                    p.get("program_active", p.get("is_active", p.get("active", 1)))
                )
            ]

        programs = []
        seen_names = set()
        for p in programs_raw:
            if not isinstance(p, dict):
                continue
            program_name = str(p.get("program_name", "")).strip()
            # Deduplicate by lowercase program name
            # (programs with an empty name therefore collapse to one entry).
            key = program_name.lower()
            if key in seen_names:
                continue
            seen_names.add(key)
            link = str(p.get("program_link", "")).strip()
            # Fall back to program_links.web_link when program_link is empty.
            if not link and isinstance(p.get("program_links"), dict):
                link = str(p["program_links"].get("web_link", "")).strip()

            programs.append({
                "name": program_name,
                "link": link,
                "designation": str(p.get("designation", "")),
                "entrance": str(p.get("entrance_exam", p.get("entrance_examination", ""))),
            })

    # Extra sections
    # Everything not handled above is rendered through the global block
    # renderer, in original list order (this iterates `sections`, not sec_map).
    skip_keys = {"campus_image", "image", "overview", "benefits", "programs"}
    extra_sections = []
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = str(s.get("section_key", ""))
        if not k or k in skip_keys:
            continue
        title = str(s.get("section_title", ""))
        j = s.get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        rendered = render_global_blocks(k, title, j, debug)
        # Wrapped in Markup so the template's autoescaping leaves it as-is.
        extra_sections.append({"rendered_html": Markup(rendered)})

    # Every university except the first gets the "page-break" class.
    classes = ["uni"]
    if not is_first:
        classes.append("page-break")

    return {
        "name": uni_name,
        "anchor": uni_raw.get("anchor"),
        "sort_order": uni_raw.get("sort_order"),
        "website": resolved_website,
        "classes": classes,
        "overview": overview_data,
        "campus_image": campus_image,
        "campus_caption": campus_caption,
        "benefits": benefits,
        "funding_heading": funding_heading,
        "funding_items": funding_items,
        "programs": programs,
        "extra_sections": extra_sections,
    }
321
+
322
+
323
def _coerce_sort_order(value: Any) -> "int | None":
    """Return *value* as an int when it looks like a signed integer, else None."""
    if value is None:
        return None
    text = str(value)
    if not text.lstrip("-").isdigit():
        return None
    try:
        return int(text)
    except ValueError:
        # Inputs such as "--5" pass the digit check above but are rejected
        # by int(); treat them as "no sort order" instead of crashing.
        return None


def _existing_file_uri(path: Any) -> str:
    """Return a file:// URI for *path* if it is an existing file, else ""."""
    if not path or not os.path.isfile(path):
        return ""
    # Path.as_uri() raises ValueError for relative paths, so anchor the path
    # to the current working directory first (no-op for absolute paths).
    return Path(path).absolute().as_uri()


def _embed_file_as_data_uri(path: Any, default_mime: str) -> str:
    """Return *path* inlined as a base64 data URI, or "" if it is not a file."""
    if not path or not os.path.isfile(path):
        return ""
    mime = mimetypes.guess_type(path)[0] or default_mime
    with open(path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    return f"data:{mime};base64,{encoded}"


def build_handbook_html(
    globals_data: list[dict[str, Any]],
    by_uni: dict[int, dict[str, Any]],
    images: dict[str, Any],
    allow_remote: bool,
    include_inactive_programs: bool = False,
    debug: bool = False,
) -> str:
    """Build the full handbook HTML document from the "handbook.html" template.

    Args:
        globals_data: Global (non-university) section records; each dict is
            read for ``section_key``, ``section_title``, ``section_json`` and
            ``sort_order``.
        by_uni: University records keyed by university id.
        images: Local image paths under the keys ``coverImage``, ``tocImage``,
            ``headerImage``, ``labelImage`` and ``bottomPages`` (a list).
        allow_remote: Forwarded to the university preparation helpers.
        include_inactive_programs: Forwarded to the university preparation
            helpers.
        debug: Forwarded to the renderers and to the template.

    Returns:
        The rendered HTML document.

    Raises:
        RuntimeError: If any of the required global sections
            (``table_of_contents``, ``overview``, ``how_the_program_works``)
            is missing from *globals_data*.
    """
    env = _get_jinja_env()
    template = env.get_template("handbook.html")

    font_meta = select_font_family()
    font_css = font_face_css(font_meta)

    # Base URL for static assets (CSS, images, etc.)
    base_url = _static_base_url()

    stats: dict[str, Any] = {
        "universities": 0,
        "images_embedded": 0,
        "images_placeholder": 0,
        "program_links_total": 0,
        "program_missing_links_total": 0,
        "missing_program_links": {},
        "university_links": 0,
        "website_rows": 0,
        "program_option_warnings": [],
    }

    # -- Cover and TOC images are referenced by file:// URI --
    cover_image = _existing_file_uri(images.get("coverImage", ""))
    toc_image = _existing_file_uri(images.get("tocImage", ""))

    # -- Header image (repeating page header) is inlined as a data URI --
    header_image = _embed_file_as_data_uri(images.get("headerImage", ""), "image/jpeg")

    # -- Label image (repeating right-side label) is inlined as a data URI --
    label_path = images.get("labelImage", "")
    label_image = _embed_file_as_data_uri(label_path, "image/png")
    if not label_image:
        logger.warning("Label image not found locally: %s", label_path)

    # -- Prepare active universities --
    active_universities: list[dict[str, Any]] = []
    for uid, uni in by_uni.items():
        if not isinstance(uni, dict):
            continue
        if not is_truthy(uni.get("is_active", True)):
            continue
        # `or` (rather than a .get default) so that a null value does not
        # turn into the literal text "None" in the rendered document.
        name = str(uni.get("university_name") or f"University #{uid}")
        raw_sections = uni.get("sections")
        active_universities.append({
            "id": int(uid),
            "anchor": handbook_anchor("uni", name, int(uid)),
            "name": name,
            "sections": raw_sections if isinstance(raw_sections, list) else [],
            "website": str(uni.get("website") or ""),
            "sort_order": _coerce_sort_order(uni.get("sort_order")),
            "school_category": str(uni.get("school_category") or "").strip(),
            "tier": uni.get("tier"),
            "tier_label": str(uni.get("tier_label") or "").strip(),
        })

    # Stable ordering: integer tier ascending (non-integer tiers last),
    # then case-insensitive name, then id.
    def _tier_sort(u: dict) -> tuple:
        t = u.get("tier")
        rank = t if isinstance(t, int) else 99
        return (rank, (u.get("name") or "").lower(), u.get("id", 0))

    active_universities.sort(key=_tier_sort)

    # -- Normalise globals --
    globals_data = sort_sections_stable(globals_data)

    required_keys = [
        "table_of_contents",
        "overview",
        "how_the_program_works",
    ]
    existing_keys = {
        str(g.get("section_key", "")).lower()
        for g in globals_data
        if isinstance(g, dict)
    }
    missing = [k for k in required_keys if k not in existing_keys]
    if missing:
        msg = f"Handbook required sections missing: {','.join(missing)}"
        logger.error(msg)
        raise RuntimeError(msg)

    general_sections: list[dict[str, Any]] = []
    toc_sort_order = None
    toc_title = "Table of Contents"

    for idx, g in enumerate(globals_data):
        if not isinstance(g, dict):
            continue
        key_raw = str(g.get("section_key", ""))
        key = key_raw.lower()
        sort_order = _coerce_sort_order(g.get("sort_order"))

        # Only the first table_of_contents record positions the TOC; it is
        # not rendered as a general section.
        if key == "table_of_contents" and toc_sort_order is None:
            toc_sort_order = sort_order if sort_order is not None else (idx + 1)
            toc_title = str(g.get("section_title", "Table of Contents"))
            continue

        section_hits: list[str] = []
        _collect_program_option_inconsistencies(
            g.get("section_json", {}),
            f"global.{key_raw}",
            section_hits,
        )
        for hit in section_hits:
            if hit not in stats["program_option_warnings"]:
                stats["program_option_warnings"].append(hit)

        anchor = handbook_anchor(
            "g",
            str(g.get("section_title", g.get("section_key", "section"))),
            idx,
        )
        general_sections.append({
            "anchor": anchor,
            "data": g,
            "sort_order": sort_order,
        })

    # -- Build TOC items --
    toc_items: list[dict[str, Any]] = []
    for gs in general_sections:
        # Prefer the JSON-level title (display-ready) over the DB section_title.
        gs_json = gs["data"].get("section_json", {})
        raw_title = gs_json.get("title") if isinstance(gs_json, dict) else None
        # A null or non-string title must not crash on .strip().
        json_title = str(raw_title).strip() if raw_title is not None else ""
        if json_title:
            title = json_title
        else:
            title = str(gs["data"].get("section_title", gs["data"].get("section_key", "Section")))
        toc_items.append({
            "title": title,
            "target": "#" + gs["anchor"],
            "level": 0,
            "bold": True,
            "sort": gs["sort_order"],
        })

    for u in active_universities:
        toc_items.append({
            "title": u["name"],
            "target": "#" + u["anchor"],
            "level": 1,
            "bold": False,
            "sort": u.get("sort_order"),
        })

    # -- Prepare sorted TOC items for template --
    sorted_toc = sort_toc(list(toc_items))
    toc_items_sorted = []
    for e in sorted_toc:
        if not isinstance(e, dict):
            continue
        title = str(e.get("title", "")).strip()
        if not title:
            continue
        level = max(0, min(3, int(e.get("level", 0))))
        bold = bool(e.get("bold", False))
        upper = bool(e.get("upper", False))
        if level == 0:
            # Top-level entries are always bold and upper-cased.
            bold = True
            upper = True
        display_title = title.upper() if upper else title
        page = str(e.get("page", "")).strip()

        toc_items_sorted.append({
            "title": title,
            "display_title": display_title,
            "target": str(e.get("target", e.get("anchor", ""))).strip(),
            "level": level,
            "bold": bold,
            "upper": upper,
            "page": page,
        })

    # -- Prepare general sections with rendered HTML and typed blocks --
    template_sections = []
    for gs in general_sections:
        data = gs["data"]
        section_key = str(data.get("section_key", ""))
        section_title = str(data.get("section_title", ""))
        key_lower = section_key.lower()

        sec_class = SECTION_CLASS_MAP.get(key_lower)
        if sec_class is None:
            sec_class = "sec-" + re.sub(r"[^a-z0-9]+", "-", key_lower)

        section_json = data.get("section_json", {})
        if not isinstance(section_json, dict):
            section_json = {}

        # Typed blocks for the new rendering path
        blocks = normalize_section(
            section_key,
            section_title,
            section_json,
            debug=debug,
        )

        # Legacy HTML fallback
        section_html = render_global_blocks(
            section_key,
            section_title,
            section_json,
            debug,
        )

        if not section_html.strip() and not blocks:
            logger.warning(
                "Empty section render key=%s sort_order=%s",
                data.get("section_key"),
                data.get("sort_order"),
            )

        template_sections.append({
            "anchor": gs["anchor"],
            "data": data,
            "page_break": key_lower in PAGE_BREAK_KEYS,
            "sec_class": sec_class,
            "blocks": blocks,
            "rendered_html": Markup(section_html),
        })

    # -- Prepare university data for templates (both old + new paths) --
    university_template_data = []
    university_block_data = []
    # Tier labels already announced; the first university carrying a new
    # label is flagged as the start of that tier group.
    seen_tier_labels: set[str] = set()

    for idx, uni_raw in enumerate(active_universities):
        uni_raw["_is_first"] = (idx == 0)

        current_tier_label = str(uni_raw.get("tier_label", "")).strip()
        if current_tier_label and current_tier_label not in seen_tier_labels:
            seen_tier_labels.add(current_tier_label)
            uni_raw["_tier_group_start"] = True
            uni_raw["_tier_group_label"] = f"{current_tier_label} Schools"

        uni_hits: list[str] = []
        _collect_program_option_inconsistencies(
            uni_raw.get("sections", []),
            f"university.{uni_raw.get('name', idx)}",
            uni_hits,
        )
        for hit in uni_hits:
            if hit not in stats["program_option_warnings"]:
                stats["program_option_warnings"].append(hit)

        # NOTE(review): both paths below receive the same `stats` dict, so any
        # counter incremented by both helpers is counted twice per
        # university - confirm whether that is intended.

        # Legacy path
        uni_data = _prepare_university_data(
            uni_raw, allow_remote, include_inactive_programs, debug, stats,
        )
        # Carry tier metadata to template data
        uni_data["tier"] = uni_raw.get("tier")
        uni_data["tier_label"] = uni_raw.get("tier_label", "")
        uni_data["tier_group_start"] = uni_raw.get("_tier_group_start", False)
        uni_data["tier_group_label"] = uni_raw.get("_tier_group_label", "")
        university_template_data.append(uni_data)

        # New block path
        uni_block = normalize_university(
            uni_raw, allow_remote, include_inactive_programs, debug, stats,
        )
        university_block_data.append(uni_block)

    # -- Bottom pages --
    bottom_pages_urls = []
    raw_bottom = images.get("bottomPages", [])
    if isinstance(raw_bottom, list):
        for img_path in raw_bottom:
            uri = _existing_file_uri(str(img_path))
            if uri:
                bottom_pages_urls.append(uri)

    # -- Render template --
    if stats["program_option_warnings"]:
        logger.warning(
            "Program option consistency warnings (missing REGULAR or PRIME pair): %s",
            stats["program_option_warnings"],
        )

    html = template.render(
        font_css=Markup(font_css),
        base_url=base_url,
        extra_css="",
        header_image=header_image,
        label_image=label_image,
        cover_image=cover_image,
        toc_image=toc_image,
        toc_items=toc_items,
        toc_items_sorted=toc_items_sorted,
        toc_title=toc_title,
        toc_sort_order=toc_sort_order,
        general_sections=template_sections,
        summary_block=None,
        universities=university_template_data,
        university_blocks=university_block_data,
        bottom_pages=bottom_pages_urls,
        debug=debug,
        stats=stats,
    )

    return html
app/services/normalizer.py ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Normalization layer — converts raw MySQL handbook content into typed render blocks.
2
+
3
+ Each section_json from the database is parsed into a list of RenderBlock
4
+ objects. Every block has a `block_type` that maps 1-to-1 to a Jinja
5
+ partial and a CSS class. This prevents ad-hoc interpretation of raw
6
+ JSON throughout the rendering pipeline.
7
+
8
+ Block types (from theme.BLOCK_TYPES):
9
+ heading_1, heading_2, paragraph, bullet_list, note, table,
10
+ enrollment_steps, school_profile, university_summary, toc,
11
+ cover, full_page_image
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ from urllib.parse import quote_plus
18
+ from dataclasses import dataclass, field
19
+ from typing import Any
20
+
21
+ from app.services.renderers import _extract_university_funding
22
+ from app.services.utils import (
23
+ ensure_program_options_pair,
24
+ emphasize_keywords,
25
+ format_money_figures,
26
+ get_any,
27
+ h,
28
+ hb_slug,
29
+ is_assoc,
30
+ is_truthy,
31
+ linkify_urls,
32
+ )
33
+ from app.services.renderers import fetch_image_data_uri
34
+
35
+
36
+ # ───────────────────────────────────────────────────────────────
37
+ # Block data-classes
38
+ # ───────────────────────────────────────────────────────────────
39
+
40
@dataclass
class RenderBlock:
    """A single typed unit of handbook content handed to the templates.

    Only ``block_type`` is required; the other two fields default to an
    empty string and a fresh empty dict respectively.
    """

    # Identifies the kind of block, e.g. "heading_1" or "table".
    block_type: str
    # CSS class string for the block's element.
    css_class: str = ""
    # Block-specific payload; default_factory gives every instance its own
    # dict rather than one shared mutable default.
    data: dict[str, Any] = field(default_factory=dict)
46
+
47
+
48
+ # ───────────────────────────────────────────────────────────────
49
+ # Section → blocks
50
+ # ───────────────────────────────────────────────────────────────
51
+
52
def normalize_section(
    section_key: str,
    section_title: str,
    section_json: dict | list,
    *,
    universities: list[dict] | None = None,
    debug: bool = False,
) -> list[RenderBlock]:
    """Convert a single global section payload into a list of RenderBlocks.

    The payload shape decides the output; the first matching rule wins:

    1. ``steps`` list                      -> one ``enrollment_steps`` block
    2. ``bullets`` list (or ``items`` with layout ``bullets_with_note``)
                                           -> ``bullet_list`` plus optional ``note``
    3. ``columns`` + ``rows`` lists        -> basic ``table``
    4. layout ``table_v2``                 -> comparison ``table``
    5. layout ``doc_v1`` with ``blocks``   -> normalised doc blocks
    6. ``text`` key                        -> ``paragraph``

    A ``heading_1`` block is prepended whenever a title is available, except
    for the ``table_of_contents`` section.

    Args:
        section_key: Section identifier; compared case-insensitively.
        section_title: Title used when the payload carries no ``title``.
        section_json: Section payload; anything that is not a dict is treated
            as an empty payload.
        universities: Accepted for interface compatibility; not used here.
        debug: Accepted for interface compatibility; not used here.

    Returns:
        The blocks for this section (possibly empty).
    """

    def _clean(value: Any) -> str:
        """Stringify and trim *value*; ``None`` becomes "" (not "None")."""
        return "" if value is None else str(value).strip()

    blocks: list[RenderBlock] = []
    key_norm = section_key.lower().strip()

    if not isinstance(section_json, dict):
        section_json = {}

    layout_norm = _clean(section_json.get("layout")).lower()

    # -- Section heading --
    # Prefer the JSON-level title (display-ready) over the DB section_title.
    title = _clean(section_json.get("title")) or section_title.strip()
    if title and key_norm != "table_of_contents":
        blocks.append(RenderBlock(
            block_type="heading_1",
            css_class="hb-heading-1",
            data={"text": title},
        ))

    # -- Steps -> enrollment_steps --
    steps = section_json.get("steps")
    if isinstance(steps, list):
        blocks.append(RenderBlock(
            block_type="enrollment_steps",
            css_class="hb-enrollment-steps",
            data={"steps": _normalize_steps(steps)},
        ))
        return blocks

    # -- Bullets --
    has_bullets = isinstance(section_json.get("bullets"), list)
    has_items = isinstance(section_json.get("items"), list)
    if has_bullets or (layout_norm == "bullets_with_note" and has_items):
        from markupsafe import Markup

        # "items" takes precedence over "bullets" whenever it is a list.
        lst = section_json.get("items") if has_items else section_json.get("bullets")
        items = [
            _normalize_text_content(entry)
            for entry in (_clean(b) for b in lst)
            if entry
        ]
        # NOTE(review): the result of emphasize_keywords is marked safe
        # as-is; confirm that helper escapes its input.
        html_items = [Markup(emphasize_keywords(it)) for it in items]
        blocks.append(RenderBlock(
            block_type="bullet_list",
            css_class="hb-bullet-list",
            data={"entries": html_items, "html_entries": True},
        ))

        raw_note = section_json.get("note")
        if raw_note is None:
            raw_note = section_json.get("footnote")
        note = _normalize_text_content(_clean(raw_note))
        if note:
            blocks.append(RenderBlock(
                block_type="note",
                css_class="hb-note",
                data={"text": note},
            ))
        return blocks

    # -- Basic table --
    cols = section_json.get("columns")
    rows = section_json.get("rows")
    if isinstance(cols, list) and isinstance(rows, list):
        blocks.append(_normalize_basic_table(cols, rows))
        return blocks

    # -- table_v2 --
    if layout_norm == "table_v2":
        blocks.append(_normalize_table_v2(section_json))
        return blocks

    # -- doc_v1 --
    if layout_norm == "doc_v1" and isinstance(section_json.get("blocks"), list):
        blocks.extend(_normalize_doc_v1(section_json["blocks"], skip_title=title))
        # Post-process breakdown section for Relocation Cost layout
        if key_norm == "program_features_breakdown":
            blocks = _postprocess_breakdown(blocks, section_json["blocks"])
        # Post-process Tier 2 section for sub-bullet styling
        if key_norm == "summary_of_universities_cosigner":
            blocks = _postprocess_tier2(blocks)
        return blocks

    # -- Fallback: plain text paragraph --
    if "text" in section_json:
        raw_text = section_json["text"]
        text = _normalize_text_content("" if raw_text is None else str(raw_text))
        if text.strip():
            from markupsafe import Markup

            blocks.append(RenderBlock(
                block_type="paragraph",
                css_class="hb-paragraph",
                data={
                    "text": text,
                    "html": Markup(emphasize_keywords(text)),
                },
            ))

    return blocks
156
+
157
+
158
def _normalize_text_content(text: str) -> str:
    """Run the shared text clean-up used for all handbook copy.

    Money figures are formatted first; the result is then passed through
    the program-options pairing helper.
    """
    money_formatted = format_money_figures(text)
    return ensure_program_options_pair(money_formatted)
161
+
162
+
163
+ # ───────────────────────────────────────────────────────────────
164
+ # University profile normalisation
165
+ # ───────────────────────────────────────────────────────────────
166
+
167
def normalize_university(
    uni_raw: dict[str, Any],
    allow_remote: bool,
    include_inactive_programs: bool,
    debug: bool,
    stats: dict[str, Any],
) -> RenderBlock:
    """Convert raw university data into a ``school_profile`` RenderBlock.

    NOTE(review): the nesting of this function was reconstructed from a
    flattened diff. Two placements are uncertain and should be checked
    against the repository: the second image-placeholder ``else`` branch,
    and the ``if resolved_website`` stats update after the overview section.

    Args:
        uni_raw: Prepared university record. ``name`` is required;
            ``sections``, ``website``, ``anchor``, ``sort_order``,
            ``school_category`` and ``is_active`` are read when present.
            The record and its section dicts are not modified.
        allow_remote: When true, the campus image URL is fetched and embedded.
        include_inactive_programs: When false, programs whose active flag is
            falsy are dropped.
        debug: Forwarded to ``normalize_section`` for extra sections.
        stats: Mutable counters dict; ``universities``, ``images_embedded``,
            ``images_placeholder``, ``university_links`` and ``website_rows``
            are incremented here.

    Returns:
        A ``school_profile`` block whose ``data`` holds the profile fields.

    Raises:
        KeyError: If *uni_raw* has no ``name`` key.
    """
    uni_name = uni_raw["name"]
    sections = uni_raw.get("sections", [])

    stats["universities"] = stats.get("universities", 0) + 1

    # Build section map; merge duplicate "programs" sections.
    sec_map: dict[str, dict] = {}
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = str(s.get("section_key", ""))
        if not k:
            continue
        if k == "programs" and k in sec_map:
            first = sec_map["programs"]
            existing = first.get("section_json", {})
            incoming = s.get("section_json", {})
            if not isinstance(existing, dict):
                existing = {}
            if not isinstance(incoming, dict):
                incoming = {}
            a = existing.get("programs", [])
            b = incoming.get("programs", [])
            if not isinstance(a, list):
                a = []
            if not isinstance(b, list):
                b = []
            # Merge into copies: the section dicts belong to the caller and
            # are also read by the legacy rendering path, so they must not be
            # rewritten in place.
            merged_json = dict(existing)
            merged_json["programs"] = a + b
            merged_section = dict(first)
            merged_section["section_json"] = merged_json
            sec_map["programs"] = merged_section
            continue
        sec_map[k] = s

    # Campus image
    img_section = sec_map.get("campus_image") or sec_map.get("image")
    campus_image = ""
    campus_caption = ""
    if img_section:
        j = img_section.get("section_json", {})
        if isinstance(j, dict):
            campus_url = str(j.get("image_url", "")).strip()
            campus_caption = str(j.get("caption", "")).strip()
            if allow_remote and campus_url:
                embedded = fetch_image_data_uri(campus_url)
                if embedded:
                    campus_image = embedded
                    stats["images_embedded"] = stats.get("images_embedded", 0) + 1
                else:
                    stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1
            else:
                stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1

    # Overview and website
    # str() guards against a non-string website value before .strip().
    resolved_website = str(uni_raw.get("website") or "").strip()
    overview_data = None

    if "overview" in sec_map:
        overview_json = sec_map["overview"].get("section_json", {})
        if not isinstance(overview_json, dict):
            overview_json = {}

        site_from_overview = get_any(
            overview_json,
            ["university_website", "university_website_url", "website",
             "site", "url", "homepage", "web_url"],
        )
        # The record-level website wins; the overview value is a fallback.
        if not resolved_website and site_from_overview:
            resolved_website = site_from_overview

        overview_data = {
            "founded": get_any(overview_json, ["founded", "Founded"]),
            "total_students": get_any(overview_json, ["total_students", "Total Students"]),
            "undergraduates": get_any(overview_json, [
                "undergraduates", "Undergraduate Students", "undergraduate_students",
            ]),
            "postgraduates": get_any(overview_json, [
                "postgraduate_students", "Postgraduate Students",
            ]),
            "acceptance_rate": get_any(overview_json, ["acceptance_rate", "Acceptance Rate"]),
            "location": get_any(overview_json, ["location", "Location"]),
            "tuition": format_money_figures(str(get_any(overview_json, [
                "tuition_out_of_state_yearly",
                "Yearly Out of State Tuition Fees",
                "Yearly Out-of-State Tuition Fees",
                "Yearly Tuition Fees",
                "Yearly Out-of-State Tuition Fees:",
            ]) or "")) or None,
        }

    if resolved_website:
        stats["university_links"] = stats.get("university_links", 0) + 1
        stats["website_rows"] = stats.get("website_rows", 0) + 1

    # Benefits + Funding
    benefits: list[str] | None = []
    funding_heading = "Funding Available"
    funding_items: list[str] = []
    if "benefits" in sec_map:
        j = sec_map["benefits"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        raw_benefits = j.get("benefits", [])
        if isinstance(raw_benefits, list):
            benefits = [
                _normalize_text_content(str(b).strip())
                for b in raw_benefits
                if str(b).strip()
            ]
        else:
            benefits = []

        funding_heading, funding_items = _extract_university_funding(
            j,
            {
                "school_category": uni_raw.get("school_category"),
                "status": "in" if is_truthy(uni_raw.get("is_active", True)) else "out",
            },
        )
        # Normalize money formatting in funding items
        funding_items = [_normalize_text_content(item) for item in funding_items]

    # Programs (stays None when the university has no "programs" section)
    programs = None
    if "programs" in sec_map:
        j = sec_map["programs"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        programs_raw = j.get("programs", [])
        if not isinstance(programs_raw, list):
            programs_raw = []

        if not include_inactive_programs:
            programs_raw = [
                p for p in programs_raw
                if isinstance(p, dict) and is_truthy(
                    p.get("program_active", p.get("is_active", p.get("active", 1)))
                )
            ]

        programs = []
        seen_names = set()
        for p in programs_raw:
            if not isinstance(p, dict):
                continue
            program_name = _normalize_text_content(str(p.get("program_name", "")).strip())
            # Deduplicate by lowercase program name; the first occurrence wins.
            key = program_name.lower()
            if key in seen_names:
                continue
            seen_names.add(key)
            link = str(p.get("program_link", "")).strip()
            if not link and isinstance(p.get("program_links"), dict):
                link = str(p["program_links"].get("web_link", "")).strip()

            programs.append({
                "name": program_name,
                "link": link,
                "designation": _normalize_text_content(str(p.get("designation", ""))),
                "entrance": _normalize_text_content(
                    str(p.get("entrance_exam", p.get("entrance_examination", "")))
                ),
            })

    # Extra sections (rendered via the global section normalizer)
    skip_keys = {"campus_image", "image", "overview", "benefits", "programs"}
    extra_blocks: list[list[RenderBlock]] = []
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = str(s.get("section_key", ""))
        if not k or k in skip_keys:
            continue
        title = str(s.get("section_title", ""))
        j = s.get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        extra_blocks.append(normalize_section(k, title, j, debug=debug))

    classes = ["hb-school-profile", "page-break"]

    return RenderBlock(
        block_type="school_profile",
        css_class=" ".join(classes),
        data={
            "name": uni_name,
            "anchor": uni_raw.get("anchor"),
            "sort_order": uni_raw.get("sort_order"),
            "website": resolved_website,
            "overview": overview_data,
            "campus_image": campus_image,
            "campus_caption": campus_caption,
            "benefits": benefits,
            "funding_heading": funding_heading,
            "funding_items": funding_items,
            "programs": programs,
            "extra_blocks": extra_blocks,
        },
    )
370
+
371
+
372
+ # ───────────────────────────────────────────────────────────────
373
+ # Internal helpers
374
+ # ───────────────────────────────────────────────────────────────
375
+
376
+ def _normalize_steps(steps: list) -> list[dict]:
377
+ """Normalise enrollment steps into structured dicts."""
378
+ result = []
379
+ step_num = 0
380
+ for s in steps:
381
+ if not isinstance(s, dict):
382
+ continue
383
+ step_num += 1
384
+ step_title = str(s.get("title", s.get("step_title", ""))).strip()
385
+ body = _normalize_text_content(str(s.get("body", s.get("description", ""))).strip())
386
+
387
+ # Pre-format body with bold emphasis on REGULAR, PRIME, $ amounts
388
+ from markupsafe import Markup
389
+ body_html = Markup(emphasize_keywords(body)) if body else ""
390
+
391
+ links = []
392
+ plain_links = []
393
+ raw_links = s.get("links", [])
394
+ if isinstance(raw_links, list):
395
+ for lnk in raw_links:
396
+ if not isinstance(lnk, dict):
397
+ continue
398
+ label = str(lnk.get("label", "Link")).strip()
399
+ url = str(lnk.get("url", "")).strip()
400
+ if url:
401
+ low_label = label.lower()
402
+ low_url = url.lower()
403
+ is_telegram = "telegram" in low_label or "t.me" in low_url
404
+ if step_num == 2 and "internationalscholarsprogram.com" in low_url and not re.match(r"^https?://", url, flags=re.IGNORECASE):
405
+ url = "https://" + url
406
+ # All links (including Telegram) are rendered as clickable anchors.
407
+ # For Telegram use the full URL as visible label so readers can see/type it.
408
+ link_label = url if is_telegram else label
409
+ links.append({"label": link_label, "url": url})
410
+
411
+ if step_num == 2 and not any(
412
+ "internationalscholarsprogram.com" in str(l.get("url", "")).lower()
413
+ for l in links
414
+ ):
415
+ links.append({
416
+ "label": "www.internationalscholarsprogram.com",
417
+ "url": "https://www.internationalscholarsprogram.com",
418
+ })
419
+
420
+ qr = str(s.get("qr_url", s.get("qr_image", ""))).strip()
421
+ telegram_url = ""
422
+ if step_num == 1:
423
+ telegram_ref = ""
424
+ if plain_links:
425
+ telegram_ref = plain_links[0]
426
+ elif isinstance(body, str):
427
+ m = re.search(r"(https?://(?:t\.me|telegram\.me)/[^\s<)]+)", body, flags=re.IGNORECASE)
428
+ if m:
429
+ telegram_ref = m.group(1)
430
+ if telegram_ref:
431
+ telegram_url = telegram_ref
432
+ if not qr:
433
+ qr = (
434
+ "https://api.qrserver.com/v1/create-qr-code/?size=160x160&data="
435
+ + quote_plus(telegram_ref)
436
+ )
437
+ # Strip the raw telegram URL and the follow-up description from body
438
+ body = re.sub(r"https?://(?:t\.me|telegram\.me)/[^\s<)]+", "", body, flags=re.IGNORECASE)
439
+ body = re.sub(r"This telegram group will help you interact with program administrators and other prospective students where you can ask any questions you may have about the program\.?", "", body, flags=re.IGNORECASE)
440
+ body = re.sub(r"\n{2,}", "\n", body).strip()
441
+ body_html = Markup(emphasize_keywords(body)) if body else ""
442
+
443
+ result.append({
444
+ "number": step_num,
445
+ "title": step_title,
446
+ "body": body,
447
+ "body_html": body_html,
448
+ "links": links,
449
+ "plain_links": plain_links,
450
+ "qr_url": qr,
451
+ "telegram_url": telegram_url,
452
+ })
453
+ return result
454
+
455
+
456
def _normalize_basic_table(cols: list, rows: list) -> RenderBlock:
    """Normalise a basic table (columns + rows) into a ``table`` block.

    Rows may be lists (cells taken positionally) or dicts (cells looked up
    per column); any other row type is skipped. For dict rows the lookup
    key is the column label lower-cased with every non-alphanumeric run
    replaced by ``_``. If that key is absent, the same key without
    leading/trailing underscores and then the raw label are tried, so a
    label such as "Cost ($)" still finds a ``cost`` entry.

    Args:
        cols: Column labels; stringified for output.
        rows: Row payloads as described above.

    Returns:
        A ``table`` block with ``columns``, ``rows`` (lists of HTML strings)
        and ``variant`` set to "standard".
    """
    labels = [str(c) for c in cols]
    # Computed once per table rather than once per cell.
    slugs = [re.sub(r"[^a-z0-9]+", "_", label.lower()) for label in labels]

    def _cell_html(value: Any) -> str:
        """Normalise one cell: text clean-up, keyword emphasis, link markup."""
        # A null cell becomes empty instead of the literal text "None".
        text = "" if value is None else str(value)
        # NOTE(review): output is HTML; confirm the helpers escape raw text.
        return linkify_urls(emphasize_keywords(_normalize_text_content(text)))

    norm_rows = []
    for r in rows:
        if isinstance(r, dict):
            row = []
            for label, slug in zip(labels, slugs):
                value: Any = ""
                # The slug stays first so rows that matched before still
                # resolve to the same entry.
                for candidate in (slug, slug.strip("_"), label):
                    if candidate in r:
                        value = r[candidate]
                        break
                row.append(_cell_html(value))
            norm_rows.append(row)
        elif isinstance(r, list):
            norm_rows.append([_cell_html(cell) for cell in r])

    return RenderBlock(
        block_type="table",
        css_class="hb-table",
        data={
            "columns": labels,
            "rows": norm_rows,
            "variant": "standard",
        },
    )
484
+
485
+
486
def _normalize_table_v2(json_data: dict) -> RenderBlock:
    """Normalise table_v2 (comparison table with header groups).

    Reads ``base_columns``, ``header_groups`` and ``rows`` from *json_data*;
    any of them that is not a list is treated as empty, and non-dict
    entries inside them are skipped. Column keys and labels are stringified
    everywhere, so ``base_columns``, ``header_groups`` and ``all_columns``
    agree with the keys used in the normalised rows.

    Args:
        json_data: The section payload.

    Returns:
        A ``table`` block with ``base_columns``, ``header_groups``,
        ``all_columns``, ``rows`` (dicts of column key -> HTML string) and
        ``variant`` set to "comparison".
    """

    def _as_list(value: Any) -> list:
        """Return *value* if it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    def _column_specs(raw_cols: Any) -> list[dict]:
        """Extract {"key", "label"} string pairs from the dict entries."""
        return [
            {"key": str(c.get("key", "")), "label": str(c.get("label", ""))}
            for c in _as_list(raw_cols)
            if isinstance(c, dict)
        ]

    base_columns = _column_specs(json_data.get("base_columns", []))
    header_groups = [
        {
            "label": str(g.get("label", "")),
            "columns": _column_specs(g.get("columns", [])),
        }
        for g in _as_list(json_data.get("header_groups", []))
        if isinstance(g, dict)
    ]

    # Flat column order: base columns first, then each group's columns.
    # Copies keep "all_columns" independent of the other two lists.
    all_cols: list[dict] = [dict(c) for c in base_columns]
    for group in header_groups:
        all_cols.extend(dict(c) for c in group["columns"])

    norm_rows = []
    for r in _as_list(json_data.get("rows", [])):
        if not isinstance(r, dict):
            continue
        row = {}
        for c in all_cols:
            k = c["key"]
            val = r.get(k, "")
            # A cell may be wrapped as {"text": ...}.
            if isinstance(val, dict):
                val = val.get("text", "")
            # A null cell becomes empty instead of the literal text "None".
            text = "" if val is None else str(val)
            row[k] = emphasize_keywords(_normalize_text_content(text))
        norm_rows.append(row)

    return RenderBlock(
        block_type="table",
        css_class="hb-table hb-table-comparison",
        data={
            "base_columns": base_columns,
            "header_groups": header_groups,
            "all_columns": all_cols,
            "rows": norm_rows,
            "variant": "comparison",
        },
    )
544
+
545
+
546
+ # ───────────────────────────────────────────────────────────────
547
+ # Breakdown section post-processor
548
+ # ───────────────────────────────────────────────────────────────
549
+
550
def _postprocess_breakdown(
    blocks: list[RenderBlock],
    raw_blocks: list,
) -> list[RenderBlock]:
    """Rewrite the breakdown section to match the reference layout.

    - "Relocation Cost" becomes a banner heading with page-break-before
    - The relocation table gets a merged right cell (rowspan) with the
      cost-coverage note moved inside it
    - "ISP FINANCING" becomes an inline note with mixed bold/italic
    - "NB: CREDIT FACILITY" is styled green
    - Dollar amounts in parentheticals keep their original $ format

    Args:
        blocks: Normalised RenderBlocks of the section, in display order.
        raw_blocks: The raw (pre-normalisation) block dicts of the same
            section; the relocation table and its note are read from here
            so their original text is used.

    Returns:
        A new list of RenderBlocks; blocks that match none of the rules are
        passed through unchanged.
    """
    # Find raw blocks for the relocation cost table (pre-normalised, $ intact):
    # the first table_v1 after a "relocation" subheading, and the first
    # paragraph after that table.
    raw_reloc_table = None
    raw_note_after_table = None
    found_reloc = False
    for rb in raw_blocks:
        if not isinstance(rb, dict):
            continue
        if rb.get("type") == "subheading" and "relocation" in str(rb.get("text", "")).lower():
            found_reloc = True
            continue
        if found_reloc and rb.get("type") == "table_v1" and raw_reloc_table is None:
            raw_reloc_table = rb
            continue
        if found_reloc and raw_reloc_table and rb.get("type") == "paragraph" and raw_note_after_table is None:
            raw_note_after_table = rb
            break

    result: list[RenderBlock] = []
    i = 0
    while i < len(blocks):
        blk = blocks[i]

        # ── Detect "Relocation Cost" heading ──
        if (blk.block_type == "heading_2"
                and "relocation" in blk.data.get("text", "").lower()):

            # Banner heading with page break
            result.append(RenderBlock(
                block_type="heading_2",
                css_class="hb-heading-2 hb-banner-heading page-break",
                data={"text": blk.data["text"]},
            ))
            i += 1

            # Bound up-front so the paragraph check below is always safe,
            # even when no table follows the heading.
            note_text = ""

            # Replace the next table with spanning variant that has merged cell
            if i < len(blocks) and blocks[i].block_type == "table" and raw_reloc_table:
                raw_rows = raw_reloc_table.get("rows", [])
                # Build the note text for the merged right cell
                if raw_note_after_table:
                    note_text = str(raw_note_after_table.get("text", ""))

                spanning_rows = _build_relocation_spanning_rows(raw_rows, note_text)
                result.append(RenderBlock(
                    block_type="table",
                    css_class="hb-table hb-relocation-table",
                    data={"rows": spanning_rows, "variant": "spanning"},
                ))
                i += 1  # skip the original table

            # Skip the paragraph that was moved into the merged cell
            if (i < len(blocks)
                    and blocks[i].block_type == "paragraph"
                    and note_text):
                i += 1
            continue

        # ── "ISP FINANCING" heading → inline note with mixed formatting ──
        if (blk.block_type == "heading_2"
                and "isp financing" in blk.data.get("text", "").lower()):
            # Next block should be the interest rate paragraph
            rate_text = ""
            if i + 1 < len(blocks) and blocks[i + 1].block_type == "paragraph":
                rate_text = blocks[i + 1].data.get("text", "")
            result.append(RenderBlock(
                block_type="note",
                css_class="hb-note hb-isp-financing",
                data={
                    "parts": [
                        {"text": "ISP FINANCING", "style": "bold"},
                        {"text": " (" + _extract_rate_italic(rate_text) + "): " if rate_text else "", "style": "italic"},
                        {"text": _extract_rate_amount(rate_text), "style": "bold"},
                    ],
                    "inline": True,
                },
            ))
            i += 1  # skip the heading
            if rate_text:
                i += 1  # skip the paragraph
            continue

        # ── "NB: CREDIT FACILITY" note → green styling ──
        if (blk.block_type == "note"
                and "credit facility" in blk.data.get("text", "").lower()):
            result.append(RenderBlock(
                block_type="note",
                css_class="hb-note hb-credit-note",
                data=blk.data,
            ))
            i += 1
            continue

        result.append(blk)
        i += 1

    return result
661
+
662
+
663
def _build_relocation_spanning_rows(
    raw_rows: list, note_text: str,
) -> list[list[dict]]:
    """Build spanning rows for the relocation cost table.

    Row 0: normal 2-column (consultation fees | Covered in the contribution)
    Rows 1-7: left cell per row, right cell merged (rowspan) with italic note
    Rows 8+: left cell only, empty right

    When ``note_text`` is empty there is nothing to merge, so every row after
    row 0 gets an explicit empty right cell and the table stays rectangular.

    Args:
        raw_rows: Raw table rows; each row is indexed positionally
            (index 0 = left text, index 1 = right text).
        note_text: Text for the merged right cell. Blank-line paragraph
            breaks (``"\\n\\n"``) become ``<br/><br/>``.

    Returns:
        A list of rows, each a list of ``{"text", "colspan", "rowspan"}``
        cell dicts; an empty list when ``raw_rows`` is empty.
    """
    from markupsafe import Markup

    if not raw_rows:
        return []

    def _cell_text(row, index: int) -> str:
        """Return the cell at *index* as a string, or "" when the row is too short."""
        return str(row[index] if len(row) > index else "")

    def _plain(text) -> dict:
        """Wrap *text* in an unmerged 1x1 cell."""
        return {"text": text, "colspan": 1, "rowspan": 1}

    # Row 0 — has "Covered in the contribution"
    first = raw_rows[0]
    rows: list[list[dict]] = [[
        _plain(Markup(emphasize_keywords(_cell_text(first, 0)))),
        _plain(Markup("<em>" + h(_cell_text(first, 1)) + "</em>")),
    ]]

    # Rows 1-7: items with dollar amounts that get the merged right cell
    # These are the visa/fee/rent/ticket rows (have parenthetical dollar amounts)
    merged_start = 1
    merged_end = min(8, len(raw_rows))  # Visa Integrity through Air ticket
    has_note = bool(note_text)

    for idx in range(merged_start, len(raw_rows)):
        left = _plain(Markup(emphasize_keywords(_cell_text(raw_rows[idx], 0))))

        if has_note and idx == merged_start:
            # First merged row gets the rowspan cell. Each paragraph is
            # escaped on its own, then joined with the only markup allowed.
            note_html = "<br/><br/>".join(
                str(h(part)) for part in note_text.split("\n\n")
            )
            right = {
                "text": Markup('<em class="hb-merged-note">' + note_html + "</em>"),
                "colspan": 1,
                "rowspan": merged_end - merged_start,
            }
            rows.append([left, right])
        elif has_note and idx < merged_end:
            # Subsequent merged rows — no right cell (covered by rowspan)
            rows.append([left])
        else:
            # Remaining rows (or all rows when there is no note) — empty right cell
            rows.append([left, _plain("")])

    return rows
716
+
717
+
718
+ def _extract_rate_italic(text: str) -> str:
719
+ """Extract the italic portion: 'Interest rate of 12% – 15% Market Rate PA'."""
720
+ # Text is like: "Interest rate of 12% – 15% Market Rate: UP TO USD 10,000"
721
+ m = re.match(r"(Interest rate.*?(?:Market Rate|PA))", text, re.IGNORECASE)
722
+ if m:
723
+ return m.group(1).rstrip(": ")
724
+ # Fallback: everything before the colon
725
+ if ":" in text:
726
+ return text.split(":")[0].strip()
727
+ return text
728
+
729
+
730
+ def _extract_rate_amount(text: str) -> str:
731
+ """Extract the amount portion: 'UP TO USD 10,000'."""
732
+ m = re.search(r"(UP TO.*)", text, re.IGNORECASE)
733
+ if m:
734
+ return m.group(1).strip()
735
+ if ":" in text:
736
+ return text.split(":", 1)[1].strip()
737
+ return ""
738
+
739
+
740
+ # ───────────────────────────────────────────────────────────────
741
+ # Tier 2 (cosigner) section post-processor
742
+ # ───────────────────────────────────────────────────────────────
743
+
744
+ def _postprocess_tier2(blocks: list[RenderBlock]) -> list[RenderBlock]:
745
+ """Style the Tier 2 section to match the reference layout.
746
+
747
+ - Second consecutive bullet_list (sub-bullets under Sources of Funds)
748
+ gets checkmark styling instead of arrows.
749
+ """
750
+ result: list[RenderBlock] = []
751
+ prev_was_bullet = False
752
+ for blk in blocks:
753
+ if blk.block_type == "bullet_list":
754
+ if prev_was_bullet:
755
+ # This is the sub-bullet list β†’ use checkmark class
756
+ result.append(RenderBlock(
757
+ block_type="bullet_list",
758
+ css_class="hb-bullet-list hb-sub-bullets",
759
+ data=blk.data,
760
+ ))
761
+ else:
762
+ result.append(blk)
763
+ prev_was_bullet = True
764
+ else:
765
+ prev_was_bullet = False
766
+ result.append(blk)
767
+ return result
768
+
769
+
770
def _normalize_doc_v1(blocks: list, *, skip_title: str = "") -> list[RenderBlock]:
    """Convert raw doc_v1 block dicts into typed RenderBlocks.

    Entries that are not dicts, and block types not handled below, are
    skipped. Text-bearing blocks whose normalised text is blank produce
    nothing; table blocks are always emitted, even with no rows.

    Args:
        blocks: Raw block dicts, each carrying a ``type`` field.
        skip_title: When non-empty, every ``heading``/``subheading`` block
            whose stripped, lower-cased text equals this title (compared the
            same way) is dropped, so the section heading already emitted by
            the caller is not repeated.

    Returns:
        The RenderBlocks, in input order.
    """
    from markupsafe import Markup

    def _text_of(source: dict) -> str:
        # Normalised "text" field of a block or inline part.
        return _normalize_text_content(str(source.get("text", "")))

    def _list_field(source: dict, field: str) -> list:
        # Field value when it is a list, otherwise an empty list.
        value = source.get(field, [])
        return value if isinstance(value, list) else []

    def _cell(value) -> str:
        # Table cell text: normalised, then keyword-emphasised.
        return emphasize_keywords(_normalize_text_content(str(value)))

    def _span(cell: dict, attr: str) -> int:
        # colspan/rowspan: use the value only when it is all digits, else 1.
        raw_span = cell.get(attr, "")
        return int(raw_span) if str(raw_span).isdigit() else 1

    def _list_entries(source: dict) -> list:
        # Non-blank list items, stripped and normalised.
        return [
            _normalize_text_content(str(item).strip())
            for item in _list_field(source, "items")
            if str(item).strip()
        ]

    wanted_title = skip_title.strip().lower() if skip_title else ""
    out: list[RenderBlock] = []

    for raw in blocks:
        if not isinstance(raw, dict):
            continue
        kind = str(raw.get("type", ""))

        # Drop headings that merely repeat the section title
        if (wanted_title
                and kind in ("heading", "subheading")
                and str(raw.get("text", "")).strip().lower() == wanted_title):
            continue

        if kind == "paragraph":
            body = _text_of(raw)
            if body.strip():
                out.append(RenderBlock(
                    block_type="paragraph",
                    css_class="hb-paragraph",
                    data={
                        "text": body,
                        "html": Markup(emphasize_keywords(body)),
                    },
                ))

        elif kind == "subheading":
            title = _text_of(raw)
            if title.strip():
                out.append(RenderBlock(
                    block_type="heading_2",
                    css_class="hb-heading-2",
                    data={"text": title},
                ))

        elif kind == "bullets":
            entries = _list_entries(raw)
            entry_html = [Markup(emphasize_keywords(entry)) for entry in entries]
            if entries:
                out.append(RenderBlock(
                    block_type="bullet_list",
                    css_class="hb-bullet-list",
                    data={"entries": entry_html, "html_entries": True},
                ))

        elif kind == "numbered_list":
            entries = _list_entries(raw)
            entry_html = [Markup(emphasize_keywords(entry)) for entry in entries]
            if entries:
                out.append(RenderBlock(
                    block_type="bullet_list",
                    css_class="hb-bullet-list hb-numbered-list",
                    data={"entries": entry_html, "ordered": True, "html_entries": True},
                ))

        elif kind == "note":
            note = _text_of(raw)
            if note.strip():
                out.append(RenderBlock(
                    block_type="note",
                    css_class="hb-note",
                    data={"text": note},
                ))

        elif kind == "note_inline":
            pieces = []
            for part in _list_field(raw, "parts"):
                if not isinstance(part, dict):
                    continue
                piece_text = _text_of(part)
                if piece_text:
                    pieces.append({
                        "text": piece_text,
                        "style": str(part.get("style", "")),
                    })
            if pieces:
                out.append(RenderBlock(
                    block_type="note",
                    css_class="hb-note",
                    data={"parts": pieces, "inline": True},
                ))

        elif kind == "table_v1":
            headers = _list_field(raw, "columns")
            table_rows = [
                [_cell(value) for value in row]
                for row in _list_field(raw, "rows")
                if isinstance(row, list)
            ]
            out.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={"columns": [str(col) for col in headers], "rows": table_rows, "variant": "standard"},
            ))

        elif kind == "table":
            # Generic table (columns may be objects or strings, rows may be dicts or lists)
            labels = []
            keys = []
            for col in _list_field(raw, "columns"):
                if isinstance(col, dict):
                    labels.append(str(col.get("label", col.get("key", ""))))
                    keys.append(str(col.get("key", "")))
                else:
                    labels.append(str(col))
                    # Derive a lookup key from the plain-string label
                    keys.append(re.sub(r"[^a-z0-9]+", "_", str(col).lower()))
            table_rows = []
            for row in _list_field(raw, "rows"):
                if isinstance(row, dict):
                    table_rows.append([_cell(row.get(key, "")) for key in keys])
                elif isinstance(row, list):
                    table_rows.append([_cell(value) for value in row])
            out.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={"columns": labels, "rows": table_rows, "variant": "standard"},
            ))

        elif kind in ("table_v3", "table_v4"):
            table_rows = []
            for row in _list_field(raw, "rows"):
                if not isinstance(row, list):
                    continue
                cells = []
                for value in row:
                    if isinstance(value, dict):
                        cells.append({
                            "text": _cell(value.get("text", "")),
                            "colspan": _span(value, "colspan"),
                            "rowspan": _span(value, "rowspan"),
                        })
                    else:
                        cells.append({
                            "text": _cell(value),
                            "colspan": 1,
                            "rowspan": 1,
                        })
                table_rows.append(cells)
            out.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={"rows": table_rows, "variant": "spanning"},
            ))

    return out
app/services/pdf_renderer.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Playwright-based PDF renderer β€” Chromium headless PDF export.
2
+
3
+ Replaces WeasyPrint. Uses Playwright to launch headless Chromium,
4
+ load the fully-rendered HTML, wait for fonts/images/layout, and
5
+ export a print-quality PDF.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import logging
12
+ import os
13
+ import tempfile
14
+ from pathlib import Path
15
+ from typing import Optional
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
# Singleton browser instance for reuse across requests
_browser = None
# Playwright driver that owns ``_browser``; kept so it can be stopped instead
# of leaking a driver process on every relaunch/shutdown.
_playwright = None
_browser_lock = asyncio.Lock()


async def _stop_playwright() -> None:
    """Stop the stored Playwright driver, if any.

    Must be called with ``_browser_lock`` held. Failures are logged and
    swallowed so cleanup never masks the caller's own outcome.
    """
    global _playwright
    if _playwright is None:
        return
    try:
        await _playwright.stop()
    except Exception:
        logger.warning("Failed to stop Playwright driver", exc_info=True)
    finally:
        _playwright = None


async def _get_browser():
    """Get or create a persistent Chromium browser instance.

    Uses ``channel="chrome"`` so Playwright drives the system-installed
    Google Chrome (or Chromium) instead of requiring a separate browser
    download from the Playwright CDN.  Falls back to the default
    bundled Chromium if the system browser is not found.

    When a previously launched browser is no longer connected, its driver
    is stopped before a fresh one is started.

    Returns:
        The shared, connected browser object.

    Raises:
        Exception: Whatever the bundled-Chromium launch raises when both
            launch attempts fail.
    """
    global _browser, _playwright
    async with _browser_lock:
        if _browser is None or not _browser.is_connected():
            from playwright.async_api import async_playwright

            # Drop any stale state left behind by a dead browser.
            _browser = None
            await _stop_playwright()

            pw = await async_playwright().start()
            _playwright = pw

            launch_args = [
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--font-render-hinting=none",
            ]

            # Try system Chrome first, then fall back to bundled Chromium
            try:
                _browser = await pw.chromium.launch(
                    channel="chrome",
                    headless=True,
                    args=launch_args,
                )
                logger.info("System Chrome launched for PDF rendering")
            except Exception:
                logger.warning(
                    "System Chrome not available, falling back to bundled Chromium"
                )
                try:
                    _browser = await pw.chromium.launch(
                        headless=True,
                        args=launch_args,
                    )
                except Exception:
                    # No browser could be started: do not leave the driver running.
                    await _stop_playwright()
                    raise
                logger.info("Bundled Chromium launched for PDF rendering")
        return _browser


async def shutdown_browser():
    """Gracefully close the browser on application shutdown.

    Closes the shared browser if it is still connected, clears the
    singleton, and stops the Playwright driver that was started for it.
    Safe to call when no browser was ever launched.
    """
    global _browser
    async with _browser_lock:
        if _browser and _browser.is_connected():
            await _browser.close()
            logger.info("Chromium browser closed")
        _browser = None
        await _stop_playwright()
75
+
76
+
77
async def render_pdf_from_html(
    html_content: str,
    *,
    format: str = "A4",
    print_background: bool = True,
    prefer_css_page_size: bool = True,
    wait_timeout: int = 30000,
) -> bytes:
    """Render HTML string to PDF bytes using Playwright Chromium.

    Generates a base PDF (content only, no decorative header/label),
    then creates a one-page overlay with the header image and right-side
    label, and stamps the overlay onto content pages (page 3 → last
    content page) using pypdf.  Pages 1-2 (cover/TOC) and trailing
    full-page image pages get no overlay.

    The number of leading pages skipped equals the count of ``.cover-page``
    elements and the number of trailing pages skipped equals the count of
    ``.fullpage-img-wrap`` elements found in the document.

    Args:
        html_content: Complete HTML document string.
        format: Page format (default A4).
        print_background: Include background colors/images.
        prefer_css_page_size: Use @page CSS rules for sizing.
        wait_timeout: Max time (ms) to wait for page load.

    Returns:
        PDF file bytes.
    """
    # Resolves once every <img> has either loaded or failed.
    wait_for_images_js = """
        () => {
            const images = Array.from(document.querySelectorAll('img'));
            return Promise.all(images.map(img => {
                if (img.complete) return Promise.resolve();
                return new Promise(r => {
                    img.addEventListener('load', r);
                    img.addEventListener('error', r);
                });
            }));
        }
    """

    browser = await _get_browser()
    context = await browser.new_context(
        viewport={"width": 794, "height": 1123},  # A4 at 96dpi
        device_scale_factor=1,
        java_script_enabled=True,
    )

    try:
        # Created inside the try so the context is closed even if this fails.
        page = await context.new_page()

        # Write HTML to a temp file so Chromium can load local file:// resources
        tmp = tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".html",
            delete=False,
            encoding="utf-8",
        )
        tmp_path = tmp.name

        try:
            # Writing happens under the cleanup ``finally`` so a failed write
            # does not leave the temp file behind.
            with tmp:
                tmp.write(html_content)

            file_url = Path(tmp_path).as_uri()
            await page.goto(file_url, wait_until="load", timeout=wait_timeout)

            # Wait for fonts and images to be fully loaded
            await page.evaluate("() => document.fonts.ready")
            await page.evaluate(wait_for_images_js)

            # ── Collect info from DOM before hiding elements ──
            header_src = await page.evaluate("""
                () => {
                    const img = document.querySelector('.page-header img');
                    return img ? img.src : '';
                }
            """)
            label_src = await page.evaluate("""
                () => {
                    const img = document.querySelector('.hb-right-label img');
                    return img ? img.src : '';
                }
            """)
            num_bottom_pages = await page.evaluate("""
                () => document.querySelectorAll('.fullpage-img-wrap').length
            """)
            # Cover page count: cover + TOC image (each is a .cover-page)
            num_cover_pages = await page.evaluate("""
                () => document.querySelectorAll('.cover-page').length
            """)

            logger.info(
                "Overlay info: header=%s, label=%s, covers=%d, bottoms=%d",
                bool(header_src), bool(label_src),
                num_cover_pages, num_bottom_pages,
            )

            # ── Hide header, footer, and label from the base PDF ──
            await page.evaluate("""
                () => {
                    document.querySelectorAll('.page-header, .page-footer, .hb-right-label')
                        .forEach(el => el.style.display = 'none');
                }
            """)

            # ── Render BASE PDF (no header, no label) ──
            base_pdf = await page.pdf(
                format=format,
                print_background=print_background,
                prefer_css_page_size=prefer_css_page_size,
                margin={
                    "top": "2.54cm",
                    "right": "2.54cm",
                    "bottom": "2.54cm",
                    "left": "2.54cm",
                },
                display_header_footer=True,
                header_template='<span></span>',
                footer_template=(
                    '<div style="width:100%;text-align:center;font-size:9px;'
                    'font-family:Century Gothic,Segoe UI,sans-serif;color:#0263A3;'
                    'padding:0 0 6px 0;letter-spacing:0.5px;">'
                    '<span style="font-weight:700;" class="pageNumber"></span></div>'
                ),
            )
            logger.info("Base PDF rendered, size=%d bytes", len(base_pdf))

        finally:
            os.unlink(tmp_path)

        # ── Build overlay (header + label) and stamp onto content pages ──
        if not header_src and not label_src:
            logger.info("No header or label to overlay, returning base PDF")
            return base_pdf

        # The same page is reused: it is navigated away to the overlay document.
        overlay_pdf = await _build_overlay_pdf(
            page, header_src, label_src, format, wait_timeout
        )

        merged = _stamp_overlay(
            base_pdf, overlay_pdf,
            skip_front=num_cover_pages,
            skip_back=num_bottom_pages,
        )
        logger.info("Final PDF with overlay, size=%d bytes", len(merged))
        return merged

    finally:
        await context.close()
220
+
221
+
222
async def _build_overlay_pdf(
    page, header_src: str, label_src: str,
    format: str, timeout: int,
) -> bytes:
    """Render a single-page transparent overlay PDF with header + label.

    Args:
        page: Playwright page to reuse; it is navigated to the overlay
            document, replacing whatever it was showing.
        header_src: URL of the header image, or "" to omit the header strip.
        label_src: URL of the right-side label image, or "" to omit it.
        format: Page format passed to ``page.pdf``. NOTE(review): the overlay
            CSS pins ``@page{size:A4}`` and the PDF is rendered with
            ``prefer_css_page_size=True``, so a non-A4 value presumably has
            no effect here — confirm before relying on other formats.
        timeout: Max time (ms) to wait for the overlay page to load.

    Returns:
        The overlay PDF bytes.
    """
    from html import escape

    parts = []
    if header_src:
        # URLs are attribute-escaped so a quote or ampersand in the image
        # location cannot break out of the src attribute.
        parts.append(
            '<div style="position:fixed;top:0;left:0;width:100%;height:2.54cm;'
            'margin:0;padding:0;overflow:hidden;z-index:1;">'
            f'<img src="{escape(header_src, quote=True)}" style="display:block;width:100%;'
            'height:100%;object-fit:fill;margin:0;padding:0;" /></div>'
        )
    if label_src:
        # Word doc: 3.0cm × 22.7cm container, ~0.35cm bleeds past right edge.
        # Scaled proportionally to A4: 2.9cm × 24.1cm, right:-0.3cm to
        # let part bleed off-page just like the Word original.
        parts.append(
            '<div style="position:fixed;top:3.5cm;right:-0.3cm;width:2.9cm;'
            'height:24.1cm;z-index:2;overflow:visible;">'
            f'<img src="{escape(label_src, quote=True)}" style="display:block;width:100%;'
            'height:100%;object-fit:fill;" /></div>'
        )

    overlay_html = (
        '<!doctype html><html><head><meta charset="utf-8">'
        '<style>'
        '@page{size:A4;margin:0}'
        'html,body{margin:0;padding:0;background:transparent}'
        '</style></head><body>'
        + '\n'.join(parts)
        + '<div style="height:297mm;width:210mm;"></div>'
        '</body></html>'
    )

    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=".html", delete=False, encoding="utf-8",
    )
    tmp_path = tmp.name

    try:
        # Writing happens under the cleanup ``finally`` so a failed write
        # does not leave the temp file behind.
        with tmp:
            tmp.write(overlay_html)

        await page.goto(
            Path(tmp_path).as_uri(),
            wait_until="load",
            timeout=timeout,
        )
        await page.evaluate("() => document.fonts.ready")
        # Resolve once every <img> has either loaded or failed.
        await page.evaluate("""
            () => {
                const images = Array.from(document.querySelectorAll('img'));
                return Promise.all(images.map(img => {
                    if (img.complete) return Promise.resolve();
                    return new Promise(r => {
                        img.addEventListener('load', r);
                        img.addEventListener('error', r);
                    });
                }));
            }
        """)

        overlay_bytes = await page.pdf(
            format=format,
            print_background=True,
            prefer_css_page_size=True,
            margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
            display_header_footer=False,
        )
        logger.info("Overlay PDF rendered, size=%d bytes", len(overlay_bytes))
        return overlay_bytes
    finally:
        os.unlink(tmp_path)
294
+
295
+
296
def _stamp_overlay(
    base_pdf: bytes,
    overlay_pdf: bytes,
    skip_front: int = 2,
    skip_back: int = 4,
) -> bytes:
    """Stamp the overlay's first page onto the content pages of the base PDF.

    The first ``skip_front`` pages and the last ``skip_back`` pages are
    copied through untouched; every page in between has the overlay merged
    on top of it.

    Args:
        base_pdf: Bytes of the document to stamp.
        overlay_pdf: Bytes of the overlay document; only its first page is used.
        skip_front: Number of leading pages left without overlay.
        skip_back: Number of trailing pages left without overlay.

    Returns:
        Bytes of the resulting PDF, with the same page count as the base.
    """
    import io
    from pypdf import PdfReader, PdfWriter

    source = PdfReader(io.BytesIO(base_pdf))
    stamp = PdfReader(io.BytesIO(overlay_pdf)).pages[0]
    output = PdfWriter()

    page_count = len(source.pages)
    lowest = skip_front                      # first stamped index, e.g. 2
    highest = page_count - skip_back - 1     # last stamped index, e.g. total-5

    for index, sheet in enumerate(source.pages):
        is_content = lowest <= index <= highest
        if is_content:
            sheet.merge_page(stamp)
        output.add_page(sheet)

    sink = io.BytesIO()
    output.write(sink)
    return sink.getvalue()
app/services/pdf_service.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """PDF generation service β€” Playwright Chromium PDF export.
2
+
3
+ Uses headless Chromium via Playwright to render the handbook HTML
4
+ and export a print-quality PDF. Replaces the previous WeasyPrint
5
+ approach for better CSS support and visual fidelity.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import logging
12
+ import os
13
+
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # No in-memory cache — every request fetches fresh data from the database
20
+
21
+
22
def _get_images_config() -> dict[str, Any]:
    """Describe the handbook's static images found in the images directory.

    Mirrors PHP getImagesConfig().

    Returns:
        A dict with the resolved ``imagesDir``, its parent as ``chroot``,
        the paths of the cover, TOC, header and label images (each "" when
        the file is absent), the list of existing ``bottomPages`` image
        paths, and an empty ``httpsBaseHandbook``.

    Raises:
        RuntimeError: If the configured images directory does not exist.
    """
    from app.core.config import get_settings

    images_dir = os.path.realpath(get_settings().images_dir)
    if not os.path.isdir(images_dir):
        raise RuntimeError(f"Handbook images directory not found: {images_dir}")

    def _in_dir(name: str) -> str:
        # Absolute path of a file inside the images directory.
        return os.path.join(images_dir, name)

    def _if_present(path: str) -> str:
        # The path itself when it is an existing file, otherwise "".
        return path if os.path.isfile(path) else ""

    cover_image = _in_dir("first_page.jpg")
    toc_image = _in_dir("toc.jpg")
    header_image = _in_dir("kenya_airlift_header.jpg")
    label_image = _in_dir("label.png")

    # Auto-discover bottom page images in filename order. For a stem that
    # exists as both .jpg and .png, only the first one in sorted order is
    # kept, which is the .jpg.
    bottom_pages: list[str] = []
    stems_taken: set[str] = set()
    for name in sorted(os.listdir(images_dir)):
        if not name.lower().endswith((".jpg", ".png")):
            continue
        if "page-000" not in name or name == "first_page.jpg":
            continue
        stem = os.path.splitext(name)[0]
        if stem in stems_taken:
            continue
        stems_taken.add(stem)
        bottom_pages.append(_in_dir(name))

    # Report anything missing, but keep going with what is available
    expected = [cover_image, toc_image, header_image, label_image] + bottom_pages
    missing = [path for path in expected if not os.path.isfile(path)]
    if missing:
        logger.warning("Missing handbook images: %s", missing)

    return {
        "imagesDir": images_dir,
        "chroot": os.path.dirname(images_dir),
        "coverImage": _if_present(cover_image),
        "tocImage": _if_present(toc_image),
        "headerImage": _if_present(header_image),
        "labelImage": _if_present(label_image),
        "bottomPages": [path for path in bottom_pages if os.path.isfile(path)],
        "httpsBaseHandbook": "",
    }
74
+
75
+
76
def render_pdf(html: str) -> bytes:
    """Synchronously render HTML to PDF bytes via the async renderer.

    With no event loop running in the calling thread, the coroutine is
    driven directly by ``asyncio.run``. When called from inside a running
    loop, ``asyncio.run`` is executed on a worker thread (with its own
    loop) and this call blocks until that thread delivers the result.

    NOTE(review): in the worker-thread case the calling event loop is
    blocked for the whole render, and the renderer's shared browser
    singleton may end up used from more than one event loop — confirm this
    is acceptable, or prefer awaiting the async API from async code.
    """
    try:
        active_loop = asyncio.get_running_loop()
    except RuntimeError:
        active_loop = None

    if not (active_loop and active_loop.is_running()):
        # Plain synchronous caller: run the coroutine to completion here
        return asyncio.run(_render_pdf_async(html))

    # Inside a running loop: asyncio.run() is not allowed in this thread,
    # so hand it to a worker thread and wait for the bytes.
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor() as executor:
        pending = executor.submit(asyncio.run, _render_pdf_async(html))
        return pending.result()
97
+
98
+
99
async def _render_pdf_async(html: str) -> bytes:
    """Async: render HTML to PDF bytes via Playwright Chromium.

    Thin adapter that awaits ``render_pdf_from_html`` with its default
    options. The renderer module is imported inside the function, i.e. only
    when a render is actually requested.

    Args:
        html: Complete HTML document string.

    Returns:
        PDF file bytes.
    """
    from app.services.pdf_renderer import render_pdf_from_html
    return await render_pdf_from_html(html)
103
+
104
+
105
async def generate_handbook_pdf(
    catalog_id: int = 0,
    include_inactive_programs: bool = False,
    debug: bool = False,
) -> bytes:
    """Full pipeline: fetch data -> Jinja2 HTML -> Playwright Chromium -> PDF.

    Mirrors the PHP download.php flow with Playwright as the rendering engine.

    The HTML stage is delegated to ``generate_handbook_html`` so both entry
    points share one implementation of the pipeline, which provides:
    - Parallel API fetches (global + university sections concurrently)
    - Parallel campus image prefetching (async batch instead of serial)

    Args:
        catalog_id: Catalog identifier forwarded to the HTML stage.
        include_inactive_programs: Forwarded to the HTML builder.
        debug: Forwarded to the HTML builder.

    Returns:
        PDF file bytes.
    """
    from app.services.pdf_renderer import render_pdf_from_html

    html = await generate_handbook_html(
        catalog_id=catalog_id,
        include_inactive_programs=include_inactive_programs,
        debug=debug,
    )

    return await render_pdf_from_html(html)
165
+
166
+
167
async def generate_handbook_html(
    catalog_id: int = 0,
    include_inactive_programs: bool = False,
    debug: bool = False,
) -> str:
    """Run the handbook pipeline up to the HTML stage (no PDF conversion).

    Global and university sections are fetched concurrently, campus image
    URLs found in the university sections are prefetched, and the final
    document is produced by the HTML builder.

    Args:
        catalog_id: Catalog identifier passed to the global-sections fetch.
        include_inactive_programs: Forwarded to the HTML builder.
        debug: Forwarded to the HTML builder.

    Returns:
        The complete handbook HTML document.
    """
    from app.services.data_fetcher import fetch_global_sections, fetch_university_sections
    from app.services.html_builder import build_handbook_html
    from app.services.renderers import prefetch_images

    def _campus_image_urls(universities) -> list[str]:
        # Non-empty image_url values of every campus_image/image section,
        # ignoring entries that do not have the expected shape.
        found: list[str] = []
        for uni in universities.values():
            if not isinstance(uni, dict):
                continue
            sections = uni.get("sections", [])
            if not isinstance(sections, list):
                continue
            for section in sections:
                if not isinstance(section, dict):
                    continue
                if str(section.get("section_key", "")) not in ("campus_image", "image"):
                    continue
                payload = section.get("section_json", {})
                if not isinstance(payload, dict):
                    continue
                url = str(payload.get("image_url", "")).strip()
                if url:
                    found.append(url)
        return found

    images = _get_images_config()

    globals_data, by_uni = await asyncio.gather(
        fetch_global_sections(catalog_id),
        fetch_university_sections(),
    )

    # Prefetch campus images in parallel for the HTML build
    campus_urls = _campus_image_urls(by_uni)
    if campus_urls:
        await prefetch_images(campus_urls)

    return build_handbook_html(
        globals_data,
        by_uni,
        images,
        allow_remote=True,
        include_inactive_programs=include_inactive_programs,
        debug=debug,
    )
app/services/renderers.py ADDED
@@ -0,0 +1,1097 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Renderers — mirrors PHP renderers.php.
2
+
3
+ Contains functions for rendering:
4
+ - Table of Contents (TOC)
5
+ - Global section blocks (overview, steps, bullets, tables, doc_v1, etc.)
6
+ - University section blocks (overview, benefits, programs)
7
+ - Remote image fetching as data URIs
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import base64
13
+ import logging
14
+ import re
15
+ from typing import Any
16
+
17
+ import httpx
18
+
19
+ from app.services.utils import (
20
+ emphasize_keywords,
21
+ format_money_figures,
22
+ get_any,
23
+ h,
24
+ hb_slug,
25
+ is_assoc,
26
+ is_truthy,
27
+ )
28
+
29
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
30
+
31
+
32
+ # =========================================
33
+ # Image fetching (with in-memory cache + async batch support)
34
+ # =========================================
35
+
36
# In-memory cache keyed by stripped image URL. The value is the image's
# data: URI, or an empty string when a fetch for that URL failed.
_image_cache: dict[str, str] = {}
37
+
38
+
39
+ def _detect_image_mime(data: bytes, content_type: str) -> str:
40
+ """Detect image MIME type from headers or magic bytes."""
41
+ if "image/" in content_type:
42
+ return content_type.split(";")[0].strip()
43
+ if data[:8].startswith(b"\x89PNG"):
44
+ return "image/png"
45
+ if data[:3] == b"\xff\xd8\xff":
46
+ return "image/jpeg"
47
+ if data[:4] == b"GIF8":
48
+ return "image/gif"
49
+ if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
50
+ return "image/webp"
51
+ return ""
52
+
53
+
54
def fetch_image_data_uri(url: str) -> str:
    """Download a remote image and return it as a ``data:`` URI.

    Mirrors PHP fetchImageDataUri. Results, including failures (stored as an
    empty string), are memoised in ``_image_cache`` under the stripped URL,
    so entries already filled by ``prefetch_images`` are returned directly.

    Args:
        url: Image URL; surrounding whitespace is ignored.

    Returns:
        ``data:<mime>;base64,<payload>`` on success, otherwise ``""``.
    """
    target = url.strip()
    if target == "":
        return ""

    cached = _image_cache.get(target)
    if cached is not None:
        return cached

    def _remember(value: str) -> str:
        # Store the outcome (success or failure) and hand it back.
        _image_cache[target] = value
        return value

    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        with httpx.Client(verify=False, timeout=12, follow_redirects=True) as client:
            response = client.get(target)
        if not (200 <= response.status_code < 300 and response.content):
            logger.warning("Image fetch failed for %s status=%d", target, response.status_code)
            return _remember("")
        payload = response.content
    except Exception as exc:
        logger.warning("Image fetch error for %s: %s", target, exc)
        return _remember("")

    mime = _detect_image_mime(payload, response.headers.get("content-type", ""))
    if not mime.startswith("image/"):
        logger.warning("Invalid image mime %s for %s", mime, target)
        return _remember("")

    encoded = base64.b64encode(payload).decode("ascii")
    return _remember(f"data:{mime};base64,{encoded}")
87
+
88
+
89
async def prefetch_images(urls: list[str]) -> dict[str, str]:
    """Download all not-yet-cached images concurrently and fill the cache.

    Each distinct, non-empty, stripped URL that is missing from
    ``_image_cache`` is requested through one shared async client. Every
    outcome is cached: a data URI on success, an empty string on failure.

    Args:
        urls: Image URLs; blanks and duplicates are tolerated.

    Returns:
        A mapping from each entry of ``urls`` (as given) to its cached data
        URI, or ``""`` when nothing usable is cached for it.
    """
    import asyncio

    def _snapshot() -> dict[str, str]:
        # Report the cache state for exactly the URLs the caller passed in.
        return {raw: _image_cache.get(raw.strip(), "") for raw in urls}

    todo = list({
        cleaned
        for cleaned in (raw.strip() for raw in urls)
        if cleaned and cleaned not in _image_cache
    })
    if not todo:
        return _snapshot()

    async def _download(session: httpx.AsyncClient, link: str) -> tuple[str, str]:
        # Never raises: any failure is reported as (link, "").
        try:
            reply = await session.get(link)
            if not (200 <= reply.status_code < 300) or not reply.content:
                logger.warning("Prefetch image failed for %s status=%d", link, reply.status_code)
                return link, ""
            kind = _detect_image_mime(reply.content, reply.headers.get("content-type", ""))
            if not kind.startswith("image/"):
                logger.warning("Prefetch invalid mime %s for %s", kind, link)
                return link, ""
            encoded = base64.b64encode(reply.content).decode("ascii")
            return link, f"data:{kind};base64,{encoded}"
        except Exception as exc:
            logger.warning("Prefetch image error for %s: %s", link, exc)
            return link, ""

    logger.info("Prefetching %d campus images in parallel...", len(todo))
    # NOTE(review): verify=False disables TLS certificate verification.
    async with httpx.AsyncClient(verify=False, timeout=15, follow_redirects=True) as session:
        outcomes = await asyncio.gather(*(_download(session, link) for link in todo))

    _image_cache.update(outcomes)
    succeeded = sum(1 for _, uri in outcomes if uri)

    logger.info("Prefetched %d/%d images successfully", succeeded, len(todo))
    return _snapshot()
129
+
130
+
131
+ # =========================================
132
+ # Funding extraction
133
+ # =========================================
134
+
135
+ def _extract_university_funding(
136
+ j: dict,
137
+ school_meta: dict | None = None,
138
+ ) -> tuple[str, list[str]]:
139
+ """Extract funding heading + items from benefits section JSON.
140
+
141
+ Priority:
142
+ 1. section_json.funding.options
143
+ 2. section_json.funding_available
144
+ 3. fallback from pth_ref_schools.school_category
145
+ """
146
+ if not isinstance(j, dict):
147
+ j = {}
148
+
149
+ heading = "Funding Available"
150
+ items: list[str] = []
151
+
152
+ # 1. Preferred normalized shape
153
+ funding = j.get("funding", {})
154
+ if isinstance(funding, dict):
155
+ subheading = str(funding.get("subheading", "")).strip()
156
+ if subheading:
157
+ heading = subheading
158
+
159
+ options = funding.get("options", [])
160
+ if isinstance(options, list):
161
+ for opt in options:
162
+ if not isinstance(opt, dict):
163
+ continue
164
+ name = str(opt.get("name", "")).strip()
165
+ amount = str(opt.get("amount", "")).strip()
166
+
167
+ if name and amount:
168
+ items.append(f"{name} - {amount}")
169
+ elif name:
170
+ items.append(name)
171
+ elif amount:
172
+ items.append(amount)
173
+
174
+ # 2. Legacy fallback shape
175
+ if not items:
176
+ funding_available = j.get("funding_available", [])
177
+ if isinstance(funding_available, list):
178
+ for item in funding_available:
179
+ text = str(item).strip()
180
+ if text:
181
+ items.append(text)
182
+
183
+ # 3. School-category fallback
184
+ if not items and isinstance(school_meta, dict):
185
+ school_category = str(school_meta.get("school_category", "")).strip().lower()
186
+ status = str(school_meta.get("status", "")).strip().lower()
187
+
188
+ if status == "in":
189
+ if school_category == "non_cosigner":
190
+ items = [
191
+ "ISP Study Loan - $10,000",
192
+ "Partner 1 (Unsecured Loan) - Up to $50,000 per academic year",
193
+ "Partner 3 (Credit Option) - Up to $15,000",
194
+ ]
195
+ elif school_category == "cosigner":
196
+ items = [
197
+ "ISP Study Loan - $10,000",
198
+ "Partner 2 (A Cosigned Loan) - Full Coverage Support",
199
+ "Partner 3 (Credit Option) - Up to $15,000",
200
+ ]
201
+
202
+ return (heading, items)
203
+
204
+
205
+ # =========================================
206
+ # TOC sorting and rendering
207
+ # =========================================
208
+
209
def sort_toc(items: list[dict]) -> list[dict]:
    """Sort TOC entries in place by ``sort_order`` (or ``sort``) and return them.

    Mirrors PHP sortHandbookToc. Entries with a numeric sort value come first
    in ascending order; entries without a usable value (missing, non-numeric,
    NaN, or not a dict at all) follow. Ties keep their original relative
    order because ``list.sort`` is stable, so no temporary index key has to
    be written into the entries.

    Args:
        items: TOC entries; the list is reordered in place.

    Returns:
        The same list object, now sorted.
    """

    def sort_key(entry: object) -> tuple[int, float]:
        if isinstance(entry, dict):
            raw = entry.get("sort_order", entry.get("sort"))
            if raw is not None:
                try:
                    value = float(raw)
                except (ValueError, TypeError, OverflowError):
                    pass
                else:
                    # NaN compares false with itself and would make the
                    # ordering inconsistent, so treat it as "no sort value".
                    if value == value:
                        return (0, value)
        return (1, 0.0)

    items.sort(key=sort_key)
    return items
228
+
229
+
230
def render_toc(items: list[dict], debug: bool = False, show_pages: bool = True) -> str:
    """Render the Table of Contents as an HTML table (DOMPDF-safe markup).

    Mirrors PHP renderToc().

    Args:
        items: TOC entries. Each dict may carry ``title``, ``target`` (or
            ``anchor``), ``level``, ``bold``, ``upper`` and ``page``. The
            list is passed to ``sort_toc`` and therefore sorted in place.
        debug: Accepted for signature compatibility; not used here.
        show_pages: When false, the page-number column is left blank.

    Returns:
        The TOC markup as one string. Entries that are not dicts or have no
        title are skipped.
    """

    def _text(value: object) -> str:
        # JSON null must become empty text, not the literal string "None".
        return "" if value is None else str(value).strip()

    def _level(value: object) -> int:
        # A missing or malformed level falls back to the top level instead
        # of aborting the whole render.
        try:
            return max(0, min(3, int(value)))
        except (TypeError, ValueError, OverflowError):
            return 0

    sorted_items = sort_toc(items)

    parts: list[str] = [
        '<!-- HANDBOOK_TOC_V2 -->',
        '<div class="toc">',
        '<div class="toc-heading">Table of Contents</div>',
        '<table class="toc-table" width="100%" cellspacing="0" cellpadding="0"'
        ' style="border-collapse:collapse; table-layout:fixed; width:100%;">'
        '<colgroup><col /><col width="50" /><col width="48" /></colgroup>',
    ]

    for e in sorted_items:
        if not isinstance(e, dict):
            continue
        title = _text(e.get("title"))
        if not title:
            continue
        target = _text(e.get("target", e.get("anchor")))

        level = _level(e.get("level", 0))
        # Top-level entries are always bold and upper-cased.
        bold = level == 0 or bool(e.get("bold", False))
        upper = level == 0 or bool(e.get("upper", False))

        row_class = "toc-row--major" if level == 0 else "toc-row--sub"
        if level >= 2:
            row_class += " toc-row--deep"

        title_inner = h(title.upper() if upper else title)
        if target:
            title_inner = f'<a href="{h(target)}">{title_inner}</a>'
        if bold:
            title_inner = f"<strong>{title_inner}</strong>"

        page = _text(e.get("page"))
        page_cell = f"<strong>{h(page)}</strong>" if show_pages and page else "&nbsp;"

        indent = ""
        if level == 1:
            indent = "padding-left:16px;"
        elif level >= 2:
            indent = "padding-left:30px;"

        title_style = (
            "vertical-align:bottom; padding:1px 4px 1px 0; font-size:10px; "
            "line-height:1.15; color:#111;"
            + (" font-weight:700;" if bold else " font-weight:400;")
            + (" text-transform:uppercase; letter-spacing:0.1px;" if upper else "")
            + (f" {indent}" if indent else "")
        )

        parts.append(
            f'<tr class="{h(row_class)}">'
            f'<td class="toc-title" style="{title_style}">{title_inner}</td>'
            '<td class="toc-dots" style="vertical-align:bottom; border-bottom:1px dotted #777; height:0.85em; padding:0;">&nbsp;</td>'
            '<td class="toc-pagenum" style="vertical-align:bottom; text-align:right; '
            'padding-left:4px; font-size:10px; font-weight:700; line-height:1.15; '
            f'white-space:nowrap; width:48px; color:#111;">{page_cell}</td>'
            "</tr>"
        )

    parts.append("</table></div>")
    return "".join(parts)
304
+
305
+
306
def render_toc_hardcoded(
    items: list[dict],
    debug: bool = False,
    page_start: int = 3,
    page_offset: int = 0,
) -> str:
    """Render the TOC after assigning a page number to every entry.

    Mirrors PHP renderTocHardcoded: sort, assign pages, render.

    Page assignment walks the sorted entries. An entry whose ``page`` is an
    integer string keeps it (plus ``page_offset``) and pushes the running
    counter past it; every other entry receives the next counter value. The
    resulting string is written back into each entry's ``page`` key.

    Args:
        items: TOC entries; sorted in place by ``sort_toc`` and mutated
            (``page`` is overwritten).
        debug: Accepted for signature compatibility; not used here.
        page_start: First page number handed out sequentially (minimum 1).
        page_offset: Added to explicitly provided page numbers.

    Returns:
        The TOC markup as one string. Entries that are not dicts or have no
        title are skipped when rendering.
    """

    def _text(value: object) -> str:
        # JSON null must become empty text, not the literal string "None".
        return "" if value is None else str(value).strip()

    def _level(value: object) -> int:
        # A missing or malformed level falls back to the top level instead
        # of aborting the whole render.
        try:
            return max(0, min(3, int(value)))
        except (TypeError, ValueError, OverflowError):
            return 0

    sorted_items = sort_toc(items)

    seq = max(1, page_start)
    for item in sorted_items:
        if not isinstance(item, dict):
            continue
        p = str(item.get("page", "")).strip()
        display = None
        if p and p.lstrip("-").isdigit():
            # isdigit() also accepts text int() rejects ("--5", "²"); such
            # values fall through to sequential numbering instead of raising.
            try:
                display = int(p) + page_offset
            except ValueError:
                display = None
        if display is not None:
            item["page"] = str(display)
            if display >= seq:
                seq = display + 1
        else:
            item["page"] = str(seq)
            seq += 1

    parts: list[str] = [
        "<!-- HANDBOOK_TOC_HARDCODED -->\n",
        '<div class="toc">',
        '<p class="toc-heading">Table of Contents</p>',
        '<table class="toc-table" style="table-layout:fixed;width:100%;">'
        '<colgroup><col /><col width="50" /><col width="48" /></colgroup>',
    ]

    for e in sorted_items:
        if not isinstance(e, dict):
            continue
        title = _text(e.get("title"))
        if not title:
            continue
        target = _text(e.get("target", e.get("anchor")))

        level = _level(e.get("level", 0))
        # Top-level entries are always bold and upper-cased.
        bold = level == 0 or bool(e.get("bold", False))
        upper = level == 0 or bool(e.get("upper", False))

        row_class = "toc-row--major" if level == 0 else "toc-row--sub"
        if level >= 2:
            row_class += " toc-row--deep"

        title_inner = h(title.upper() if upper else title)
        if target:
            title_inner = f'<a href="{h(target)}">{title_inner}</a>'
        if bold:
            title_inner = f"<strong>{title_inner}</strong>"

        page = _text(e.get("page"))
        page_html = f"<strong>{h(page)}</strong>" if page else "&nbsp;"

        indent = ""
        if level == 1:
            indent = "padding-left:16px;"
        elif level >= 2:
            indent = "padding-left:30px;"

        title_style = (
            "vertical-align:bottom;padding:1px 4px 1px 0;font-size:10px;"
            "line-height:1.15;color:#111;"
            + ("font-weight:700;" if bold else "font-weight:400;")
            + ("text-transform:uppercase;letter-spacing:0.1px;" if upper else "")
            + indent
        )

        parts.append(
            f'<tr class="{h(row_class)}">'
            f'<td class="toc-title" style="{title_style}">{title_inner}</td>'
            '<td class="toc-dots" style="vertical-align:bottom;padding:0;"><span class="toc-dots-inner">&nbsp;</span></td>'
            '<td class="toc-pagenum" style="vertical-align:bottom;text-align:right;'
            'padding-left:4px;font-size:10px;font-weight:700;line-height:1.15;'
            f'white-space:nowrap;width:48px;color:#111111;">{page_html}</td>'
            "</tr>"
        )

    parts.append("</table></div>")
    return "".join(parts)
390
+
391
+
392
+ # =========================================
393
+ # table_v3 / table_v4 cell helpers
394
+ # =========================================
395
+
396
+ # Mapping of style names β†’ inline CSS strings for table_v3/v4 cells
397
+ _V3_STYLE_MAP: dict[str, str] = {
398
+ "band_teal": "text-align:center;font-weight:700;color:#fff;background:#199970;",
399
+ "band_navy": "text-align:center;font-weight:700;color:#fff;background:#0263A3;",
400
+ "bold_amounts": "font-weight:600;",
401
+ "green_center_bold": "text-align:center;font-weight:700;color:#199970;",
402
+ "center_bold_multiline": "text-align:center;font-weight:600;vertical-align:middle;",
403
+ "footer_center_bold": "text-align:center;font-weight:700;background:#f5f5f5;",
404
+ "covered_merged": "vertical-align:top;font-size:9pt;line-height:1.5;",
405
+ }
406
+
407
+
408
def _parse_v3_cell(cell: Any) -> tuple[str, str, str]:
    """Turn one table_v3/table_v4 cell into the pieces of an HTML cell.

    Args:
        cell: Either a scalar (rendered as plain text) or a dict that may
            carry ``text``, ``colspan``, ``rowspan``, ``style`` (a key of
            ``_V3_STYLE_MAP``) and ``parts`` (list of ``{"text", "bold"}``
            dicts joined with blank lines).

    Returns:
        ``(attr_str, style_str, html_content)``: the colspan/rowspan
        attributes (leading space included, or ""), the ``style="..."``
        attribute (or ""), and the escaped cell content.
    """

    def _text(value: object) -> str:
        # JSON null must become empty text, not the literal string "None".
        return "" if value is None else str(value)

    def _span(raw: object) -> int:
        # Only plain non-negative integers count; anything else means "1".
        if raw is None or not str(raw).isdigit():
            return 1
        try:
            return int(raw)
        except (TypeError, ValueError):
            # isdigit() accepts characters such as "²" that int() rejects.
            return 1

    if not isinstance(cell, dict):
        # A numeric zero is real content and must not be blanked out the way
        # None, False and empty strings are.
        is_number = isinstance(cell, (int, float)) and not isinstance(cell, bool)
        text = format_money_figures(str(cell)) if (cell or is_number) else ""
        return ("", "", h(text))

    colspan = _span(cell.get("colspan"))
    rowspan = _span(cell.get("rowspan"))

    attr = ""
    if colspan > 1:
        attr += f' colspan="{colspan}"'
    if rowspan > 1:
        attr += f' rowspan="{rowspan}"'

    inline_css = _V3_STYLE_MAP.get(_text(cell.get("style")), "")
    style_str = f' style="{inline_css}"' if inline_css else ""

    # Rich parts within cell (merged cells with multiple text blocks)
    html_parts: list[str] = []
    parts = cell.get("parts")
    if isinstance(parts, list):
        for p in parts:
            if not isinstance(p, dict):
                continue
            pt = format_money_figures(_text(p.get("text")))
            if not pt:
                continue
            html_parts.append(f"<strong>{h(pt)}</strong>" if p.get("bold") else h(pt))

    if html_parts:
        content = "<br><br>".join(html_parts)
    else:
        # No usable parts: fall back to the cell's plain text.
        content = h(format_money_figures(_text(cell.get("text"))))

    return (attr, style_str, content)
453
+
454
+
455
+ # =========================================
456
+ # Global blocks renderer
457
+ # =========================================
458
+
459
def render_global_blocks(
    section_key: str,
    section_title: str,
    json_data: dict | list,
    debug: bool = False,
    *,
    universities: list[dict] | None = None,
) -> str:
    """Render one global section's content as an HTML fragment.

    Mirrors PHP renderGlobalBlocks(). After the optional ``<h2>`` title, the
    first matching shape wins, in this order:

    1. ``steps`` list
    2. bullets (``bullets`` list, or ``items`` with layout ``bullets_with_note``)
    3. basic table (``columns`` and ``rows`` lists)
    4. layout ``table_v2``
    5. layout ``doc_v1`` with a ``blocks`` list
    6. plain ``text`` fallback

    Args:
        section_key: Section identifier; ``table_of_contents`` suppresses the
            title heading.
        section_title: Title from the database, used when the JSON has none.
        json_data: Section payload; anything that is not a dict is treated as
            empty.
        debug: Accepted for signature compatibility; not used here.
        universities: Accepted for signature compatibility; not used here.

    Returns:
        The rendered HTML. A warning is logged when the fallback path ends up
        producing nothing.
    """

    def _s(value: object) -> str:
        # JSON null must become empty text, not the literal string "None".
        return "" if value is None else str(value)

    def _td(value: object) -> str:
        return f"<td>{h(format_money_figures(_s(value)))}</td>"

    def _slug(label: object) -> str:
        # Derive a row-dict key from a human column label.
        return re.sub(r"[^a-z0-9]+", "_", str(label).lower())

    def _as_list(value: object) -> list:
        return value if isinstance(value, list) else []

    out: list[str] = []
    add = out.append
    key_norm = section_key.lower().strip()

    if not isinstance(json_data, dict):
        json_data = {}

    layout_norm = _s(json_data.get("layout", "")).lower().strip()

    # -- Section title --
    # Prefer the JSON-level title (display-ready) over the DB section_title
    json_title = _s(json_data.get("title", "")).strip()
    title = json_title or section_title.strip()
    # Always bound: the doc_v1 branch compares block headings against it.
    _title_norm = title.lower()
    if title and key_norm != "table_of_contents":
        add(f'<h2 class="h2">{h(title)}</h2>')

    # -- Steps --
    steps = json_data.get("steps")
    if isinstance(steps, list):
        step_num = 0
        for s in steps:
            if not isinstance(s, dict):
                continue
            step_num += 1
            step_title = _s(s.get("title", s.get("step_title", ""))).strip()
            body = format_money_figures(_s(s.get("body", s.get("description", ""))).strip())

            add('<div class="avoid-break" style="margin:0 0 4px;">')
            if step_title:
                add(f'<div class="h3">Step {step_num}: {h(step_title)}</div>')
            if body:
                add(f'<p class="p">{emphasize_keywords(body)}</p>')

            links = s.get("links", [])
            if isinstance(links, list) and links:
                add('<ul class="ul">')
                for lnk in links:
                    if not isinstance(lnk, dict):
                        continue
                    label = _s(lnk.get("label", "Link")).strip()
                    url = _s(lnk.get("url", "")).strip()
                    if not url:
                        continue
                    add(f'<li><a href="{h(url)}" target="_blank" rel="noopener noreferrer">{h(label)}</a></li>')
                add("</ul>")

            qr = _s(s.get("qr_url", s.get("qr_image", ""))).strip()
            if qr:
                add(f'<img src="{h(qr)}" alt="QR" style="width:60px; height:60px; margin:4px 0;" />')

            add("</div>")
        return "".join(out)

    # -- Bullets --
    has_bullets = isinstance(json_data.get("bullets"), list)
    has_items = isinstance(json_data.get("items"), list)
    if has_bullets or (layout_norm == "bullets_with_note" and has_items):
        # "items" takes precedence over "bullets" whenever it is a list.
        lst = json_data.get("items") if has_items else json_data.get("bullets")
        add('<ul class="ul">')
        for b in lst:
            b_str = format_money_figures(_s(b).strip())
            if not b_str:
                continue
            add(f"<li>{emphasize_keywords(b_str)}</li>")
        add("</ul>")

        note = format_money_figures(_s(json_data.get("note", json_data.get("footnote", ""))).strip())
        if note:
            add(f'<div class="note">{h(note)}</div>')
        return "".join(out)

    # -- Basic table --
    cols = json_data.get("columns")
    rows = json_data.get("rows")
    if isinstance(cols, list) and isinstance(rows, list):
        add('<table class="tbl">')
        if cols:
            add("<thead><tr>")
            for c in cols:
                add(f"<th>{h(_s(c))}</th>")
            add("</tr></thead>")
        add("<tbody>")

        # Column keys depend only on the header, so compute them once.
        row_keys = [_slug(c) for c in cols]
        for r in rows:
            if not isinstance(r, (list, dict)):
                continue
            add("<tr>")
            if isinstance(r, dict):
                for key_guess in row_keys:
                    add(_td(r.get(key_guess, "")))
            else:
                for cell in r:
                    add(_td(cell))
            add("</tr>")

        add("</tbody></table>")
        return "".join(out)

    # -- table_v2 --
    if layout_norm == "table_v2":
        base_cols = _as_list(json_data.get("base_columns", []))
        groups = _as_list(json_data.get("header_groups", []))
        rows = _as_list(json_data.get("rows", []))

        # Flat column list: base columns first, then every group's columns.
        all_cols: list[dict] = []
        for c in base_cols:
            if isinstance(c, dict):
                all_cols.append({"key": _s(c.get("key", "")), "label": _s(c.get("label", ""))})
        for g in groups:
            if not isinstance(g, dict):
                continue
            for c in _as_list(g.get("columns", [])):
                if isinstance(c, dict):
                    all_cols.append({"key": _s(c.get("key", "")), "label": _s(c.get("label", ""))})

        add('<table class="tbl tbl-comparison"><thead>')
        if groups:
            # Two header rows: base columns span both, groups span their columns.
            add("<tr>")
            for c in base_cols:
                if isinstance(c, dict):
                    add(f'<th rowspan="2">{h(_s(c.get("label", "")))}</th>')
            for g in groups:
                if not isinstance(g, dict):
                    continue
                span = max(1, len(_as_list(g.get("columns", []))))
                add(f'<th colspan="{span}">{h(_s(g.get("label", "")))}</th>')
            add("</tr><tr>")
            for g in groups:
                if not isinstance(g, dict):
                    continue
                for c in _as_list(g.get("columns", [])):
                    if isinstance(c, dict):
                        add(f'<th>{h(_s(c.get("label", "")))}</th>')
            add("</tr>")
        else:
            add("<tr>")
            for c in all_cols:
                add(f'<th>{h(c.get("label", ""))}</th>')
            add("</tr>")

        add("</thead><tbody>")
        for r in rows:
            if not isinstance(r, dict):
                continue
            add("<tr>")
            for c in all_cols:
                val = r.get(c.get("key", ""), "")
                if isinstance(val, dict):
                    val = val.get("text", "")
                add(_td(val))
            add("</tr>")
        add("</tbody></table>")
        return "".join(out)

    # -- doc_v1 --
    if layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list):
        for b in json_data["blocks"]:
            if not isinstance(b, dict):
                continue
            btype = _s(b.get("type", ""))

            # Skip heading/subheading blocks that duplicate the section title
            if btype in ("heading", "subheading"):
                if _s(b.get("text", "")).strip().lower() == _title_norm:
                    continue
            # NOTE(review): there is no render branch for "heading", so a
            # heading block that differs from the title produces no output.
            # Confirm against the PHP original.

            if btype == "paragraph":
                t = format_money_figures(_s(b.get("text", "")))
                if t.strip():
                    add(f'<p class="p">{emphasize_keywords(t)}</p>')

            elif btype == "subheading":
                t = format_money_figures(_s(b.get("text", "")))
                if t.strip():
                    add(f'<h3 class="h3 keep-with-next">{h(t)}</h3>')

            elif btype in ("bullets", "numbered_list"):
                tag = "ul" if btype == "bullets" else "ol"
                add(f'<{tag} class="{tag}">')
                for it in _as_list(b.get("items", [])):
                    it_str = format_money_figures(_s(it).strip())
                    if it_str:
                        add(f"<li>{emphasize_keywords(it_str)}</li>")
                add(f"</{tag}>")

            elif btype == "note":
                t = format_money_figures(_s(b.get("text", "")))
                if t.strip():
                    add(f'<div class="note">{h(t)}</div>')

            elif btype == "note_inline":
                fragments: list[str] = []
                for p in _as_list(b.get("parts", [])):
                    if not isinstance(p, dict):
                        continue
                    t = format_money_figures(_s(p.get("text", "")))
                    if not t:
                        continue
                    if _s(p.get("style", "")) == "red_bold":
                        fragments.append(f"<strong>{h(t)}</strong>")
                    else:
                        fragments.append(h(t))
                txt = "".join(fragments)
                # Only emit the note when some visible text is left once tags are removed.
                if re.sub(r"<[^>]+>", "", txt).strip():
                    add(f'<div class="note">{txt}</div>')

            elif btype == "table_v1":
                t_cols = _as_list(b.get("columns", []))
                t_rows = _as_list(b.get("rows", []))
                add('<table class="tbl">')
                if t_cols:
                    add("<thead><tr>")
                    for c in t_cols:
                        add(f"<th>{h(_s(c))}</th>")
                    add("</tr></thead>")
                add("<tbody>")
                for r in t_rows:
                    if not isinstance(r, list):
                        continue
                    add("<tr>")
                    for cell in r:
                        add(_td(cell))
                    add("</tr>")
                add("</tbody></table>")

            elif btype == "table":
                # Generic table (columns may be objects or strings, rows may be dicts or lists)
                t_cols = _as_list(b.get("columns", []))
                t_rows = _as_list(b.get("rows", []))
                col_labels: list[str] = []
                col_keys: list[str] = []
                for c in t_cols:
                    if isinstance(c, dict):
                        col_labels.append(_s(c.get("label", c.get("key", ""))))
                        col_keys.append(_s(c.get("key", "")))
                    else:
                        col_labels.append(_s(c))
                        col_keys.append(_slug(c))
                add('<table class="tbl">')
                if col_labels:
                    add("<thead><tr>")
                    for lbl in col_labels:
                        add(f"<th>{h(lbl)}</th>")
                    add("</tr></thead>")
                add("<tbody>")
                for r in t_rows:
                    add("<tr>")
                    if isinstance(r, dict):
                        for k in col_keys:
                            add(_td(r.get(k, "")))
                    elif isinstance(r, list):
                        for cell in r:
                            add(_td(cell))
                    add("</tr>")
                add("</tbody></table>")

            elif btype in ("table_v3", "table_v4"):
                t_rows = _as_list(b.get("rows", []))
                h_rows = _as_list(b.get("header_rows", []))
                col_widths = _as_list(b.get("col_width_pct", []))

                add('<table class="tbl">')

                # optional col widths
                if col_widths:
                    add("<colgroup>")
                    for w in col_widths:
                        # Escaped like every other data value placed in an
                        # attribute (assumes h() escapes quotes - TODO confirm).
                        add(f'<col style="width:{h(_s(w))}%">')
                    add("</colgroup>")

                # header rows
                if h_rows:
                    add("<thead>")
                    for hr in h_rows:
                        if not isinstance(hr, list):
                            continue
                        add("<tr>")
                        for cell in hr:
                            c_attr, c_style, c_text = _parse_v3_cell(cell)
                            add(f"<th{c_attr}{c_style}>{c_text}</th>")
                        add("</tr>")
                    add("</thead>")

                # body rows
                add("<tbody>")
                for r in t_rows:
                    if not isinstance(r, list):
                        continue
                    add("<tr>")
                    for cell in r:
                        c_attr, c_style, c_text = _parse_v3_cell(cell)
                        add(f"<td{c_attr}{c_style}>{c_text}</td>")
                    add("</tr>")
                add("</tbody></table>")

        return "".join(out)

    # -- Fallback --
    if "text" in json_data:
        add(f'<p class="p">{h(format_money_figures(_s(json_data["text"])))}</p>')

    html_out = "".join(out)
    if not html_out.strip():
        logger.warning(
            "Empty section render for key=%s title=%s",
            section_key, section_title,
        )

    return html_out
831
+
832
+
833
+ # =========================================
834
+ # University section renderer
835
+ # =========================================
836
+
837
+ def render_university_section(
838
+ uni_name: str,
839
+ sections: list[dict],
840
+ allow_remote: bool,
841
+ is_first_uni: bool,
842
+ include_inactive_programs: bool = False,
843
+ website_url: str = "",
844
+ anchor_id: str | None = None,
845
+ debug: bool = False,
846
+ stats: dict | None = None,
847
+ sort_order: int | None = None,
848
+ ) -> str:
849
+ """Render a single university section. Mirrors PHP renderUniversitySection."""
850
+ classes = ["uni"]
851
+ if not is_first_uni:
852
+ classes.append("page-break")
853
+
854
+ id_attr = f' id="{h(anchor_id)}"' if anchor_id else ""
855
+ sort_attr = f' data-sort="{h(str(sort_order))}"' if sort_order is not None else ""
856
+
857
+ out = f'<div class="{" ".join(classes)}"{id_attr}{sort_attr} data-section-key="university" data-section-title="{h(uni_name)}">'
858
+
859
+ has_stats = isinstance(stats, dict)
860
+ if has_stats:
861
+ stats["universities"] = stats.get("universities", 0) + 1
862
+
863
+ # Build map; merge duplicate "programs" sections
864
+ sec_map: dict[str, dict] = {}
865
+ for s in sections:
866
+ if not isinstance(s, dict):
867
+ continue
868
+ k = str(s.get("section_key", ""))
869
+ if not k:
870
+ continue
871
+ if k == "programs" and k in sec_map:
872
+ existing = sec_map["programs"].get("section_json", {})
873
+ incoming = s.get("section_json", {})
874
+ if not isinstance(existing, dict):
875
+ existing = {}
876
+ if not isinstance(incoming, dict):
877
+ incoming = {}
878
+ a = existing.get("programs", [])
879
+ b = incoming.get("programs", [])
880
+ if not isinstance(a, list):
881
+ a = []
882
+ if not isinstance(b, list):
883
+ b = []
884
+ existing["programs"] = a + b
885
+ sec_map["programs"]["section_json"] = existing
886
+ continue
887
+ sec_map[k] = s
888
+
889
+ # Campus image
890
+ img_section = sec_map.get("campus_image") or sec_map.get("image")
891
+ campus_url = ""
892
+ campus_cap = ""
893
+ if img_section:
894
+ j = img_section.get("section_json", {})
895
+ if isinstance(j, dict):
896
+ campus_url = str(j.get("image_url", "")).strip()
897
+ campus_cap = str(j.get("caption", "")).strip()
898
+
899
+ # Overview data + website
900
+ overview_json: dict | None = None
901
+ resolved_website = (website_url or "").strip()
902
+
903
+ if "overview" in sec_map:
904
+ overview_json = sec_map["overview"].get("section_json", {})
905
+ if not isinstance(overview_json, dict):
906
+ overview_json = {}
907
+ site_from_overview = get_any(
908
+ overview_json,
909
+ ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"],
910
+ )
911
+ if not resolved_website and site_from_overview:
912
+ resolved_website = site_from_overview
913
+
914
+ # 1. University title
915
+ if resolved_website:
916
+ if has_stats:
917
+ stats["university_links"] = stats.get("university_links", 0) + 1
918
+ out += (
919
+ f'<div class="uni-name"><a class="uni-name-link" href="{h(resolved_website)}" '
920
+ f'target="_blank" rel="noopener noreferrer">{h(uni_name)}</a></div>'
921
+ )
922
+ else:
923
+ out += f'<div class="uni-name">{h(uni_name)}</div>'
924
+
925
+ # 2-3. Two-column: Summary + Campus image
926
+ image_embedded = False
927
+ campus_cell = ""
928
+ if allow_remote and campus_url:
929
+ embedded = fetch_image_data_uri(campus_url)
930
+ if embedded:
931
+ image_embedded = True
932
+ campus_cell = f'<img class="campus-top-img" src="{h(embedded)}" alt="Campus Image" />'
933
+ if campus_cap:
934
+ campus_cell += f'<div class="campus-top-cap">{h(campus_cap)}</div>'
935
+ else:
936
+ campus_cell = '<div class="campus-placeholder-cell">Campus image unavailable</div>'
937
+ else:
938
+ campus_cell = '<div class="campus-placeholder-cell">Campus image unavailable</div>'
939
+
940
+ if has_stats:
941
+ if image_embedded:
942
+ stats["images_embedded"] = stats.get("images_embedded", 0) + 1
943
+ else:
944
+ stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1
945
+
946
+ summary_cell = ""
947
+ if overview_json is not None:
948
+ j = overview_json
949
+ founded = get_any(j, ["founded", "Founded"])
950
+ total = get_any(j, ["total_students", "Total Students"])
951
+ undergrad = get_any(j, ["undergraduates", "Undergraduate Students", "undergraduate_students"])
952
+ postgrad = get_any(j, ["postgraduate_students", "Postgraduate Students"])
953
+ acc_rate = get_any(j, ["acceptance_rate", "Acceptance Rate"])
954
+ location = get_any(j, ["location", "Location"])
955
+ tuition = get_any(j, [
956
+ "tuition_out_of_state_yearly",
957
+ "Yearly Out of State Tuition Fees",
958
+ "Yearly Out-of-State Tuition Fees",
959
+ "Yearly Tuition Fees",
960
+ "Yearly Out-of-State Tuition Fees:",
961
+ ])
962
+
963
+ summary_cell += '<div class="summary-title">Summary info</div>'
964
+ summary_cell += '<ul class="summary-ul">'
965
+ if founded:
966
+ summary_cell += f'<li><span class="lbl">Founded:</span> {h(founded)}</li>'
967
+ if total:
968
+ summary_cell += f'<li><span class="lbl">Total Students:</span> {h(total)}</li>'
969
+ if undergrad:
970
+ summary_cell += f'<li><span class="lbl">Undergraduate Students:</span> {h(undergrad)}</li>'
971
+ if postgrad:
972
+ summary_cell += f'<li><span class="lbl">Postgraduate Students:</span> {h(postgrad)}</li>'
973
+ if acc_rate or location:
974
+ summary_cell += "<li>"
975
+ if acc_rate:
976
+ summary_cell += f'<span class="lbl">Acceptance Rate:</span> {h(acc_rate)} '
977
+ if location:
978
+ summary_cell += f'<span class="lbl">Location:</span> {h(location)}'
979
+ summary_cell += "</li>"
980
+ if tuition:
981
+ summary_cell += f'<li><span class="lbl">Yearly Tuition/Out-of-State Tuition:</span> {h(tuition)}</li>'
982
+ summary_cell += "</ul>"
983
+
984
+ if resolved_website:
985
+ if has_stats:
986
+ stats["website_rows"] = stats.get("website_rows", 0) + 1
987
+ summary_cell += (
988
+ f'<div class="uni-website"><span class="lbl">Website:</span> '
989
+ f'<a href="{h(resolved_website)}" target="_blank" rel="noopener noreferrer">'
990
+ f'{h(resolved_website)}</a></div>'
991
+ )
992
+
993
+ out += (
994
+ '<table class="school-top-table" cellspacing="0" cellpadding="0"><tr>'
995
+ f'<td class="school-top-summary" style="vertical-align:top;">{summary_cell}</td>'
996
+ f'<td class="school-top-campus" style="vertical-align:top;">{campus_cell}</td>'
997
+ "</tr></table>"
998
+ )
999
+
1000
+ # 4. Benefits
1001
+ if "benefits" in sec_map:
1002
+ j = sec_map["benefits"].get("section_json", {})
1003
+ if not isinstance(j, dict):
1004
+ j = {}
1005
+ benefits = j.get("benefits", [])
1006
+ if not isinstance(benefits, list):
1007
+ benefits = []
1008
+
1009
+ out += '<div class="benefits-section">'
1010
+ out += '<div class="benefits-bar">Benefits for ISP students at this school</div>'
1011
+ if benefits:
1012
+ out += '<ul class="benefits-ul">'
1013
+ for b in benefits:
1014
+ b_str = str(b).strip()
1015
+ if not b_str:
1016
+ continue
1017
+ out += f'<li class="benefit-li"><span class="benefit-bullet">&bull;</span> <span class="benefit-text">{h(b_str)}</span></li>'
1018
+ out += "</ul>"
1019
+ else:
1020
+ out += '<div class="muted" style="margin:4px 0 6px;">No benefits listed.</div>'
1021
+ out += "</div>"
1022
+
1023
+ # 5. Programs
1024
+ if "programs" in sec_map:
1025
+ j = sec_map["programs"].get("section_json", {})
1026
+ if not isinstance(j, dict):
1027
+ j = {}
1028
+ programs = j.get("programs", [])
1029
+ if not isinstance(programs, list):
1030
+ programs = []
1031
+
1032
+ # Filter inactive
1033
+ if not include_inactive_programs:
1034
+ def _is_active(p: dict) -> bool:
1035
+ flag = p.get("program_active", p.get("is_active", p.get("active", 1)))
1036
+ return is_truthy(flag)
1037
+
1038
+ programs = [p for p in programs if isinstance(p, dict) and _is_active(p)]
1039
+
1040
+ out += (
1041
+ '<div class="qualify">To qualify for The International Scholars Program at '
1042
+ f"{h(uni_name)}, you must be willing to study any of the following programs:</div>"
1043
+ )
1044
+
1045
+ if programs:
1046
+ out += '<table class="programs">'
1047
+ out += (
1048
+ '<th style="width:34%">Program</th>'
1049
+ '<th style="width:33%">Designation</th>'
1050
+ '<th style="width:33%">Entrance Examination</th></tr></thead><tbody>'
1051
+ )
1052
+
1053
+ for p in programs:
1054
+ if not isinstance(p, dict):
1055
+ continue
1056
+
1057
+ program_name = str(p.get("program_name", "")).strip()
1058
+ link = str(p.get("program_link", "")).strip()
1059
+ if not link and isinstance(p.get("program_links"), dict):
1060
+ link = str(p["program_links"].get("web_link", "")).strip()
1061
+
1062
+ program_name_html = h(program_name)
1063
+ if link:
1064
+ program_name_html = f'<a href="{h(link)}" target="_blank" rel="noopener noreferrer">{program_name_html}</a>'
1065
+
1066
+
1067
+
1068
+ entrance = str(p.get("entrance_exam", p.get("entrance_examination", "")))
1069
+ designation = str(p.get("designation", ""))
1070
+ out += (
1071
+ f"<tr>"
1072
+ f"<td>{program_name_html}</td>"
1073
+ f"<td>{h(designation)}</td>"
1074
+ f"<td>{h(entrance)}</td>"
1075
+ f"</tr>"
1076
+ )
1077
+
1078
+ out += "</tbody></table>"
1079
+ else:
1080
+ out += '<div class="muted" style="margin:0 0 6px;">No programs listed.</div>'
1081
+
1082
+ # Extra sections
1083
+ skip_keys = {"campus_image", "image", "overview", "benefits", "programs"}
1084
+ for s in sections:
1085
+ if not isinstance(s, dict):
1086
+ continue
1087
+ k = str(s.get("section_key", ""))
1088
+ if not k or k in skip_keys:
1089
+ continue
1090
+ title = str(s.get("section_title", ""))
1091
+ j = s.get("section_json", {})
1092
+ if not isinstance(j, dict):
1093
+ j = {}
1094
+ out += render_global_blocks(k, title, j, debug)
1095
+
1096
+ out += "</div>"
1097
+ return out
app/services/utils.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions shared across renderers.
2
+
3
+ Mirrors PHP helpers: h(), formatMoneyFigures(), handbook_anchor(), etc.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import html
9
+ import re
10
+
11
+
12
def h(s: str) -> str:
    """Return *s* as HTML-safe text (Python counterpart of the PHP ``h()`` helper).

    The value is converted with ``str()`` first, then ``&``, ``<``, ``>`` and
    both quote characters are replaced by their HTML entities.
    """
    as_text = str(s)
    return html.escape(as_text, quote=True)
15
+
16
+
17
def is_assoc(a: list | dict) -> bool:
    """Tell whether *a* is a mapping (a PHP "associative array") rather than a plain list."""
    if isinstance(a, dict):
        return True
    return False
20
+
21
+
22
def hb_slug(s: str) -> str:
    """Build an ASCII slug for anchors from *s*.

    The text is lower-cased and trimmed, every run of characters outside
    ``a-z0-9`` becomes a single underscore, and leading/trailing underscores
    are removed.

    Args:
        s: Source text.

    Returns:
        The slug; an empty string when *s* has no ASCII letters or digits.
    """
    lowered = s.lower().strip()
    # No re.IGNORECASE here: the input is already lower-cased, and with that
    # flag a Unicode pattern's [a-z] also accepts the non-ASCII letters
    # U+0131 and U+017F, which would leak into the slug.
    # Each run is replaced by exactly one "_", so no separate pass is needed
    # to collapse repeated underscores.
    slug = re.sub(r"[^a-z0-9]+", "_", lowered)
    return slug.strip("_")
28
+
29
+
30
def handbook_anchor(prefix: str, text: str, idx: int) -> str:
    """Normalise *text* into an anchor id (mirrors PHP ``handbook_anchor``).

    The text is lower-cased and trimmed, runs of characters outside
    ``a-z0-9`` become a single hyphen, and leading/trailing hyphens are
    dropped. When nothing is left, ``"{prefix}-{idx}"`` is used as the base.

    Args:
        prefix: Leading component of the id; used verbatim.
        text: Human-readable title to derive the id from.
        idx: Index appended to the id.

    Returns:
        ``"{prefix}-{base}-{idx}"``.
    """
    # No re.IGNORECASE: the text is already lower-cased, and with that flag a
    # Unicode pattern's [a-z] also accepts the non-ASCII letters U+0131 and
    # U+017F, which would end up inside the id.
    base = re.sub(r"[^a-z0-9]+", "-", text.lower().strip()).strip("-")
    if not base:
        base = f"{prefix}-{idx}"
    return f"{prefix}-{base}-{idx}"
38
+
39
+
40
def is_truthy(val) -> bool:
    """Interpret a loosely-typed flag as a boolean (port of PHP ``handbook_true``).

    Booleans are returned unchanged and integers are true when non-zero.
    Any other value is stringified, lower-cased and trimmed; it is false only
    when the result is ``""``, ``"0"`` or ``"false"``.
    """
    if isinstance(val, bool):
        return val
    if isinstance(val, int):
        return val != 0
    falsy_spellings = ("0", "false", "")
    normalized = str(val).lower().strip()
    if normalized in falsy_spellings:
        return False
    return True
48
+
49
+
50
def format_money_figures(text: str) -> str:
    """Rewrite monetary figures in *text* to the ``USD X,XXX`` form.

    Three passes run in order:

    1. ``$X`` becomes ``USD X``.
    2. Existing ``USD X`` figures (any letter case, with or without whitespace
       after ``USD``) are re-emitted as ``USD `` plus the comma-grouped number.
    3. Bare numbers that have four or more digits, or are already
       comma-grouped, get a ``USD `` prefix unless they are directly followed
       by ``%``, ``/`` or another digit.

    The integer part is regrouped with thousands separators; fractional
    digits are carried over verbatim.

    Args:
        text: Free text that may contain amounts.

    Returns:
        The rewritten text; falsy input is returned unchanged.
    """
    if not text:
        return text

    def _to_usd(m: re.Match) -> str:
        # group(1) = integer part (may contain commas), group(2) = fraction digits or None.
        grouped = f"{int(m.group(1).replace(',', '')):,}"
        fraction = m.group(2)
        return f"USD {grouped}.{fraction}" if fraction else f"USD {grouped}"

    # Digits with optional comma groups. A trailing comma is NOT part of the
    # amount, so "$500, then" keeps its punctuation.
    amount = r"(\d+(?:,\d+)*)(?:\.(\d+))?"

    # Pass 1: "$X" -> "USD X".
    text = re.sub(r"\$" + amount, _to_usd, text)

    # Pass 2: normalise existing "USD X" (also "usd X" and "USD1000") so every
    # amount reads exactly "USD " + number; pass 3's look-behind relies on that.
    text = re.sub(r"\bUSD\s*" + amount, _to_usd, text, flags=re.IGNORECASE)

    # Pass 3: prefix bare large numbers. The look-behinds skip numbers already
    # prefixed by "USD " and any digit group that is the tail of a longer number
    # (preceded by a digit plus "," or "."), so "USD 1,000,000" is not re-matched
    # from its second group.
    text = re.sub(
        r"(?<!\d)(?<!\d[.,])(?<!USD )((?:\d{1,3}(?:,\d{3})+)|(?:\d{4,}))(?:\.(\d+))?(?![%\d/])",
        _to_usd,
        text,
    )

    return text
119
+
120
+
121
def ensure_program_options_pair(text: str) -> str:
    """Keep the REGULAR and PRIME program options mentioned together.

    When exactly one of the two words occurs in *text* (whole word, any case),
    " (REGULAR and PRIME)" is appended after trailing whitespace is removed.
    Text that mentions both, neither, or is empty is returned unchanged.
    """
    if not text:
        return text

    regular_found = re.search(r"\bREGULAR\b", text, flags=re.IGNORECASE) is not None
    prime_found = re.search(r"\bPRIME\b", text, flags=re.IGNORECASE) is not None

    # Both present or both absent: nothing to enforce.
    if regular_found == prime_found:
        return text

    # Guard against appending the pair twice.
    already_paired = re.search(r"\(\s*REGULAR\s+and\s+PRIME\s*\)", text, flags=re.IGNORECASE)
    if already_paired:
        return text

    return text.rstrip() + " (REGULAR and PRIME)"
139
+
140
+
141
def sort_sections_stable(sections: list[dict]) -> list[dict]:
    """Sort *sections* in place by ``sort_order``, then ``id``, then original position.

    Missing (``None``) ``sort_order`` / ``id`` values sort after present ones.
    A temporary ``"_i"`` key records each section's position during the sort
    and is removed afterwards. The same list object is returned.
    """
    for position, section in enumerate(sections):
        section.setdefault("_i", position)

    def _rank(value):
        # Present values come first (bucket 0); missing ones go last (bucket 1).
        return (1, 0) if value is None else (0, value)

    sections.sort(
        key=lambda section: (
            _rank(section.get("sort_order")),
            _rank(section.get("id")),
            section.get("_i", 0),
        )
    )

    for section in sections:
        section.pop("_i", None)
    return sections
157
+
158
+
159
def get_any(d: dict, keys: list[str]) -> str:
    """Look up *keys* in order and return the first usable value as a trimmed string.

    Values that are ``None``, a dict or a list are skipped, as are values whose
    string form is empty after trimming. Returns ``""`` when no key qualifies.
    """
    for key in keys:
        value = d.get(key)
        if value is not None and not isinstance(value, (dict, list)):
            trimmed = str(value).strip()
            if trimmed:
                return trimmed
    return ""
169
+
170
+
171
def emphasize_keywords(text: str) -> str:
    """HTML-escape *text* and wrap key handbook terms in ``<strong>`` tags.

    The text is escaped here with ``h()``, so pass raw (unescaped) text;
    text that is already escaped would be escaped a second time.

    Emphasised, in this order: REGULAR / PRIME, ``USD`` amounts, a standalone
    ``USD``, ``$`` amounts, the GPA values 2.8 / 3.4 / 4.0, qualification and
    geography terms, and "Refund Policy".

    Args:
        text: Raw text to decorate.

    Returns:
        HTML with ``<strong>`` tags; falsy input is returned unchanged.
    """
    if not text:
        return text

    # Digits with optional comma groups and decimals. A trailing comma is
    # punctuation, not part of the amount, so it stays outside <strong>.
    number = r"\d+(?:,\d+)*(?:\.\d+)?"

    passes = (
        # REGULAR and PRIME (case-insensitive, whole word).
        (r'\b(REGULAR|PRIME)\b', re.IGNORECASE),
        # USD amounts like USD 1,000 or USD 500.
        (r'\b(USD\s+' + number + r')', re.IGNORECASE),
        # Standalone USD, i.e. not followed by a number.
        (r'\b(USD)\b(?!\s*\d)', re.IGNORECASE),
        # Dollar-sign amounts like $20, $1,000, $1,000.00.
        (r'(\$' + number + r')', 0),
        # Specific GPA values.
        (r'\b(2\.8|3\.4|4\.0)\b', 0),
        # Qualification and geo terms. The trailing \b applies only to the
        # alternatives that end in a word character; after the closing ")"
        # it would demand a following word character and so would never
        # match before punctuation or whitespace.
        (
            r'\b(GPA\s*\(\s*Undergraduate\s+Requirement\s*\)'
            r'|(?:GPA|High\s+School\s+grades|Global|Uganda|Kenya)\b)',
            re.IGNORECASE,
        ),
        # Refund policy phrase.
        (r'\b(Refund\s+Policy)\b', re.IGNORECASE),
    )

    escaped = h(text)
    for pattern, flags in passes:
        escaped = re.sub(pattern, r'<strong>\1</strong>', escaped, flags=flags)
    return escaped
237
+
238
+
239
def linkify_urls(text: str) -> str:
    """Turn http/https URLs in *text* into ``<a target="_blank">`` links.

    Only the URL is rewritten: it is escaped with ``h()`` for both the
    ``href`` and the link label. Everything outside a URL is returned exactly
    as given, so escaping the surrounding text is the caller's job. If the
    input is already HTML-escaped, entities inside a URL (e.g. ``&amp;``) are
    escaped again.

    Trailing punctuation that was probably sentence punctuation
    (``. , ; : ! ? ) ' "``) is kept out of the link and re-emitted right
    after it.

    Args:
        text: Text that may contain URLs.

    Returns:
        HTML with anchor tags; falsy input is returned unchanged.
    """
    if not text:
        return text

    # http:// or https:// followed by everything up to whitespace, "<" or ")".
    url_pattern = r'(https?://[^\s<)]+)'

    def make_link(match):
        matched = match.group(1)
        # Trailing punctuation is most likely not part of the URL.
        url = matched.rstrip('.,;:!?)\'\"')
        # Put the stripped characters back after the link so the sentence
        # keeps its punctuation.
        trailing = matched[len(url):]
        return (
            f'<a href="{h(url)}" target="_blank" rel="noopener noreferrer">{h(url)}</a>'
            f'{trailing}'
        )

    return re.sub(url_pattern, make_link, text)
app/static/css/print.css ADDED
@@ -0,0 +1,1344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* =========================================================
2
+ ISP HANDBOOK - CORRECTED PRINT CSS
3
+ Uses real page margins (2.54 cm all sides)
4
+ Removes negative-margin / negative-offset layout tricks
5
+ Keeps decorative elements from breaking content flow
6
+ ========================================================= */
7
+
8
+ /* ------------------------------
9
+ PAGE SETUP
10
+ ------------------------------ */
11
+ @page {
12
+ size: A4;
13
+ margin: 2.54cm;
14
+ }
15
+
16
+ /* Optional cover page: use only if your renderer supports named pages */
17
+ @page cover {
18
+ size: A4;
19
+ margin: 0;
20
+ }
21
+
22
+ /* ------------------------------
23
+ RESET
24
+ ------------------------------ */
25
+ *,
26
+ *::before,
27
+ *::after {
28
+ box-sizing: border-box;
29
+ }
30
+
31
+ html,
32
+ body {
33
+ margin: 0;
34
+ padding: 0;
35
+ font-family: "Century Gothic", "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
36
+ font-size: 10pt;
37
+ line-height: 1.45;
38
+ color: #1a1a1a;
39
+ text-align: justify;
40
+ -webkit-print-color-adjust: exact;
41
+ print-color-adjust: exact;
42
+ background: #ffffff;
43
+ }
44
+
45
+ /* ------------------------------
46
+ EXTERNAL PLAYWRIGHT HEADER/FOOTER
47
+ Keep hidden in body flow if using templates
48
+ ------------------------------ */
49
+ .page-header,
50
+ .page-footer {
51
+ display: none !important;
52
+ }
53
+
54
+ .page-header img {
55
+ display: block;
56
+ width: 8.45in;
57
+ height: 1.03in;
58
+ }
59
+
60
+ /* ------------------------------
61
+ SAFE CONTENT WRAPPER
62
+ ------------------------------ */
63
+ .page-content {
64
+ display: block;
65
+ width: 100%;
66
+ max-width: 100%;
67
+ margin: 0;
68
+ padding: 0;
69
+ position: relative;
70
+ z-index: 1;
71
+ overflow: visible;
72
+ word-wrap: break-word;
73
+ overflow-wrap: break-word;
74
+ }
75
+
76
+ /* Prevent accidental blank page after cover */
77
+ .cover-page+.page-content,
78
+ .cover-page+.toc-page,
79
+ .cover-page+.section-block {
80
+ page-break-before: auto;
81
+ break-before: auto;
82
+ }
83
+
84
+ /* Prevent accidental blank page from a leading .page-break */
85
+ .page-content> :first-child.page-break,
86
+ .page-content> :first-child.section-block.page-break {
87
+ page-break-before: auto;
88
+ break-before: auto;
89
+ }
90
+
91
+ /* ------------------------------
92
+ DECORATIVE RIGHT-SIDE LABEL
93
+ Hidden in the base HTML. Rendered as a PDF overlay
94
+ by pdf_renderer.py onto content pages only.
95
+ Kept here so the <img> loads for src extraction.
96
+ Dimensions: 2.5cm × 24.6cm (vertical strip, right edge, matches Word image4.png)
97
+ ------------------------------ */
98
+ .hb-right-label {
99
+ position: absolute;
100
+ top: -9999px;
101
+ left: -9999px;
102
+ width: 1px;
103
+ height: 1px;
104
+ overflow: hidden;
105
+ pointer-events: none;
106
+ }
107
+
108
+ .hb-right-label img {
109
+ display: block;
110
+ }
111
+
112
+ /* ------------------------------
113
+ TYPOGRAPHY — matching Word handbook exactly
114
+ H1: 12pt, #0263A3, uppercase, no bold, no border
115
+ H2-H6: color varies by context, no bold, no border
116
+ ------------------------------ */
117
+ .hb-heading-1,
118
+ .h2,
119
+ h1 {
120
+ font-size: 12pt;
121
+ font-weight: 700;
122
+ color: #0263A3;
123
+ margin: 4pt 0 10pt;
124
+ padding-bottom: 0;
125
+ border-bottom: none;
126
+ line-height: 1.25;
127
+ text-transform: uppercase;
128
+ text-align: left;
129
+ text-decoration: none;
130
+ page-break-after: avoid;
131
+ break-after: avoid;
132
+ page-break-inside: avoid;
133
+ break-inside: avoid;
134
+ }
135
+
136
+ .hb-heading-2,
137
+ .h3,
138
+ h2,
139
+ h3,
140
+ h4,
141
+ h5,
142
+ h6 {
143
+ font-size: 11pt;
144
+ font-weight: 700;
145
+ color: #199970;
146
+ margin: 12pt 0 6pt;
147
+ padding-bottom: 0;
148
+ border-bottom: none;
149
+ line-height: 1.25;
150
+ text-align: left;
151
+ text-decoration: none;
152
+ page-break-after: avoid;
153
+ break-after: avoid;
154
+ page-break-inside: avoid;
155
+ break-inside: avoid;
156
+ }
157
+
158
+ .hb-paragraph,
159
+ .p,
160
+ p {
161
+ margin: 6pt 0 2pt;
162
+ font-size: 10pt;
163
+ line-height: 1.5;
164
+ text-align: justify;
165
+ orphans: 3;
166
+ widows: 3;
167
+ }
168
+
169
+ strong,
170
+ b {
171
+ font-weight: 700;
172
+ }
173
+
174
+ em,
175
+ i {
176
+ font-style: italic;
177
+ }
178
+
179
+ a,
180
+ a:visited {
181
+ color: #0263A3;
182
+ text-decoration: none;
183
+ border-bottom: 0.5pt solid #0263A3;
184
+ word-break: break-word;
185
+ }
186
+
187
+ /* ------------------------------
188
+ LISTS
189
+ ------------------------------ */
190
+ .hb-bullet-list,
191
+ .ul,
192
+ ul.hb-bullet-list {
193
+ list-style: none !important;
194
+ margin: 4pt 0 10pt 16pt;
195
+ padding: 0;
196
+ font-size: 9.5pt;
197
+ }
198
+
199
+ .hb-bullet-list li,
200
+ .ul li,
201
+ ul.hb-bullet-list li {
202
+ position: relative;
203
+ margin: 0 0 5pt;
204
+ padding-left: 16pt;
205
+ line-height: 1.45;
206
+ page-break-inside: avoid;
207
+ break-inside: avoid;
208
+ }
209
+
210
+ .hb-bullet-list li::before,
211
+ .ul li::before,
212
+ ul.hb-bullet-list li::before {
213
+ content: "\27A4";
214
+ position: absolute;
215
+ left: 0;
216
+ top: 0;
217
+ color: #0263A3;
218
+ font-size: 8pt;
219
+ font-weight: 700;
220
+ }
221
+
222
+ /* ordered lists */
223
+ .hb-numbered-list,
224
+ .ol,
225
+ ol.hb-numbered-list {
226
+ list-style: decimal !important;
227
+ margin: 4pt 0 10pt 18pt;
228
+ padding: 0;
229
+ font-size: 9.5pt;
230
+ }
231
+
232
+ .hb-numbered-list li,
233
+ .ol li,
234
+ ol.hb-numbered-list li {
235
+ margin: 0 0 5pt;
236
+ padding-left: 2pt;
237
+ line-height: 1.45;
238
+ page-break-inside: avoid;
239
+ break-inside: avoid;
240
+ }
241
+
242
+ .hb-numbered-list li::before,
243
+ .ol li::before,
244
+ ol.hb-numbered-list li::before {
245
+ content: none !important;
246
+ }
247
+
248
+ /* Sub-bullets — checkmarks, indented under parent bullet */
249
+ .hb-sub-bullets {
250
+ margin-left: 32pt !important;
251
+ }
252
+
253
+ .hb-sub-bullets li::before {
254
+ content: "\2713" !important;
255
+ color: #199970;
256
+ font-size: 9pt;
257
+ font-weight: 700;
258
+ }
259
+
260
+ /* ------------------------------
261
+ NOTES / EMPHASIS
262
+ Guideline requires full bold text;
263
+ NOTE and ONLY IF in red.
264
+ ------------------------------ */
265
+ .hb-note,
266
+ .note {
267
+ margin: 10pt 0 12pt;
268
+ padding: 8pt 10pt 8pt 14pt;
269
+ border-left: 3.5pt solid #C00000;
270
+ background: #FFF8F8;
271
+ border-radius: 0 2pt 2pt 0;
272
+ font-size: 10pt;
273
+ line-height: 1.45;
274
+ font-weight: 700;
275
+ page-break-inside: avoid;
276
+ break-inside: avoid;
277
+ }
278
+
279
+ .hb-note-keyword,
280
+ .note-keyword,
281
+ .note .keyword,
282
+ .only-if {
283
+ color: #C00000;
284
+ font-weight: 700;
285
+ }
286
+
287
+ /* ------------------------------
288
+ BOLD EMPHASIS FOR KEY INFORMATION
289
+ Payment amounts, REGULAR, PRIME, critical terms
290
+ ------------------------------ */
291
+ .hb-bold-keyword {
292
+ font-weight: 700;
293
+ }
294
+
295
+ .hb-red-keyword {
296
+ font-weight: 700;
297
+ color: #C00000;
298
+ }
299
+
300
+ /* ------------------------------
301
+ SCHOOL INFO - Green color for school information
302
+ ------------------------------ */
303
+ .hb-school-info {
304
+ color: #199970;
305
+ font-size: 9.5px;
306
+ }
307
+
308
+ /* ------------------------------
309
+ HELPERS
310
+ ------------------------------ */
311
+ .page-break {
312
+ page-break-before: always;
313
+ break-before: page;
314
+ }
315
+
316
+ .avoid-break {
317
+ page-break-inside: avoid;
318
+ break-inside: avoid;
319
+ }
320
+
321
+ .keep-with-next {
322
+ page-break-after: avoid;
323
+ break-after: avoid;
324
+ }
325
+
326
+ .clearfix::after {
327
+ content: "";
328
+ display: block;
329
+ clear: both;
330
+ }
331
+
332
+ .debug-block {
333
+ border: 2px dashed #cc0000;
334
+ padding: 12px;
335
+ background: #fffbe6;
336
+ page-break-inside: avoid;
337
+ break-inside: avoid;
338
+ }
339
+
340
+ /* ------------------------------
341
+ COVER PAGE
342
+ Prefer named page.
343
+ If unsupported by renderer, cover will still render safely.
344
+ ------------------------------ */
345
+ .cover-page {
346
+ page: cover;
347
+ page-break-after: always;
348
+ break-after: page;
349
+ margin: 0;
350
+ padding: 0;
351
+ position: relative;
352
+ overflow: hidden;
353
+ }
354
+
355
+ .cover-img {
356
+ display: block;
357
+ width: 100%;
358
+ height: auto;
359
+ }
360
+
361
+ /* ------------------------------
362
+ FULL-PAGE IMAGE PAGES
363
+ Only use when intentionally full-page.
364
+ ------------------------------ */
365
+ .fullpage-img-wrap {
366
+ page: cover;
367
+ page-break-before: always;
368
+ break-before: page;
369
+ page-break-after: always;
370
+ break-after: page;
371
+ margin: 0;
372
+ padding: 0;
373
+ position: relative;
374
+ overflow: hidden;
375
+ }
376
+
377
+ .fullpage-img {
378
+ display: block;
379
+ width: 100%;
380
+ height: auto;
381
+ }
382
+
383
+ img {
384
+ max-width: 100%;
385
+ height: auto;
386
+ }
387
+
388
+ /* ------------------------------
389
+ TABLES - GLOBAL
390
+ Clean styling matching Word handbook: white cells,
391
+ bold headers, thin black borders, no colored fills.
392
+ ------------------------------ */
393
+ table {
394
+ width: 100%;
395
+ border-collapse: collapse;
396
+ table-layout: fixed;
397
+ max-width: 100%;
398
+ overflow: hidden;
399
+ page-break-inside: auto;
400
+ break-inside: auto;
401
+ }
402
+
403
+ thead {
404
+ display: table-header-group;
405
+ }
406
+
407
+ tfoot {
408
+ display: table-row-group;
409
+ }
410
+
411
+ tr {
412
+ page-break-inside: avoid;
413
+ break-inside: avoid;
414
+ }
415
+
416
+ .hb-table,
417
+ .tbl {
418
+ width: 100%;
419
+ border-collapse: collapse;
420
+ table-layout: fixed;
421
+ max-width: 100%;
422
+ margin: 10pt 0 14pt;
423
+ font-size: 9.5pt;
424
+ line-height: 1.35;
425
+ border: 0.75pt solid #000000;
426
+ }
427
+
428
+ .hb-table th,
429
+ .hb-table td,
430
+ .tbl th,
431
+ .tbl td {
432
+ border: 0.75pt solid #000000;
433
+ padding: 6pt 8pt;
434
+ vertical-align: top;
435
+ word-wrap: break-word;
436
+ overflow-wrap: break-word;
437
+ font-size: 9.5pt;
438
+ line-height: 1.35;
439
+ background: #ffffff;
440
+ color: #1a1a1a;
441
+ text-align: left;
442
+ }
443
+
444
+ .hb-table th,
445
+ .tbl th {
446
+ font-weight: 700;
447
+ text-transform: uppercase;
448
+ }
449
+
450
+ /* comparison table */
451
+ .hb-table-comparison,
452
+ .tbl-comparison {
453
+ font-size: 9pt;
454
+ }
455
+
456
+ .hb-table-comparison th,
457
+ .tbl-comparison th {
458
+ text-align: left;
459
+ font-size: 9pt;
460
+ font-weight: 700;
461
+ text-transform: uppercase;
462
+ }
463
+
464
+ /* Section-divider row: teal background matching Word doc #31849B */
465
+ .hb-table th.is-regular-col,
466
+ .hb-table td.is-regular-col,
467
+ .tbl th.is-regular-col,
468
+ .tbl td.is-regular-col {
469
+ color: #ffffff;
470
+ font-weight: 700;
471
+ background: #31849B;
472
+ }
473
+
474
+ .hb-table td.is-regular-col,
475
+ .tbl td.is-regular-col {
476
+ color: #31849B;
477
+ background: transparent;
478
+ }
479
+
480
+ .hb-table th.is-prime-col,
481
+ .hb-table td.is-prime-col,
482
+ .tbl th.is-prime-col,
483
+ .tbl td.is-prime-col {
484
+ color: #ffffff;
485
+ font-weight: 700;
486
+ background: #1F497D;
487
+ }
488
+
489
+ .hb-table td.is-prime-col,
490
+ .tbl td.is-prime-col {
491
+ color: #1F497D;
492
+ background: transparent;
493
+ }
494
+
495
+ /* ------------------------------
496
+ PROGRAM TABLES
497
+ Matching Word handbook: bold green (#199970) header text,
498
+ white cells, thin black borders, no colored fills.
499
+ ------------------------------ */
500
+ .hb-programs,
501
+ table.programs {
502
+ width: 100%;
503
+ border-collapse: collapse;
504
+ table-layout: fixed;
505
+ max-width: 100%;
506
+ margin: 10pt 0 14pt;
507
+ border: 0.75pt solid #000000;
508
+ font-family: "Century Gothic", "Segoe UI", sans-serif;
509
+ }
510
+
511
+ .hb-programs th,
512
+ .hb-programs td,
513
+ table.programs th,
514
+ table.programs td {
515
+ border: 0.75pt solid #000000;
516
+ padding: 5pt 6pt;
517
+ vertical-align: top;
518
+ word-wrap: break-word;
519
+ overflow-wrap: break-word;
520
+ font-size: 8.75pt;
521
+ line-height: 1.3;
522
+ background: #ffffff;
523
+ color: #1a1a1a;
524
+ }
525
+
526
+ .hb-programs th,
527
+ table.programs th {
528
+ font-weight: 700;
529
+ background: #ffffff;
530
+ color: #199970;
531
+ text-transform: uppercase;
532
+ text-align: left;
533
+ letter-spacing: 0.3px;
534
+ font-size: 8.5pt;
535
+ padding: 6pt 6pt;
536
+ }
537
+
538
+ .hb-programs td,
539
+ table.programs td {
540
+ text-align: left;
541
+ }
542
+
543
+ .hb-programs td a,
544
+ table.programs td a {
545
+ text-decoration: none;
546
+ border-bottom: none;
547
+ color: #1c75bc;
548
+ font-weight: 700;
549
+ }
550
+
551
+ .hb-programs th:nth-child(1),
552
+ .hb-programs td:nth-child(1),
553
+ table.programs th:nth-child(1),
554
+ table.programs td:nth-child(1) {
555
+ width: 22%;
556
+ }
557
+
558
+ .hb-programs th:nth-child(2),
559
+ .hb-programs td:nth-child(2),
560
+ table.programs th:nth-child(2),
561
+ table.programs td:nth-child(2) {
562
+ width: 14%;
563
+ }
564
+
565
+ .hb-programs th:nth-child(3),
566
+ .hb-programs td:nth-child(3),
567
+ table.programs th:nth-child(3),
568
+ table.programs td:nth-child(3) {
569
+ width: 16%;
570
+ }
571
+
572
+ .hb-programs th:nth-child(4),
573
+ .hb-programs td:nth-child(4),
574
+ table.programs th:nth-child(4),
575
+ table.programs td:nth-child(4) {
576
+ width: 30%;
577
+ }
578
+
579
+ .hb-programs th:nth-child(5),
580
+ .hb-programs td:nth-child(5),
581
+ table.programs th:nth-child(5),
582
+ table.programs td:nth-child(5) {
583
+ width: 18%;
584
+ }
585
+
586
+ /* career list inside programs table */
587
+ .hb-career-list,
588
+ .career-list {
589
+ margin: 0;
590
+ padding-left: 12pt;
591
+ list-style-type: disc;
592
+ }
593
+
594
+ .hb-career-list li,
595
+ .career-list li {
596
+ margin: 0 0 2pt;
597
+ padding-left: 0;
598
+ line-height: 1.2;
599
+ font-size: 8.75pt;
600
+ }
601
+
602
+ .hb-career-list li::before,
603
+ .career-list li::before {
604
+ content: none;
605
+ }
606
+
607
+ /* ------------------------------
608
+ TABLE OF CONTENTS
609
+ Stable within content width
610
+ ------------------------------ */
611
+ /* ── Table of Contents ─────────────────────────── */
612
+ .toc-page {
613
+ display: block;
614
+ width: 100%;
615
+ margin: 0;
616
+ padding: 0;
617
+ page-break-after: always;
618
+ break-after: page;
619
+ page-break-inside: avoid;
620
+ break-inside: avoid;
621
+ }
622
+
623
+ .toc {
624
+ width: 100%;
625
+ margin: 0;
626
+ padding: 0;
627
+ }
628
+
629
+ .toc-heading {
630
+ display: block;
631
+ font-size: 14pt;
632
+ font-weight: 700;
633
+ text-transform: uppercase;
634
+ color: #0263A3;
635
+ margin: 0 0 6pt;
636
+ padding-bottom: 3pt;
637
+ border-bottom: 2.5pt solid #0263A3;
638
+ line-height: 1.2;
639
+ }
640
+
641
+ /* Each TOC row: flex row, single line, with dot leader filling gap */
642
+ .toc-entry {
643
+ display: flex;
644
+ flex-direction: row;
645
+ align-items: baseline;
646
+ margin: 0;
647
+ padding: 1.5pt 0;
648
+ line-height: 1.2;
649
+ font-size: 9pt;
650
+ color: #1a1a1a;
651
+ overflow: hidden;
652
+ white-space: nowrap;
653
+ }
654
+
655
+ /* Major section rows: bold, uppercase */
656
+ .toc-entry--major {
657
+ font-weight: 700;
658
+ text-transform: uppercase;
659
+ font-size: 9pt;
660
+ }
661
+
662
+ /* Sub-items (universities): regular weight */
663
+ .toc-entry--sub {
664
+ font-weight: 400;
665
+ text-transform: none;
666
+ font-size: 8.5pt;
667
+ }
668
+
669
+ /* Indented entries (universities) */
670
+ .toc-entry--indent {
671
+ padding-left: 16pt;
672
+ }
673
+
674
+ /* Title text: natural width, may shrink (flex-shrink: 1) with overflow clipped; never wraps */
675
+ .toc-label {
676
+ flex: 0 1 auto;
677
+ overflow: hidden;
678
+ text-overflow: clip;
679
+ white-space: nowrap;
680
+ }
681
+
682
+ /* Dot leader: fills remaining space between title and page number */
683
+ .toc-leader {
684
+ flex: 1 1 auto;
685
+ overflow: hidden;
686
+ white-space: nowrap;
687
+ margin: 0 3pt;
688
+ min-width: 16pt;
689
+ }
690
+
691
+ .toc-leader::after {
692
+ content: " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .";
693
+ display: inline;
694
+ font-size: 7.5pt;
695
+ letter-spacing: 0pt;
696
+ color: #999;
697
+ word-spacing: 0;
698
+ }
699
+
700
+ /* Page number: natural width (min 20pt), right-aligned, no shrink.
701
+ This span carries the same "toc-page" class as the TOC wrapper block, so the
702
+ generic .toc-page rule (width: 100% plus forced page break) also matches it.
703
+ The resets below undo those declarations for the page-number cell only;
704
+ span.toc-page is more specific than .toc-page, so they win regardless of order. */
705
+ span.toc-page {
706
+ flex: 0 0 auto;
707
+ width: auto;
708
+ text-align: right;
709
+ min-width: 20pt;
710
+ font-weight: 400;
711
+ color: #1a1a1a;
712
+ white-space: nowrap;
713
+ font-style: normal;
714
+ font-size: inherit;
715
+ page-break-after: auto;
716
+ break-after: auto;
717
+ page-break-inside: auto;
718
+ break-inside: auto;
719
+ }
711
+
712
+ /* Links inside TOC */
713
+ .toc-entry a,
714
+ .toc-entry a:visited {
715
+ color: inherit;
716
+ text-decoration: none;
717
+ border-bottom: none;
718
+ }
719
+
720
+ /* ------------------------------
721
+ SECTION BLOCKS
722
+ ------------------------------ */
723
+ .section-block {
724
+ display: block;
725
+ width: 100%;
726
+ margin: 0 0 12pt;
727
+ padding: 0;
728
+ border: none;
729
+ }
730
+
731
+ .section-block> :first-child {
732
+ margin-top: 0;
733
+ }
734
+
735
+ .summary-section {
736
+ display: block;
737
+ }
738
+
739
+ /* Tier group divider heading (Tier One, Tier Two, etc.) */
740
+ .tier-group-heading {
741
+ page-break-before: always;
742
+ break-before: page;
743
+ }
744
+
745
+ .tier-group-heading .hb-heading-1,
746
+ .tier-group-heading .h1 {
747
+ font-size: 12pt;
748
+ font-weight: normal;
749
+ color: #199970;
750
+ text-align: center;
751
+ border-bottom: none;
752
+ border-top: none;
753
+ padding: 10pt 0;
754
+ margin: 20pt 0 12pt;
755
+ }
756
+
757
+ /* ------------------------------
758
+ ENROLMENT STEPS
759
+ Dedicated page block
760
+ ------------------------------ */
761
+ .hb-enrollment-steps,
762
+ .sec-steps {
763
+ page-break-before: always;
764
+ break-before: page;
765
+ page-break-after: always;
766
+ break-after: page;
767
+ }
768
+
769
+ .hb-step {
770
+ margin: 0 0 12pt;
771
+ padding: 8pt 0 8pt 12pt;
772
+ border-left: 3pt solid #199970;
773
+ background: #F6FBF9;
774
+ page-break-inside: avoid;
775
+ break-inside: avoid;
776
+ }
777
+
778
+ .hb-step-title {
779
+ font-size: 10.5pt;
780
+ font-weight: 700;
781
+ color: #199970;
782
+ margin: 0 0 4pt;
783
+ line-height: 1.25;
784
+ page-break-after: avoid;
785
+ break-after: avoid;
786
+ }
787
+
788
+ .hb-step-qr-wrap {
789
+ margin: 4pt 0 6pt;
790
+ text-align: center;
791
+ }
792
+
793
+ .hb-step-qr {
794
+ display: inline-block;
795
+ width: 72pt;
796
+ height: 72pt;
797
+ margin: 4pt 0 6pt;
798
+ }
799
+
800
+ .hb-telegram-link {
801
+ margin: 4pt 0 2pt;
802
+ text-align: center;
803
+ }
804
+
805
+ .hb-telegram-link a {
806
+ color: #0263A3;
807
+ text-decoration: underline;
808
+ font-size: 9pt;
809
+ }
810
+
811
+ .hb-plain-url {
812
+ font-weight: 400;
813
+ color: #0263A3;
814
+ text-decoration: underline;
815
+ }
816
+
817
+ /* legacy step support */
818
+ .sec-steps .h3 {
819
+ margin: 0 0 4pt;
820
+ font-size: 10pt;
821
+ color: #199970;
822
+ }
823
+
824
+ .sec-steps .p {
825
+ margin: 0 0 6pt;
826
+ }
827
+
828
+ .sec-steps .ul {
829
+ margin: 0 0 6pt 16pt;
830
+ }
831
+
832
+ /* ------------------------------
833
+ UNIVERSITY / SCHOOL PAGE
834
+ ------------------------------ */
835
+ .uni,
836
+ .hb-school-profile {
837
+ page-break-before: always;
838
+ break-before: page;
839
+ padding: 0;
840
+ }
841
+
842
+ .hb-uni-name,
843
+ .uni-name {
844
+ font-size: 12pt;
845
+ font-weight: 700;
846
+ margin: 0 0 3pt;
847
+ padding-bottom: 0;
848
+ border-bottom: none;
849
+ color: #0263A3;
850
+ line-height: 1.2;
851
+ text-transform: none;
852
+ text-decoration: underline;
853
+ text-underline-offset: 2pt;
854
+ }
855
+
856
+ .hb-uni-name a,
857
+ .hb-uni-name-link,
858
+ .uni-name a.uni-name-link {
859
+ color: #0263A3;
860
+ text-decoration: underline;
861
+ text-underline-offset: 2pt;
862
+ border-bottom: none;
863
+ font-weight: 700;
864
+ }
865
+
866
+ /* School info directly below school name in green */
867
+ .hb-summary-title,
868
+ .summary-title {
869
+ font-size: 10pt;
870
+ font-weight: 700;
871
+ text-transform: uppercase;
872
+ margin: 6pt 0 4pt;
873
+ color: #199970;
874
+ letter-spacing: 0.2px;
875
+ }
876
+
877
+ .hb-summary-ul,
878
+ .summary-ul {
879
+ list-style: none;
880
+ margin: 0 0 8pt;
881
+ padding: 0;
882
+ }
883
+
884
+ .hb-summary-ul li,
885
+ .summary-ul li {
886
+ margin: 0 0 3pt;
887
+ padding: 0;
888
+ font-size: 10pt;
889
+ line-height: 1.3;
890
+ color: #199970;
891
+ font-weight: 700;
892
+ }
893
+
894
+ .hb-summary-ul li::before,
895
+ .summary-ul li::before {
896
+ content: none;
897
+ }
898
+
899
+ .hb-lbl,
900
+ .lbl {
901
+ font-weight: 700;
902
+ color: #199970;
903
+ }
904
+
905
+ .hb-uni-website,
906
+ .uni-website {
907
+ margin: 4pt 0 8pt;
908
+ font-size: 10pt;
909
+ color: #199970;
910
+ }
911
+
912
+ /* top school layout */
913
+ .hb-school-top-table,
914
+ .school-top-table {
915
+ width: 100%;
916
+ border-collapse: collapse;
917
+ table-layout: fixed;
918
+ margin: 0 0 6pt;
919
+ border: none;
920
+ }
921
+
922
+ .hb-school-top-table td,
923
+ .school-top-table td {
924
+ border: none;
925
+ padding: 0;
926
+ vertical-align: top;
927
+ }
928
+
929
+ .hb-school-top-summary,
930
+ .school-top-summary {
931
+ width: 52%;
932
+ padding-right: 10pt !important;
933
+ }
934
+
935
+ .hb-school-top-campus,
936
+ .school-top-campus {
937
+ width: 48%;
938
+ text-align: right;
939
+ }
940
+
941
+ .hb-campus-img,
942
+ .campus-top-img {
943
+ display: block;
944
+ width: 100%;
945
+ max-height: 180pt;
946
+ height: 180pt;
947
+ border: 0.75pt solid #CBD5E1;
948
+ object-fit: cover;
949
+ box-shadow: 0 1pt 4pt rgba(0, 0, 0, 0.10);
950
+ }
951
+
952
+ .hb-campus-caption,
953
+ .campus-top-cap {
954
+ display: none;
955
+ }
956
+
957
+ .hb-campus-placeholder,
958
+ .campus-placeholder-cell {
959
+ width: 100%;
960
+ min-height: 120pt;
961
+ border: 1pt dashed #CBD5E1;
962
+ color: #94A3B8;
963
+ font-size: 10pt;
964
+ padding: 10pt;
965
+ background: #F8FAFC;
966
+ }
967
+
968
+ /* ------------------------------
969
+ BENEFITS SECTION
970
+ Guideline colors retained
971
+ ------------------------------ */
972
+ .hb-benefits-section,
973
+ .benefits-section {
974
+ clear: both;
975
+ margin: 8pt 0 8pt;
976
+ page-break-inside: avoid;
977
+ break-inside: avoid;
978
+ }
979
+
980
+ .hb-benefits-heading {
981
+ margin: 0 0 4pt;
982
+ line-height: 1.3;
983
+ }
984
+
985
+ .hb-benefits-bar,
986
+ .benefits-bar {
987
+ display: inline-block;
988
+ background: linear-gradient(135deg, #199970 0%, #00C853 100%);
989
+ color: #ffffff;
990
+ font-weight: 700;
991
+ padding: 4pt 12pt;
992
+ font-size: 10pt;
993
+ line-height: 1.35;
994
+ letter-spacing: 0.3px;
995
+ }
996
+
997
+ .hb-benefits-ul,
998
+ .benefits-ul {
999
+ list-style: none;
1000
+ margin: 0;
1001
+ padding: 0;
1002
+ }
1003
+
1004
+ .hb-benefits-ul li,
1005
+ .hb-benefit-item,
1006
+ .benefit-li {
1007
+ margin: 3pt 0;
1008
+ padding: 0;
1009
+ font-size: 9.75pt;
1010
+ line-height: 1.35;
1011
+ background: none;
1012
+ border: none;
1013
+ }
1014
+
1015
+ .hb-benefits-ul li::before,
1016
+ .hb-benefit-item::before,
1017
+ .benefit-li::before {
1018
+ content: "\27A4";
1019
+ display: inline-block;
1020
+ margin-right: 6pt;
1021
+ color: #199970;
1022
+ font-weight: 700;
1023
+ }
1024
+
1025
+ .hb-benefit-text,
1026
+ .benefit-text {
1027
+ display: inline-block;
1028
+ background: #E0F7FA;
1029
+ padding: 2pt 6pt;
1030
+ border-left: 2pt solid #00BCD4;
1031
+ }
1032
+
1033
+ .benefit-bullet {
1034
+ display: none;
1035
+ }
1036
+
1037
+ /* ------------------------------
1038
+ FUNDING SECTION
1039
+ Matches benefits styling with distinct bar color
1040
+ ------------------------------ */
1041
+ .hb-funding-section,
1042
+ .funding-section {
1043
+ clear: both;
1044
+ margin: 6pt 0 8pt;
1045
+ page-break-inside: avoid;
1046
+ break-inside: avoid;
1047
+ }
1048
+
1049
+ .hb-funding-heading {
1050
+ margin: 0 0 4pt;
1051
+ line-height: 1.3;
1052
+ }
1053
+
1054
+ .hb-funding-bar,
1055
+ .funding-bar {
1056
+ display: inline-block;
1057
+ background: linear-gradient(135deg, #0263A3 0%, #0288D1 100%);
1058
+ color: #ffffff;
1059
+ font-weight: 700;
1060
+ padding: 4pt 12pt;
1061
+ font-size: 10pt;
1062
+ line-height: 1.35;
1063
+ letter-spacing: 0.3px;
1064
+ }
1065
+
1066
+ .funding-ul {
1067
+ list-style: none;
1068
+ margin: 0;
1069
+ padding: 0;
1070
+ }
1071
+
1072
+ .funding-li {
1073
+ margin: 3pt 0;
1074
+ padding: 0;
1075
+ font-size: 9.75pt;
1076
+ line-height: 1.35;
1077
+ }
1078
+
1079
+ .funding-li .benefit-bullet {
1080
+ display: none;
1081
+ }
1082
+
1083
+ .funding-li::before {
1084
+ content: "\27A4";
1085
+ display: inline-block;
1086
+ margin-right: 6pt;
1087
+ color: #0263A3;
1088
+ font-weight: 700;
1089
+ }
1090
+
1091
+ .hb-funding-section .hb-benefit-item::before {
1092
+ color: #0263A3;
1093
+ }
1094
+
1095
+ .hb-funding-section .hb-benefit-text {
1096
+ background: #E3F2FD;
1097
+ border-left: 2pt solid #0288D1;
1098
+ }
1099
+
1100
+ .funding-li .benefit-text {
1101
+ display: inline-block;
1102
+ background: #E3F2FD;
1103
+ padding: 2pt 6pt;
1104
+ border-left: 2pt solid #0288D1;
1105
+ }
1106
+
1107
+ /* ------------------------------
1108
+ UNIVERSITY SUMMARY LIST
1109
+ Bold, numbered; list items are blue (#0263A3)
1110
+ ------------------------------ */
1111
+ .hb-university-list {
1112
+ margin: 4pt 0 12pt 18pt;
1113
+ padding: 0;
1114
+ list-style: decimal;
1115
+ font-size: 10pt;
1116
+ color: #1a1a1a;
1117
+ }
1118
+
1119
+ .hb-university-list li {
1120
+ margin: 0 0 5pt;
1121
+ padding-left: 0;
1122
+ font-weight: 700;
1123
+ line-height: 1.45;
1124
+ color: #0263A3;
1125
+ }
1126
+
1127
+ .hb-university-list li::before {
1128
+ content: none;
1129
+ }
1130
+
1131
+ /* ------------------------------
1132
+ QUALIFICATION / MUTED TEXT
1133
+ ------------------------------ */
1134
+ .hb-qualify,
1135
+ .qualify {
1136
+ margin: 10pt 0 8pt;
1137
+ font-weight: 700;
1138
+ font-size: 10pt;
1139
+ font-style: normal;
1140
+ color: #1a1a1a;
1141
+ width: 100%;
1142
+ padding: 8pt 10pt;
1143
+ background: #F4F7FA;
1144
+ border-left: 3pt solid #0263A3;
1145
+ }
1146
+
1147
+ .hb-muted,
1148
+ .muted {
1149
+ color: #666666;
1150
+ font-size: 10pt;
1151
+ }
1152
+
1153
+ /* ------------------------------
1154
+ SECTION-SPECIFIC OVERRIDES
1155
+ ------------------------------ */
1156
+ .sec-overview .hb-heading-1,
1157
+ .sec-overview .h2 {
1158
+ margin-top: 0;
1159
+ }
1160
+
1161
+ .sec-overview .hb-paragraph,
1162
+ .sec-overview .p {
1163
+ text-align: justify;
1164
+ line-height: 1.4;
1165
+ }
1166
+
1167
+ .sec-qualification .hb-heading-1,
1168
+ .sec-qualification .h2 {
1169
+ color: #C00000;
1170
+ }
1171
+
1172
+ .sec-how .hb-heading-2 {
1173
+ color: #C00000;
1174
+ }
1175
+
1176
+ .sec-qualification .hb-heading-2,
1177
+ .sec-qualification .h3 {
1178
+ margin-top: 10pt;
1179
+ }
1180
+
1181
+ .sec-qualification .hb-table,
1182
+ .sec-qualification .tbl {
1183
+ margin: 6pt 0 10pt;
1184
+ }
1185
+
1186
+ .sec-policy .hb-heading-2,
1187
+ .sec-policy .h3 {
1188
+ margin-top: 10pt;
1189
+ padding-bottom: 0;
1190
+ }
1191
+
1192
+ .sec-policy .hb-note,
1193
+ .sec-policy .note {
1194
+ margin-top: 8pt;
1195
+ }
1196
+
1197
+ .sec-policy .hb-table,
1198
+ .sec-policy .tbl {
1199
+ margin: 6pt 0 10pt;
1200
+ }
1201
+
1202
+ .sec-refund .hb-table,
1203
+ .sec-refund .tbl {
1204
+ margin: 6pt 0 10pt;
1205
+ }
1206
+
1207
+ .sec-contributions .hb-paragraph,
1208
+ .sec-contributions .p,
1209
+ .sec-contributions .hb-bullet-list li,
1210
+ .sec-contributions .ul li {
1211
+ font-weight: 700;
1212
+ color: #1a1a1a;
1213
+ }
1214
+
1215
+ .sec-contributions .hb-heading-1,
1216
+ .sec-contributions .hb-heading-2,
1217
+ .sec-contributions .h2,
1218
+ .sec-contributions .h3 {
1219
+ color: #199970;
1220
+ }
1221
+
1222
+ .sec-contributions .hb-table,
1223
+ .sec-contributions .tbl {
1224
+ margin: 6pt 0 10pt;
1225
+ }
1226
+
1227
+ .sec-contributions .hb-programs th,
1228
+ .sec-contributions .hb-programs td,
1229
+ .sec-contributions table.programs th,
1230
+ .sec-contributions table.programs td,
1231
+ .sec-funding .hb-programs th,
1232
+ .sec-funding .hb-programs td,
1233
+ .sec-funding table.programs th,
1234
+ .sec-funding table.programs td {
1235
+ text-align: left;
1236
+ }
1237
+
1238
+ .sec-contributions .hb-programs th,
1239
+ .sec-contributions table.programs th,
1240
+ .sec-funding .hb-programs th,
1241
+ .sec-funding table.programs th {
1242
+ color: #ffffff;
1243
+ font-weight: 700;
1244
+ font-size: 10pt;
1245
+ line-height: 1.2;
1246
+ background: #199970;
1247
+ }
1248
+
1249
+ .sec-funding .hb-table,
1250
+ .sec-funding .tbl {
1251
+ margin: 6pt 0 10pt;
1252
+ }
1253
+
1254
+ .sec-funding {
1255
+ page-break-before: always;
1256
+ break-before: page;
1257
+ }
1258
+
1259
+ .sec-funding .hb-table th,
1260
+ .sec-funding .tbl th {
1261
+ color: #ffffff;
1262
+ font-weight: 700;
1263
+ background: #199970;
1264
+ }
1265
+
1266
+ /* Images must stay inside margins */
1267
+ .page-content img {
1268
+ max-width: 100%;
1269
+ height: auto;
1270
+ }
1271
+
1272
+ /* ------------------------------
1273
+ BREAKDOWN — RELOCATION COST
1274
+ ------------------------------ */
1275
+
1276
+ /* Banner heading (teal background bar) */
1277
+ .hb-banner-heading {
1278
+ background: linear-gradient(135deg, #199970 0%, #0D7B55 100%);
1279
+ color: #FFFFFF !important;
1280
+ padding: 8pt 14pt;
1281
+ margin: 12pt 0 8pt;
1282
+ font-size: 11.5pt;
1283
+ font-weight: 700;
1284
+ letter-spacing: 0.4px;
1285
+ }
1286
+
1287
+ /* Page break before Relocation Cost */
1288
+ .sec-breakdown .page-break {
1289
+ page-break-before: always;
1290
+ break-before: page;
1291
+ }
1292
+
1293
+ /* Relocation cost table — merged note cell */
1294
+ .hb-relocation-table {
1295
+ margin: 0 0 12pt;
1296
+ }
1297
+
1298
+ .hb-relocation-table td {
1299
+ vertical-align: top;
1300
+ padding: 5pt 8pt;
1301
+ border: 0.75pt solid #CBD5E1;
1302
+ font-size: 9.5pt;
1303
+ }
1304
+
1305
+ .hb-merged-note {
1306
+ font-style: italic;
1307
+ font-weight: 400;
1308
+ font-size: 9.5pt;
1309
+ line-height: 1.4;
1310
+ display: block;
1311
+ padding: 4pt 2pt;
1312
+ }
1313
+
1314
+ /* ISP FINANCING line */
1315
+ .hb-isp-financing {
1316
+ margin: 16pt 0 8pt;
1317
+ padding: 8pt 0;
1318
+ border-top: 2pt solid #0263A3;
1319
+ border-bottom: 2pt solid #0263A3;
1320
+ text-align: center;
1321
+ font-size: 10.5pt;
1322
+ letter-spacing: 0.3px;
1323
+ }
1324
+
1325
+ .hb-isp-financing strong {
1326
+ font-weight: 700;
1327
+ color: #0263A3;
1328
+ }
1329
+
1330
+ .hb-isp-financing em {
1331
+ font-style: italic;
1332
+ color: #199970;
1333
+ font-weight: 700;
1334
+ }
1335
+
1336
+ /* NB: CREDIT FACILITY — green */
1337
+ .hb-credit-note {
1338
+ text-align: center;
1339
+ color: #199970;
1340
+ font-size: 10.5pt;
1341
+ font-weight: 700;
1342
+ margin: 8pt 0;
1343
+ letter-spacing: 0.2px;
1344
+ }
app/templates/handbook.html ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="utf-8">
6
+ <title>ISP Handbook</title>
7
+ <style>{{ font_css }}</style>
8
+ <link rel="stylesheet" href="{{ base_url }}/css/print.css">
9
+ {% if extra_css %}
10
+ <style>{{ extra_css }}</style>
11
+ {% endif %}
12
+ </head>
13
+
14
+ <body>
15
+
16
+ {# ── Page Header (hidden; JS extracts img src for Playwright header_template) ── #}
17
+ {% if header_image %}
18
+ <div class="page-header">
19
+ <img src="{{ header_image }}" alt="" />
20
+ </div>
21
+ {% endif %}
22
+
23
+ {# ── Right-Side Label (position:fixed, repeats on every page) ── #}
24
+ {% if label_image %}
25
+ <div class="hb-right-label">
26
+ <img src="{{ label_image }}" alt="" />
27
+ </div>
28
+ {% endif %}
29
+
30
+ {# ── Footer (hidden; Playwright footer_template renders page numbers) ── #}
31
+ <div class="page-footer"></div>
32
+
33
+ {# ── Cover Page ── #}
34
+ {% if cover_image %}
35
+ {% include "partials/cover.html" %}
36
+ {% endif %}
37
+
38
+ {# ── TOC Image Page ── #}
39
+ {% if toc_image %}
40
+ <div class="cover-page">
41
+ <img class="cover-img" src="{{ toc_image }}" alt="Table of Contents" />
42
+ </div>
43
+ {% endif %}
44
+
45
+ {# ── Page Content Wrapper (padding clears the fixed header/footer) ── #}
46
+ <div class="page-content">
47
+
48
+ {# ── Dynamic TOC ── #}
49
+ {% if not toc_image and toc_items %}
50
+ <div class="section-block toc-page" id="toc" data-section-key="table_of_contents"
51
+ data-section-title="{{ toc_title | e }}" {% if toc_sort_order is not none %}data-sort="{{ toc_sort_order }}"
52
+ {% endif %}>
53
+ {% include "partials/toc.html" %}
54
+ </div>
55
+ {% endif %}
56
+
57
+ {# ── Global Sections ── #}
58
+ {% for gs in general_sections %}
59
+ <div class="section-block{{ ' page-break' if gs.page_break else '' }} {{ gs.sec_class }}"
60
+ id="{{ gs.anchor | e }}" data-section-key="{{ gs.data.section_key | default('') | e }}"
61
+ data-section-title="{{ gs.data.section_title | default('') | e }}" {% if gs.data.sort_order is not none
62
+ %}data-sort="{{ gs.data.sort_order }}" {% endif %}>
63
+ {% if gs.blocks %}
64
+ {% for block in gs.blocks %}
65
+ {% include "partials/blocks/render_block.html" %}
66
+ {% endfor %}
67
+ {% else %}
68
+ {{ gs.rendered_html }}
69
+ {% endif %}
70
+ </div>
71
+ {% endfor %}
72
+
73
+ {# ── Summary of Universities ── #}
74
+ {% if summary_block %}
75
+ <div class="section-block page-break summary-section sec-summary" id="{{ summary_block.anchor | e }}"
76
+ data-section-key="summary_of_universities"
77
+ data-section-title="{{ summary_block.data.section_title | default('Summary of Universities') | e }}" {% if
78
+ summary_block.data.sort_order is not none %}data-sort="{{ summary_block.data.sort_order }}" {% endif %}>
79
+ {% if summary_block.blocks %}
80
+ {% for block in summary_block.blocks %}
81
+ {% include "partials/blocks/render_block.html" %}
82
+ {% endfor %}
83
+ {% else %}
84
+ {{ summary_block.rendered_html }}
85
+ {% endif %}
86
+ </div>
87
+ {% endif %}
88
+
89
+ {# ── University Sections (grouped by tier with divider headings) ── #}
90
+ {% if university_blocks %}
91
+ {% for block in university_blocks %}
92
+ {% include "partials/blocks/render_block.html" %}
93
+ {% endfor %}
94
+ {% elif universities %}
95
+ {% for uni in universities %}
96
+ {% if uni.tier_group_start and uni.tier_group_label %}
97
+ <div class="section-block page-break tier-group-heading" data-tier="{{ uni.tier_label | default('') | e }}">
98
+ <h1 class="h1 hb-heading-1" style="margin-top:0.5em;margin-bottom:0.3em;">{{ uni.tier_group_label | e }}
99
+ </h1>
100
+ </div>
101
+ {% endif %}
102
+ {% include "partials/university.html" %}
103
+ {% endfor %}
104
+ {% endif %}
105
+
106
+ </div>{# /page-content #}
107
+
108
+ {# ── Bottom Image Pages ── #}
109
+ {% for img_path in bottom_pages %}
110
+ <div class="fullpage-img-wrap">
111
+ <img class="fullpage-img" src="{{ img_path }}" alt="Handbook Page Image" />
112
+ </div>
113
+ {% endfor %}
114
+
115
+ {# ── Debug Summary ── #}
116
+ {% if debug and stats %}
117
+ <div class="page-break"></div>
118
+ <div class="section-block debug-block">
119
+ <h2 class="h2">PDF Debug Summary</h2>
120
+ <pre style="font-size:10px;">{{ stats | tojson(indent=2) }}</pre>
121
+ </div>
122
+ {% endif %}
123
+
124
+ </body>
125
+
126
+ </html>
app/templates/partials/blocks/bullet_list.html ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# Block partial: bullet_list — emits <ol> when block.data.ordered is truthy, otherwise <ul>. Entries are written as-is when block.data.html_entries is set, and passed through |e otherwise. #}
2
+ {% set list_tag = 'ol' if block.data.ordered else 'ul' %}
3
+ <{{ list_tag }} class="{{ block.css_class }}">
4
+ {% for entry in block.data.entries %}
5
+ <li>{% if block.data.html_entries %}{{ entry }}{% else %}{{ entry | e }}{% endif %}</li>
6
+ {% endfor %}
7
+ </{{ list_tag }}>
app/templates/partials/blocks/enrollment_steps.html ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# Block partial: enrollment_steps — each step visually separated #}
2
+ {% for step in block.data.steps %}
3
+ <div class="hb-step avoid-break">
4
+ {% if step.title %}
5
+ <div class="hb-step-title">Step {{ step.number }}: {{ step.title | e }}</div>
6
+ {% endif %}
7
+ {% if step.body_html %}
8
+ <p class="hb-paragraph">{{ step.body_html }}</p>
9
+ {% elif step.body %}
10
+ <p class="hb-paragraph">{{ step.body | e }}</p>
11
+ {% endif %}
12
+ {% if step.links %}
13
+ <ul class="hb-bullet-list">
14
+ {% for lnk in step.links %}
15
+ <li><a href="{{ lnk.url | e }}" target="_blank" rel="noopener noreferrer">{{ lnk.label | e }}</a></li>
16
+ {% endfor %}
17
+ </ul>
18
+ {% endif %}
19
+ {% if step.plain_links %}
20
+ <ul class="hb-bullet-list">
21
+ {% for plain_url in step.plain_links %}
22
+ <li><a href="{{ plain_url | e }}" target="_blank" rel="noopener noreferrer">{{ plain_url | e }}</a></li>
23
+ {% endfor %}
24
+ </ul>
25
+ {% endif %}
26
+ {% if step.qr_url %}
27
+ <div class="hb-step-qr-wrap">
28
+ <img class="hb-step-qr" src="{{ step.qr_url | e }}" alt="QR Code" />
29
+ </div>
30
+ {% endif %}
31
+ {% if step.telegram_url %}
32
+ <div class="hb-telegram-link">
33
+ <a href="{{ step.telegram_url | e }}" target="_blank" rel="noopener noreferrer">{{ step.telegram_url | e }}</a>
34
+ </div>
35
+ <p class="hb-paragraph">This telegram group will help you interact with program administrators and other prospective
36
+ students where you can ask any questions you may have about the program.</p>
37
+ {% endif %}
38
+ </div>
39
+ {% endfor %}
app/templates/partials/blocks/heading.html ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {# Block partial: heading_1 / heading_2 #}
2
+ {% if block.block_type == 'heading_1' %}
3
+ <h2 class="{{ block.css_class | default('hb-heading-1') }}">{{ block.data.text | e }}</h2>
4
+ {% elif block.block_type == 'heading_2' %}
5
+ <h3 class="{{ block.css_class | default('hb-heading-2') }}">{{ block.data.text | e }}</h3>
6
+ {% endif %}
app/templates/partials/blocks/note.html ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# Block partial: note. Inline mode (block.data.inline with block.data.parts) renders each part by part.style; otherwise block.data.text is rendered as a standalone note. #}
2
+ {% if block.data.inline and block.data.parts %}
3
+ <div class="{{ block.css_class | default('hb-note') }}">
4
+ {% for part in block.data.parts %}
5
+ {% if part.style == 'red_bold' %}
6
+ <strong class="hb-note-keyword">{{ part.text | e }}</strong>
7
+ {% elif part.style == 'bold' %}
8
+ <strong>{{ part.text | e }}</strong>
9
+ {% elif part.style == 'italic' %}
10
+ <em>{{ part.text | e }}</em>
11
+ {% else %}
12
+ {{ part.text | e }}
13
+ {% endif %}
14
+ {% endfor %}
15
+ </div>
16
+ {% else %}
17
+ <div class="{{ block.css_class | default('hb-note') }}">
18
+ {% set text = block.data.text | default('', true) %}
19
+ {# Wrap a leading NOTE / ONLY IF keyword in span.hb-note-keyword. The prefix is matched case-insensitively and emitted upper-cased; the bare 'NOTE' branch has no word boundary, so it also matches text such as "Notes". default('', true) above maps a None/empty text to '' so text.upper() cannot fail. #}
20
+ {% if text.upper().startswith('NOTE:') %}
21
+ <span class="hb-note-keyword">NOTE:</span> {{ text[5:] | e }}
22
+ {% elif text.upper().startswith('NOTE ') %}
23
+ <span class="hb-note-keyword">NOTE</span> {{ text[4:] | e }}
24
+ {% elif text.upper().startswith('NOTE') %}
25
+ <span class="hb-note-keyword">NOTE</span>{{ text[4:] | e }}
26
+ {% elif text.upper().startswith('ONLY IF:') %}
27
+ <span class="hb-note-keyword">ONLY IF:</span> {{ text[8:] | e }}
28
+ {% elif text.upper().startswith('ONLY IF') %}
29
+ <span class="hb-note-keyword">ONLY IF</span> {{ text[7:] | e }}
30
+ {% else %}
31
+ {{ text | e }}
32
+ {% endif %}
33
+ </div>
34
+ {% endif %}
app/templates/partials/blocks/paragraph.html ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {# Block partial: paragraph — supports pre-formatted HTML for bold emphasis #}
2
+ {% if block.data.html %}
3
+ <p class="hb-paragraph">{{ block.data.html }}</p>
4
+ {% else %}
5
+ <p class="hb-paragraph">{{ block.data.text | e }}</p>
6
+ {% endif %}
app/templates/partials/blocks/render_block.html ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# Universal block dispatcher — renders any RenderBlock via its type-specific partial #}
2
+ {% if block.block_type in ('heading_1', 'heading_2') %}
3
+ {% include "partials/blocks/heading.html" %}
4
+ {% elif block.block_type == 'paragraph' %}
5
+ {% include "partials/blocks/paragraph.html" %}
6
+ {% elif block.block_type == 'bullet_list' %}
7
+ {% include "partials/blocks/bullet_list.html" %}
8
+ {% elif block.block_type == 'note' %}
9
+ {% include "partials/blocks/note.html" %}
10
+ {% elif block.block_type == 'table' %}
11
+ {% include "partials/blocks/table.html" %}
12
+ {% elif block.block_type == 'enrollment_steps' %}
13
+ {% include "partials/blocks/enrollment_steps.html" %}
14
+ {% elif block.block_type == 'university_summary' %}
15
+ {% include "partials/blocks/university_summary.html" %}
16
+ {% elif block.block_type == 'school_profile' %}
17
+ {% include "partials/blocks/school_profile.html" %}
18
+ {% endif %}
app/templates/partials/blocks/school_profile.html ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# Block partial: school_profile — full university page #}
2
+ <div class="{{ block.css_class }}" {% if block.data.anchor %}id="{{ block.data.anchor | e }}" {% endif %} {% if
3
+ block.data.sort_order is not none %}data-sort="{{ block.data.sort_order }}" {% endif %}
4
+ data-section-key="university" data-section-title="{{ block.data.name | e }}">
5
+
6
+ {# ── University Title ── #}
7
+ {% if block.data.website %}
8
+ <div class="hb-uni-name">
9
+ <a class="hb-uni-name-link" href="{{ block.data.website | e }}" target="_blank" rel="noopener noreferrer">{{
10
+ block.data.name | e }}</a>
11
+ </div>
12
+ {% else %}
13
+ <div class="hb-uni-name">{{ block.data.name | e }}</div>
14
+ {% endif %}
15
+
16
+ {# ── Two-column: Summary + Campus Image ── #}
17
+ <table class="hb-school-top-table">
18
+ <tr>
19
+ <td class="hb-school-top-summary">
20
+ {% if block.data.overview %}
21
+ <ul class="hb-summary-ul">
22
+ {% if block.data.overview.founded %}<li><span class="hb-lbl">Founded:</span> {{
23
+ block.data.overview.founded | e }}</li>{% endif %}
24
+ {% if block.data.overview.total_students %}<li><span class="hb-lbl">Total Students:</span> {{
25
+ block.data.overview.total_students | e }}</li>{% endif %}
26
+ {% if block.data.overview.undergraduates %}<li><span class="hb-lbl">Undergraduate Students:</span>
27
+ {{ block.data.overview.undergraduates | e }}</li>{% endif %}
28
+ {% if block.data.overview.postgraduates %}<li><span class="hb-lbl">Postgraduate Students:</span> {{
29
+ block.data.overview.postgraduates | e }}</li>{% endif %}
30
+ {% if block.data.overview.acceptance_rate %}
31
+ <li><span class="hb-lbl">Acceptance Rate:</span> {{
32
+ block.data.overview.acceptance_rate | e }}</li>
33
+ {% endif %}
34
+ {% if block.data.overview.location %}
35
+ <li><span class="hb-lbl">Location:</span> {{
36
+ block.data.overview.location | e }}</li>
37
+ {% endif %}
38
+ {% if block.data.overview.tuition %}<li><span class="hb-lbl">Yearly Tuition/Out-of-State
39
+ Tuition:</span> {{ block.data.overview.tuition | e }}</li>{% endif %}
40
+ </ul>
41
+ {% endif %}
42
+ </td>
43
+ <td class="hb-school-top-campus">
44
+ {% if block.data.campus_image %}
45
+ <img class="hb-campus-img" src="{{ block.data.campus_image | e }}" alt="Campus Image" />
46
+ {% else %}
47
+ <div class="hb-campus-placeholder">Campus image unavailable</div>
48
+ {% endif %}
49
+ </td>
50
+ </tr>
51
+ </table>
52
+
53
+ {# ── Benefits ── #}
54
+ {% if block.data.benefits is not none %}
55
+ <div class="hb-benefits-section">
56
+ <div class="hb-benefits-heading"><span class="hb-benefits-bar">Benefits for ISP Students</span>
57
+ </div>
58
+ {% if block.data.benefits %}
59
+ <ul class="hb-benefits-ul">
60
+ {% for b in block.data.benefits %}
61
+ {% if b %}
62
+ <li class="hb-benefit-item"><span class="hb-benefit-text">{{ b | e }}</span></li>
63
+ {% endif %}
64
+ {% endfor %}
65
+ </ul>
66
+ {% else %}
67
+ <div class="hb-muted">No benefits listed.</div>
68
+ {% endif %}
69
+ </div>
70
+ {% endif %}
71
+
72
+ {# ── Funding ── #}
73
+ {% if block.data.funding_items %}
74
+ <div class="hb-funding-section">
75
+ <div class="hb-funding-heading"><span class="hb-funding-bar">{{ block.data.funding_heading | default('Funding
76
+ Available') | e }}</span></div>
77
+ <ul class="hb-benefits-ul">
78
+ {% for item in block.data.funding_items %}
79
+ <li class="hb-benefit-item"><span class="hb-benefit-text">{{ item | e }}</span></li>
80
+ {% endfor %}
81
+ </ul>
82
+ </div>
83
+ {% endif %}
84
+
85
+ {# ── Programs Table ── #}
86
+ {% if block.data.programs is not none %}
87
+ <div class="hb-qualify">To qualify for The International Scholars Program at {{ block.data.name | e }}, one must be
88
+ willing to study this course:</div>
89
+ {% if block.data.programs %}
90
+ <table class="hb-programs">
91
+ <thead>
92
+ <tr>
93
+ <th>Program</th>
94
+ <th>Designation</th>
95
+ <th>Entrance Examination</th>
96
+ </tr>
97
+ </thead>
98
+ <tbody>
99
+ {% for p in block.data.programs %}
100
+ <tr>
101
+ <td>{% if p.link %}<a href="{{ p.link | e }}" target="_blank" rel="noopener noreferrer">{{ p.name | e
102
+ }}</a>{% else %}{{ p.name | e }}{% endif %}</td>
103
+ <td>{{ p.designation | e }}</td>
104
+ <td>{{ p.entrance | e }}</td>
105
+
106
+ </tr>
107
+ {% endfor %}
108
+ </tbody>
109
+ </table>
110
+ {% else %}
111
+ <div class="hb-muted">No programs listed.</div>
112
+ {% endif %}
113
+ {% endif %}
114
+
115
+ {# ── Extra Sections ── #}
116
+ {% for extra_list in block.data.extra_blocks %}
117
+ {% for block in extra_list %}
118
+ {% include "partials/blocks/render_block.html" %}
119
+ {% endfor %}
120
+ {% endfor %}
121
+
122
+ </div>
app/templates/partials/blocks/table.html ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# Block partial: table (standard, comparison, spanning variants) #}
2
+ {% set variant = block.data.variant | default('standard') %}
3
+
4
+ {% if variant == 'comparison' %}
5
+ {# ── Comparison table (table_v2) ── #}
6
+ <table class="{{ block.css_class }}">
7
+ <thead>
8
+ {% if block.data.header_groups %}
9
+ <tr>
10
+ {% for c in block.data.base_columns %}
11
+ <th rowspan="2">{{ c.label | e }}</th>
12
+ {% endfor %}
13
+ {% for g in block.data.header_groups %}
14
+ <th colspan="{{ g.columns | length }}">{{ g.label | e }}</th>
15
+ {% endfor %}
16
+ </tr>
17
+ <tr>
18
+ {% for g in block.data.header_groups %}
19
+ {% for c in g.columns %}
20
+ <th>{{ c.label | e }}</th>
21
+ {% endfor %}
22
+ {% endfor %}
23
+ </tr>
24
+ {% else %}
25
+ <tr>
26
+ {% for c in block.data.all_columns %}
27
+ {% set col_label = c.label | default('') %}
28
+ {% set col_lc = col_label | lower %}
29
+ <th class="{% if col_lc == 'regular' %}is-regular-col{% elif col_lc == 'prime' %}is-prime-col{% endif %}">{{
30
+ c.label | e }}</th>
31
+ {% endfor %}
32
+ </tr>
33
+ {% endif %}
34
+ </thead>
35
+ <tbody>
36
+ {% for row in block.data.rows %}
37
+ <tr>
38
+ {% for c in block.data.all_columns %}
39
+ {% set col_label = c.label | default('') %}
40
+ {% set col_lc = col_label | lower %}
41
+ <td class="{% if col_lc == 'regular' %}is-regular-col{% elif col_lc == 'prime' %}is-prime-col{% endif %}">{{
42
+ row[c.key] | default('') | safe }}</td>
43
+ {% endfor %}
44
+ </tr>
45
+ {% endfor %}
46
+ </tbody>
47
+ </table>
48
+
49
+ {% elif variant == 'spanning' %}
50
+ {# ── Spanning table (table_v3 / table_v4) ── #}
51
+ <table class="{{ block.css_class }}">
52
+ <tbody>
53
+ {% for row in block.data.rows %}
54
+ <tr>
55
+ {% for cell in row %}
56
+ <td{% if cell.colspan> 1 %} colspan="{{ cell.colspan }}"{% endif %}{% if cell.rowspan > 1 %} rowspan="{{
57
+ cell.rowspan }}"{% endif %}>{{ cell.text | safe }}</td>
58
+ {% endfor %}
59
+ </tr>
60
+ {% endfor %}
61
+ </tbody>
62
+ </table>
63
+
64
+ {% else %}
65
+ {# ── Standard table ── #}
66
+ <table class="{{ block.css_class }}">
67
+ {% if block.data.columns %}
68
+ <thead>
69
+ <tr>
70
+ {% for col in block.data.columns %}
71
+ {% set col_lc = (col | lower) %}
72
+ <th class="{% if col_lc == 'regular' %}is-regular-col{% elif col_lc == 'prime' %}is-prime-col{% endif %}">{{
73
+ col | e }}</th>
74
+ {% endfor %}
75
+ </tr>
76
+ </thead>
77
+ {% endif %}
78
+ <tbody>
79
+ {% for row in block.data.rows %}
80
+ <tr>
81
+ {% for cell in row %}
82
+ {% set col = block.data.columns[loop.index0] if block.data.columns and loop.index0 < (block.data.columns |
83
+ length) else '' %} {% set col_lc=(col | lower) %} <td
84
+ class="{% if col_lc == 'regular' %}is-regular-col{% elif col_lc == 'prime' %}is-prime-col{% endif %}">{{
85
+ cell | safe }}</td>
86
+ {% endfor %}
87
+ </tr>
88
+ {% endfor %}
89
+ </tbody>
90
+ </table>
91
+ {% endif %}
app/templates/partials/blocks/university_summary.html ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {# Block partial: university_summary — numbered list of universities #}
2
+ <ol class="hb-university-list">
3
+ {% for name in block.data.universities %}
4
+ <li><strong>{{ name | e }}</strong></li>
5
+ {% endfor %}
6
+ </ol>
app/templates/partials/cover.html ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {# Cover page partial #}
2
+ <div class="cover-page">
3
+ <img class="cover-img" src="{{ cover_image }}" alt="Cover" />
4
+ </div>
app/templates/partials/section.html ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {# Generic section block partial — renders pre-built HTML from renderers.py #}
2
+ <div class="section-block{{ ' page-break' if page_break else '' }} {{ sec_class }}" id="{{ anchor | e }}"
3
+ data-section-key="{{ section_key | e }}" data-section-title="{{ section_title | e }}" {% if sort_order is not none
4
+ %}data-sort="{{ sort_order }}" {% endif %}>
5
+ {{ rendered_html }}
6
+ </div>
app/templates/partials/toc.html ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# Table of Contents — CSS dot-leader layout (single-line per entry) #}
2
+ <div class="toc">
3
+ <div class="toc-heading">CONTENTS</div>
4
+ <div class="toc-list">
5
+ {% for e in toc_items_sorted %}
6
+ {% if e.title %}
7
+ <div
8
+ class="toc-entry{{ ' toc-entry--major' if e.level == 0 else ' toc-entry--sub' }}{{ ' toc-entry--indent' if e.level >= 1 else '' }}">
9
+ <span class="toc-label">{% if e.target %}<a href="{{ e.target | e }}">{% endif %}{{ e.display_title | e }}{%
10
+ if e.target %}</a>{% endif %}</span>
11
+ <span class="toc-leader"></span>
12
+ <span class="toc-page">{% if e.page %}{{ e.page | e }}{% endif %}</span>
13
+ </div>
14
+ {% endif %}
15
+ {% endfor %}
16
+ </div>
17
+ </div>
app/templates/partials/university.html ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# University section partial — iterates within handbook.html #}
2
+ <div class="{{ uni.classes | join(' ') }}" {% if uni.anchor %}id="{{ uni.anchor | e }}" {% endif %} {% if uni.sort_order
3
+ is not none %}data-sort="{{ uni.sort_order }}" {% endif %} data-section-key="university"
4
+ data-section-title="{{ uni.name | e }}">
5
+
6
+ {# ── University Title ── #}
7
+ {% if uni.website %}
8
+ <div class="uni-name">
9
+ <a class="uni-name-link" href="{{ uni.website | e }}" target="_blank" rel="noopener noreferrer">{{ uni.name | e
10
+ }}</a>
11
+ </div>
12
+ {% else %}
13
+ <div class="uni-name">{{ uni.name | e }}</div>
14
+ {% endif %}
15
+
16
+ {# ── Two-column: Summary + Campus Image ── #}
17
+ <table class="school-top-table" cellspacing="0" cellpadding="0">
18
+ <tr>
19
+ <td class="school-top-summary">
20
+ {% if uni.overview %}
21
+ <ul class="summary-ul">
22
+ {% if uni.overview.founded %}<li><span class="lbl">Founded:</span> {{ uni.overview.founded | e }}
23
+ </li>{% endif %}
24
+ {% if uni.overview.total_students %}<li><span class="lbl">Total Students:</span> {{
25
+ uni.overview.total_students | e }}</li>{% endif %}
26
+ {% if uni.overview.undergraduates %}<li><span class="lbl">Undergraduate Students:</span> {{
27
+ uni.overview.undergraduates | e }}</li>{% endif %}
28
+ {% if uni.overview.postgraduates %}<li><span class="lbl">Postgraduate Students:</span> {{
29
+ uni.overview.postgraduates | e }}</li>{% endif %}
30
+ {% if uni.overview.acceptance_rate %}
31
+ <li><span class="lbl">Acceptance Rate:</span> {{ uni.overview.acceptance_rate | e }}</li>
32
+ {% endif %}
33
+ {% if uni.overview.location %}
34
+ <li><span class="lbl">Location:</span> {{ uni.overview.location | e }}</li>
35
+ {% endif %}
36
+ {% if uni.overview.tuition %}<li><span class="lbl">Yearly Tuition/Out-of-State Tuition:</span> {{
37
+ uni.overview.tuition | e }}</li>{% endif %}
38
+ </ul>
39
+ {% if uni.website %}
40
+ <div class="uni-website">
41
+ <span class="lbl">Website:</span>
42
+ <a href="{{ uni.website | e }}" target="_blank" rel="noopener noreferrer">{{ uni.website | e }}</a>
43
+ </div>
44
+ {% endif %}
45
+ {% endif %}
46
+ </td>
47
+ <td class="school-top-campus">
48
+ {% if uni.campus_image %}
49
+ <img class="campus-top-img" src="{{ uni.campus_image | e }}" alt="Campus Image" />
50
+ {% else %}
51
+ <div class="campus-placeholder-cell">Campus image unavailable</div>
52
+ {% endif %}
53
+ </td>
54
+ </tr>
55
+ </table>
56
+
57
+ {# ── Benefits ── #}
58
+ {% if uni.benefits is defined and uni.benefits is not none %}
59
+ <div class="benefits-section">
60
+ <div class="benefits-bar">Benefits for ISP Students</div>
61
+ {% if uni.benefits %}
62
+ <ul class="benefits-ul">
63
+ {% for b in uni.benefits %}
64
+ {% if b %}
65
+ <li class="benefit-li"><span class="benefit-bullet">&bull;</span> <span class="benefit-text">{{ b | e
66
+ }}</span></li>
67
+ {% endif %}
68
+ {% endfor %}
69
+ </ul>
70
+ {% else %}
71
+ <div class="muted">No benefits listed.</div>
72
+ {% endif %}
73
+ </div>
74
+ {% endif %}
75
+
76
+ {# ── Funding ── #}
77
+ {% if uni.funding_items %}
78
+ <div class="funding-section">
79
+ <div class="funding-bar">{{ uni.funding_heading | default('Funding Available') | e }}</div>
80
+ <ul class="funding-ul">
81
+ {% for item in uni.funding_items %}
82
+ <li class="funding-li"><span class="benefit-bullet">&bull;</span> <span class="benefit-text">{{ item | e
83
+ }}</span></li>
84
+ {% endfor %}
85
+ </ul>
86
+ </div>
87
+ {% endif %}
88
+
89
+ {# ── Programs Table ── #}
90
+ {% if uni.programs is defined %}
91
+ <div class="qualify">To qualify for The International Scholars Program at {{ uni.name | e }}, you must be willing to
92
+ study any of the following programs:</div>
93
+ {% if uni.programs %}
94
+ <table class="programs">
95
+ <thead>
96
+ <tr>
97
+ <th>Program</th>
98
+ <th>Designation</th>
99
+ <th>Entrance Examination</th>
100
+ </tr>
101
+ </thead>
102
+ <tbody>
103
+ {% for p in uni.programs %}
104
+ <tr>
105
+ <td>
106
+ {% if p.link %}<a href="{{ p.link | e }}" target="_blank" rel="noopener noreferrer">{{ p.name | e
107
+ }}</a>
108
+ {% else %}{{ p.name | e }}{% endif %}
109
+ </td>
110
+ <td>{{ p.designation | e }}</td>
111
+ <td>{{ p.entrance | e }}</td>
112
+ </tr>
113
+ {% endfor %}
114
+ </tbody>
115
+ </table>
116
+ {% else %}
117
+ <div class="muted">No programs listed.</div>
118
+ {% endif %}
119
+ {% endif %}
120
+
121
+ {# ── Extra Sections ── #}
122
+ {% for extra in uni.extra_sections %}
123
+ {{ extra.rendered_html }}
124
+ {% endfor %}
125
+
126
+ </div>
fonts/GOTHIC.TTF ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a9cbb5d75b2a2b0d22dc94571608e4e9dc7b88e825374985880c5722c1c9e5f
3
+ size 137568
fonts/GOTHICB.TTF ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90cb613b492874a560c0ff18a3402b1d24fb7e846dff11295d5c4644d6c75e83
3
+ size 129676
fonts/GOTHICBI.TTF ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc5949d57d2e172601fb6f5093c1fbf15a463e29ed47c4c8ff2434baf1c2b19
3
+ size 139084
fonts/GOTHICI.TTF ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cf57be6f9d0bd60bd5dc6eee7c11e87e5b19df210156495a524b974185b9fb9
3
+ size 148520
images/ISP Handbook_Global-60-66_page-0001.jpg ADDED

Git LFS Details

  • SHA256: 1552f511ba168a226999a3b74fce3aac3acd9283262fe6859ff18dd631bf7f01
  • Pointer size: 131 Bytes
  • Size of remote file: 705 kB
images/ISP Handbook_Global-60-66_page-0002.jpg ADDED

Git LFS Details

  • SHA256: 7f545312eccde68413a2c810a2d679ba039490ab59eaaf9d9ce11b608536570f
  • Pointer size: 131 Bytes
  • Size of remote file: 683 kB
images/IUP.webp ADDED

Git LFS Details

  • SHA256: d93a30bb6da3631b8da5e108fdbe525e7b56a5ee119570ede48f4b89c068829f
  • Pointer size: 131 Bytes
  • Size of remote file: 305 kB
images/LOPY-61-65_page-0003.jpg ADDED

Git LFS Details

  • SHA256: 7ce0d68a58c1a667ab57a46b3f6068d385162e5faf2dd50ab12ef1b8a17da892
  • Pointer size: 131 Bytes
  • Size of remote file: 922 kB