diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..76a73bd2133ae411b871c2ff792594e421dd30af --- /dev/null +++ b/.dockerignore @@ -0,0 +1,18 @@ +__pycache__/ +*.pyc +*.pyo +.env +.venv/ +venv/ +dist/ +*.egg-info/ +.pytest_cache/ +.mypy_cache/ +tests/ +*.md +*.pdf +app/handbook_pdf/ +app/__pycache__/ +images/*.pdf +_*.py +_*.html diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..21946354a9e3bde93a81cdef828902f5612807f9 --- /dev/null +++ b/.env.example @@ -0,0 +1,35 @@ +# ── App ── +APP_NAME=ISP Handbook Service +APP_VERSION=1.0.0 +DEBUG=false +PORT=7860 + +# ── Database (MySQL) ── +DB_HOST=localhost +DB_PORT=3306 +DB_USER=root +DB_PASSWORD= +DB_NAME=handbook +DB_CHARSET=utf8mb4 + +# ── External API endpoints (source-of-truth JSON APIs) ── +# If set, these override the computed URLs from API_BASE_URL + paths. +HANDBOOK_GENERAL_ENDPOINT= +UNIVERSITY_HANDBOOK_ENDPOINT= + +# Base URL of the PHP server hosting the JSON APIs +API_BASE_URL=https://finsapdev.qhtestingserver.com +GENERAL_SECTIONS_PATH=/MODEL_APIS/handbook_general_sections.php +UNIVERSITY_SECTIONS_PATH=/MODEL_APIS/university_handbook.php + +# ── Images directory ── +IMAGES_DIR=./images + +# ── Fonts directory ── +FONT_DIR=./fonts + +# ── CORS allowed origins (comma-separated) ── +CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173,https://finsapdev.qhtestingserver.com + +# ── HTTP timeout for upstream API calls (seconds) ── +HTTP_TIMEOUT=25 diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..2875a3c1b03b583cb0b1b7b0e543ae847f0be5bf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,35 +1,7 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs 
-text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.TTF filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..1b282032ea72e5362e3cf4a5324054686bf4c516 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +__pycache__/ +*.pyc +*.pyo +.env +.venv/ +venv/ +dist/ +*.egg-info/ +.pytest_cache/ +.mypy_cache/ +app/handbook_pdf/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 
# Image for the ISP Handbook service: FastAPI (uvicorn) + Playwright/Chromium
# for HTML -> PDF rendering.
#
# The Debian release is pinned ("bookworm") because the apt package names
# below are release-specific; the floating "slim" tag can move to a newer
# Debian where some of them are renamed.
FROM python:3.12-slim-bookworm

# Shared libraries Chromium needs at runtime, plus fontconfig and a fallback
# font family. One package per line, sorted, so diffs stay readable.
# The apt index is removed in the same layer so it never reaches the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        fontconfig \
        fonts-liberation \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdrm2 \
        libgbm1 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libxcomposite1 \
        libxdamage1 \
        libxkbcommon0 \
        libxrandr2 \
        libxshmfence1 \
        wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies first so this layer is reused until
# requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Set the browser path BEFORE the install so Playwright puts browsers here
# (a shared location that the unprivileged runtime user can read).
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright

# Install Chromium plus any system deps Playwright still needs.
# --with-deps runs apt-get itself, so the package index must be cleaned
# again in this same layer.
RUN playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime user. UID 1000 matches the user Hugging Face Spaces
# runs Docker containers as; /app is handed over so the service can write
# inside its own working directory.
RUN useradd --create-home --uid 1000 app \
    && chown app:app /app

# Application code
COPY --chown=app:app app/ ./app/

# Static assets (fonts & images used for PDF rendering)
COPY --chown=app:app fonts/ ./fonts/
COPY --chown=app:app images/ ./images/

# Shipped for reference only: the settings loader reads ".env", so this file
# is not picked up automatically.
COPY --chown=app:app .env.example .env.example

# Cloud Run injects PORT; HF Spaces uses 7860
ENV PORT=7860
EXPOSE 7860

# Drop root only after every step that needs it has run.
USER app

# Single worker: Playwright+Chromium is memory-heavy.
# "exec" replaces the shell with uvicorn so it receives SIGTERM from
# `docker stop` and the app's shutdown hook can close the browser.
# --timeout-keep-alive 300 controls how long an idle keep-alive connection
# is held open between requests; it does not limit how long one PDF render
# may take.
CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port \"${PORT:-7860}\" --workers 1 --timeout-keep-alive 300"]
+├── css/ # Base stylesheet +├── Dockerfile +├── requirements.txt +├── .env.example +└── README.md +``` + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/health` | Health check | +| `GET` | `/diagnostics/fonts` | Font file diagnostics | +| `GET` | `/api/v1/sections/global?catalog_id=0` | Fetch normalised global sections | +| `GET` | `/api/v1/sections/universities` | Fetch normalised university sections | +| `GET` | `/api/v1/handbook/pdf?catalog_id=0` | Generate PDF (download) | +| `POST` | `/api/v1/handbook/pdf` | Generate PDF with JSON body | +| `GET` | `/api/v1/handbook/html?catalog_id=0` | Generate HTML preview | +| `POST` | `/api/v1/handbook/render` | Generate PDF or HTML based on `output_format` | +| `GET` | `/docs` | Swagger UI | +| `GET` | `/redoc` | ReDoc UI | + +## Local Development + +### Prerequisites + +- Python 3.11+ +- MySQL database (existing schema — unchanged) +- Century Gothic font files in `fonts/` directory + +### Setup + +```bash +cd python_service + +# Create virtualenv +python -m venv .venv +.venv\Scripts\activate # Windows +# source .venv/bin/activate # Linux/Mac + +# Install dependencies +pip install -r requirements.txt + +# Copy and configure environment +copy .env.example .env +# Edit .env with your database credentials and API URLs +``` + +### Run + +```bash +uvicorn app.main:app --reload --host 0.0.0.0 --port 7860 +``` + +Visit http://localhost:7860/docs for the interactive API documentation. + +### Run Tests + +```bash +pytest tests/ -v +``` + +## Docker + +### Build + +```bash +docker build -t isp-handbook-service . 
+``` + +### Run + +```bash +docker run -d \ + --name handbook-service \ + -p 7860:7860 \ + -e DB_HOST=host.docker.internal \ + -e DB_USER=root \ + -e DB_PASSWORD=secret \ + -e DB_NAME=handbook \ + -e API_BASE_URL=https://finsapdev.qhtestingserver.com \ + isp-handbook-service +``` + +Or with an env file: + +```bash +docker run -d --name handbook-service -p 7860:7860 --env-file .env isp-handbook-service +``` + +## Hugging Face Spaces Deployment + +1. Create a new Space on Hugging Face with **Docker** SDK +2. Upload/push the `python_service/` directory as the Space root +3. Ensure `fonts/`, `images/`, and `css/` directories are included +4. Set environment variables (Secrets) in Space settings: + - `DB_HOST`, `DB_USER`, `DB_PASSWORD`, `DB_NAME` + - `API_BASE_URL` + - `PORT=7860` (default for HF Spaces) +5. The `Dockerfile` is already configured for HF Spaces (port 7860, `0.0.0.0`) + +**Important**: Hugging Face Spaces may not allow outbound MySQL connections. If the database cannot be reached from the Space, rely on the external API endpoints instead: the service fetches its data from the PHP JSON APIs over HTTP, not from the database directly. + +## PHP Integration Example + +The PHP application can call this Python service over HTTP using cURL: + +```php +<?php +// Base URL of the Python handbook service (adjust per environment). +define('HANDBOOK_SERVICE_URL', 'http://localhost:7860'); + +/** + * Check whether the handbook service is reachable. + */ +function handbook_health(): array { + $ch = curl_init(HANDBOOK_SERVICE_URL . '/health'); + curl_setopt_array($ch, [ + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TIMEOUT => 5, + ]); + $body = curl_exec($ch); + $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + + if ($code !== 200) { + return ['ok' => false, 'error' => 'Service unreachable', 'http_code' => $code]; + } + return json_decode($body, true) ?? ['ok' => false, 'error' => 'Invalid response']; +} + +/** + * Generate and download the handbook PDF. + */ +function handbook_download_pdf(int $catalogId = 0, bool $debug = false): void { + $params = http_build_query([ + 'catalog_id' => $catalogId, + 'debug' => $debug ? 'true' : 'false', + ]); + $url = HANDBOOK_SERVICE_URL . '/api/v1/handbook/pdf?' . 
$params; + + $ch = curl_init($url); + curl_setopt_array($ch, [ + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TIMEOUT => 120, + CURLOPT_FOLLOWLOCATION => true, + ]); + $body = curl_exec($ch); + $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE); + $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); + curl_close($ch); + + if ($code !== 200 || strpos($contentType, 'application/pdf') === false) { + http_response_code(502); + header('Content-Type: text/plain'); + echo "PDF generation failed (HTTP $code)"; + return; + } + + header('Content-Type: application/pdf'); + header('Content-Disposition: attachment; filename="ISP_Handbook.pdf"'); + header('Content-Length: ' . strlen($body)); + echo $body; +} + +/** + * Fetch global sections via the Python service. + */ +function handbook_get_sections(int $catalogId = 0): array { + $url = HANDBOOK_SERVICE_URL . '/api/v1/sections/global?catalog_id=' . $catalogId; + $ch = curl_init($url); + curl_setopt_array($ch, [ + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TIMEOUT => 25, + ]); + $body = curl_exec($ch); + curl_close($ch); + return json_decode($body, true) ?? []; +} + +/** + * Generate handbook via POST with custom options. + */ +function handbook_generate(array $options = []): string { + $url = HANDBOOK_SERVICE_URL . 
'/api/v1/handbook/render'; + $payload = json_encode(array_merge([ + 'catalog_id' => 0, + 'include_inactive_programs' => false, + 'debug' => false, + 'output_format' => 'pdf', + ], $options)); + + $ch = curl_init($url); + curl_setopt_array($ch, [ + CURLOPT_RETURNTRANSFER => true, + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => $payload, + CURLOPT_HTTPHEADER => ['Content-Type: application/json'], + CURLOPT_TIMEOUT => 120, + ]); + $body = curl_exec($ch); + curl_close($ch); + return $body; +} +``` + +### Usage in PHP + +```php +// Health check +$status = handbook_health(); +if ($status['status'] === 'ok') { + echo "Service is running\n"; +} + +// Stream PDF to browser +handbook_download_pdf(catalogId: 1); + +// Get sections data +$sections = handbook_get_sections(catalogId: 1); +print_r($sections); +``` + +## Migration Notes & Assumptions + +### What was migrated + +| PHP Component | Python Equivalent | Notes | +|---|---|---| +| `common.php` (URL builder, HTTP client) | `data_fetcher.py` | Uses `httpx` instead of cURL | +| `cors.php` | FastAPI CORS middleware | Same origins preserved | +| `helpers.php` (`h()`, `respondJson()`) | Built into FastAPI + `utils.py` | | +| `fetchers.php` (global/uni data fetch) | `data_fetcher.py` | Identical normalisation logic | +| `renderers.php` (TOC, blocks, university) | `renderers.py` | All block types preserved | +| `html_builder.php` (`buildHandbookHtml`) | `html_builder.py` | Same HTML structure | +| `pdf.php` (Dompdf render) | `pdf_service.py` | **WeasyPrint** replaces Dompdf | +| `images.php` (image config) | `pdf_service.py` `_get_images_config()` | | +| `font_diagnostics.php` | `GET /diagnostics/fonts` | | +| `db.php` (mysqli) | `database.py` (SQLAlchemy) | Available but not primary path | + +### Key differences + +1. **PDF engine**: WeasyPrint replaces Dompdf. Layout may differ slightly in edge cases (table widths, page breaks). Both support `@font-face` with base64 TTF and `@page` rules. + +2. 
**TOC page numbers**: The PHP code uses a 2-pass Dompdf render to inject exact TOC page numbers via named destinations. WeasyPrint doesn't expose named destinations the same way. TOC pages are assigned sequentially in the initial migration. Exact page numbers can be added via a post-processing PDF pass if needed. + +3. **No auth**: The PHP code has no authentication. The Python service also has none. Add API key middleware if this service is exposed publicly. + +4. **Data source**: The service fetches data from the same two PHP JSON APIs over HTTP (not directly from the database). The `repositories/handbook_repo.py` provides a DB fallback if you want to bypass the PHP APIs entirely. + +5. **SSL verification**: Disabled for internal API calls (`verify=False` in httpx), matching the PHP behavior (`CURLOPT_SSL_VERIFYPEER => false`). + +### Risks + +- **Font rendering**: Century Gothic rendering may differ slightly between Dompdf (PHP) and WeasyPrint (Python). Test with actual fonts. +- **Page break behavior**: Dompdf and WeasyPrint handle CSS `page-break-*` properties slightly differently. +- **Image embedding**: Remote campus images are fetched at generation time. Network issues will result in placeholder cells (same as PHP behavior). +- **Memory**: Large handbooks with many university images may require significant memory. The Dockerfile doesn't set memory limits — Hugging Face Spaces has its own limits. 
logger = logging.getLogger(__name__)

router = APIRouter()


# ── Shared render helpers ──
# NOTE(review): every handler below returns the raw exception text in the
# error `detail`. That is the existing contract and is kept, but consider a
# generic message if this service is ever exposed publicly.

async def _pdf_download_response(
    catalog_id: int,
    include_inactive_programs: bool,
    debug: bool,
) -> Response:
    """Render the handbook PDF and wrap it in an attachment response.

    Shared by all PDF-producing endpoints so the headers and the error
    mapping cannot drift apart.

    Raises:
        HTTPException: 500, carrying the failure text, if rendering or
            response construction fails.
    """
    # Imported lazily, matching the other handlers in this module.
    from app.services.pdf_service import generate_handbook_pdf

    try:
        pdf_bytes = await generate_handbook_pdf(
            catalog_id=catalog_id,
            include_inactive_programs=include_inactive_programs,
            debug=debug,
        )
        return Response(
            content=pdf_bytes,
            media_type="application/pdf",
            headers={
                "Content-Disposition": 'attachment; filename="ISP_Handbook.pdf"',
                "Cache-Control": "private, max-age=0, must-revalidate",
            },
        )
    except Exception as exc:
        logger.exception("PDF generation failed")
        # `from exc` records the original failure as the explicit cause.
        raise HTTPException(status_code=500, detail=str(exc)) from exc


async def _html_preview_response(
    catalog_id: int,
    include_inactive_programs: bool,
    debug: bool,
) -> HTMLResponse:
    """Render the handbook HTML and wrap it in an HTML response.

    Raises:
        HTTPException: 500, carrying the failure text, if rendering fails.
    """
    from app.services.pdf_service import generate_handbook_html

    try:
        html = await generate_handbook_html(
            catalog_id=catalog_id,
            include_inactive_programs=include_inactive_programs,
            debug=debug,
        )
        return HTMLResponse(content=html)
    except Exception as exc:
        logger.exception("HTML generation failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc


# ── Root / HF health probe ──

@router.get("/", tags=["system"])
async def root():
    """Root endpoint — HF Spaces probes this URL for health checks."""
    return {"status": "ok"}


# ── Health check ──

@router.get("/health", response_model=HealthResponse, tags=["system"])
async def health_check():
    """Health check endpoint: reports status plus the configured name/version."""
    from app.core.config import get_settings
    settings = get_settings()
    return HealthResponse(
        status="ok",
        service=settings.app_name,
        version=settings.app_version,
    )


# ── Font diagnostics ──

@router.get("/diagnostics/fonts", tags=["system"])
async def font_diagnostics():
    """Font diagnostics endpoint. Mirrors PHP font_diagnostics.php.

    Raises:
        HTTPException: 500 if the diagnostics helper fails.
    """
    from app.core.fonts import font_diagnostics as _diag
    try:
        return _diag()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc


# ── Global sections (proxy/fetch) ──

@router.get("/api/v1/sections/global", tags=["sections"])
async def get_global_sections(catalog_id: int = Query(0, description="Catalog ID filter")):
    """Fetch global handbook sections from the upstream API.

    Returns normalised section data identical to what the PHP code produces,
    as ``{"ok": True, "general_sections": [...], "count": n}``.

    Raises:
        HTTPException: 502 if the upstream fetch fails.
    """
    from app.services.data_fetcher import fetch_global_sections

    try:
        sections = await fetch_global_sections(catalog_id)
        return {
            "ok": True,
            "general_sections": sections,
            "count": len(sections),
        }
    except Exception as exc:
        logger.exception("Failed to fetch global sections")
        raise HTTPException(status_code=502, detail=str(exc)) from exc


# ── University sections (proxy/fetch) ──

@router.get("/api/v1/sections/universities", tags=["sections"])
async def get_university_sections():
    """Fetch university handbook sections from the upstream API.

    Returns ``{"ok": True, "universities": ..., "count": n}``.

    Raises:
        HTTPException: 502 if the upstream fetch fails.
    """
    from app.services.data_fetcher import fetch_university_sections

    try:
        by_uni = await fetch_university_sections()
        return {
            "ok": True,
            "universities": by_uni,
            "count": len(by_uni),
        }
    except Exception as exc:
        logger.exception("Failed to fetch university sections")
        raise HTTPException(status_code=502, detail=str(exc)) from exc


# ── Generate handbook (HTML or PDF) ──

@router.get("/api/v1/handbook/pdf", tags=["handbook"])
async def generate_handbook_pdf_get(
    catalog_id: int = Query(0),
    include_inactive_programs: bool = Query(False),
    debug: bool = Query(False),
):
    """Generate the ISP Handbook as a PDF download (GET for easy PHP integration)."""
    return await _pdf_download_response(catalog_id, include_inactive_programs, debug)


@router.post("/api/v1/handbook/pdf", tags=["handbook"])
async def generate_handbook_pdf_post(request: HandbookRequest):
    """Generate the ISP Handbook as a PDF download (POST with body)."""
    return await _pdf_download_response(
        request.catalog_id,
        request.include_inactive_programs,
        request.debug,
    )


@router.get("/api/v1/handbook/html", tags=["handbook"])
async def generate_handbook_html_get(
    catalog_id: int = Query(0),
    include_inactive_programs: bool = Query(False),
    debug: bool = Query(False),
):
    """Generate the ISP Handbook as raw HTML (useful for preview/debugging)."""
    return await _html_preview_response(catalog_id, include_inactive_programs, debug)


@router.post("/api/v1/handbook/render", tags=["handbook"])
async def render_handbook(request: HandbookRequest):
    """Generate handbook in the requested format (pdf or html).

    ``output_format == "html"`` yields the HTML preview; any other value
    yields the PDF download.
    """
    render = (
        _html_preview_response
        if request.output_format == "html"
        else _pdf_download_response
    )
    return await render(
        request.catalog_id,
        request.include_inactive_programs,
        request.debug,
    )
"https://finsapdev.qhtestingserver.com" + general_sections_path: str = "/MODEL_APIS/handbook_general_sections.php" + university_sections_path: str = "/MODEL_APIS/university_handbook.php" + + # Images + images_dir: str = "./images" + + # Fonts + font_dir: str = "./fonts" + + # CORS + cors_origins: str = "http://localhost:5173,http://127.0.0.1:5173,https://finsapdev.qhtestingserver.com,https://internationalscholarsdev.qhtestingserver.com" + + # Request timeouts + http_timeout: int = 25 + + model_config = {"env_file": ".env", "env_file_encoding": "utf-8", "extra": "ignore"} + + @property + def cors_origins_list(self) -> list[str]: + return [o.strip() for o in self.cors_origins.split(",") if o.strip()] + + @property + def general_endpoint_url(self) -> str: + if self.handbook_general_endpoint: + return self.handbook_general_endpoint + return self.api_base_url.rstrip("/") + self.general_sections_path + + @property + def university_endpoint_url(self) -> str: + if self.university_handbook_endpoint: + return self.university_handbook_endpoint + return self.api_base_url.rstrip("/") + self.university_sections_path + + +@lru_cache() +def get_settings() -> Settings: + return Settings() diff --git a/app/core/fonts.py b/app/core/fonts.py new file mode 100644 index 0000000000000000000000000000000000000000..4f97a1719f2b257444b02cb7a993b762d0ed3971 --- /dev/null +++ b/app/core/fonts.py @@ -0,0 +1,105 @@ +"""Font file management — Century Gothic only. + +Mirrors the PHP handbook_select_font_family / handbook_font_face_css logic. 
+""" + +import base64 +import os +from pathlib import Path + +from app.core.config import get_settings + + +class FontError(RuntimeError): + pass + + +VARIANTS = ("regular", "bold", "italic", "bold_italic") +FILE_MAP = { + "regular": "GOTHIC.TTF", + "bold": "GOTHICB.TTF", + "italic": "GOTHICI.TTF", + "bold_italic": "GOTHICBI.TTF", +} + + +def _font_dir() -> Path: + return Path(get_settings().font_dir) + + +def select_font_family() -> dict: + """Return font metadata dict. Raises FontError if any file is missing.""" + font_dir = _font_dir() + paths: dict[str, Path] = {} + for variant, filename in FILE_MAP.items(): + p = font_dir / filename + if not p.is_file(): + raise FontError( + f'Century Gothic font file missing for variant "{variant}": {p}' + ) + paths[variant] = p + + return { + "family": "Century Gothic", + "regular": str(paths["regular"]), + "bold": str(paths["bold"]), + "italic": str(paths["italic"]), + "bold_italic": str(paths["bold_italic"]), + "status": "primary", + } + + +def font_face_css(font_meta: dict | None = None) -> str: + """Generate @font-face CSS with base64-embedded TTF data.""" + meta = font_meta or select_font_family() + family = meta.get("family", "Century Gothic") + + encoded: dict[str, str] = {} + for variant in VARIANTS: + path = meta.get(variant) + if not path or not os.path.isfile(path): + raise FontError( + f'Century Gothic font file missing for variant "{variant}": {path}' + ) + with open(path, "rb") as f: + data = base64.b64encode(f.read()).decode("ascii") + if not data: + raise FontError(f"Failed to read/encode font file: {path}") + encoded[variant] = data + + css_parts = [] + weight_style = { + "regular": ("400", "normal"), + "bold": ("700", "normal"), + "italic": ("400", "italic"), + "bold_italic": ("700", "italic"), + } + for variant, (weight, style) in weight_style.items(): + css_parts.append( + f"@font-face {{\n" + f" font-family: '{family}';\n" + f" src: url('data:font/ttf;base64,{encoded[variant]}') format('truetype');\n" + 
f" font-weight: {weight};\n" + f" font-style: {style};\n" + f"}}" + ) + + return "\n".join(css_parts) + + +def font_diagnostics() -> dict: + """Return diagnostic info about font availability.""" + font_dir = _font_dir() + result = { + "font_dir": str(font_dir), + "font_dir_exists": font_dir.is_dir(), + "variants": {}, + } + for variant, filename in FILE_MAP.items(): + p = font_dir / filename + result["variants"][variant] = { + "path": str(p), + "exists": p.is_file(), + "size_bytes": p.stat().st_size if p.is_file() else 0, + } + return result diff --git a/app/core/logging.py b/app/core/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..9e72bae4ecbe754a43486c7d3030006d803d8128 --- /dev/null +++ b/app/core/logging.py @@ -0,0 +1,10 @@ +"""Centralised logging setup.""" + +import logging +import sys + + +def setup_logging(debug: bool = False) -> None: + level = logging.DEBUG if debug else logging.INFO + fmt = "%(asctime)s [%(levelname)s] %(name)s: %(message)s" + logging.basicConfig(stream=sys.stdout, level=level, format=fmt, force=True) diff --git a/app/core/theme.py b/app/core/theme.py new file mode 100644 index 0000000000000000000000000000000000000000..2414bb832a161bd6a578575fc508a48b77928a52 --- /dev/null +++ b/app/core/theme.py @@ -0,0 +1,169 @@ +"""Centralized handbook visual theme — single source of truth. + +All colour values, font sizes, spacing, and rendering tokens live here. +Templates, CSS generation, and renderers reference this module instead +of hardcoding visual rules. + +Spec source: ISP Handbook Enhancement Guidelines + sample PDF. 
# ── Colour palette ──────────────────────────────────────────────

@dataclass(frozen=True)
class Colors:
    """Immutable palette; each field is a hex colour named after its role."""

    heading_blue: str = "#0263A3"
    heading_green: str = "#199970"
    body_text: str = "#000000"
    toc_text: str = "#111111"
    note_red: str = "#C00000"
    link_blue: str = "#0263A3"
    benefits_header_bg: str = "#00F600"
    benefits_header_fg: str = "#FFFFFF"
    benefit_item_bg: str = "#00FCFC"
    benefit_item_fg: str = "#000000"
    school_info_green: str = "#199970"
    table_border: str = "#333333"
    table_header_bg: str = "#E6E6E6"
    table_header_fg: str = "#333333"
    toc_dots: str = "#777777"
    muted: str = "#666666"
    note_bg: str = "#F7F8FA"
    note_border: str = "#BBBBBB"
    page_bg: str = "#FFFFFF"


# ── Typography ──────────────────────────────────────────────────

@dataclass(frozen=True)
class Typography:
    """Immutable font stack, font sizes and line-heights, stored as CSS strings."""

    font_family: str = "'Century Gothic', 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif"
    font_size_body: str = "10pt"
    font_size_h1: str = "12pt"
    font_size_h2: str = "12pt"
    font_size_h3: str = "10pt"
    font_size_toc_heading: str = "12pt"
    font_size_toc_item: str = "10pt"
    font_size_table: str = "9.5px"
    font_size_programs_table: str = "8.5px"
    font_size_career_list: str = "8.5px"
    font_size_note: str = "9.5px"
    font_size_benefits_header: str = "10.5px"
    font_size_benefit_item: str = "10px"
    font_size_school_name: str = "12pt"
    font_size_summary_label: str = "10.5px"
    font_size_summary_value: str = "9.5px"
    font_size_qualify: str = "10px"
    line_height_body: str = "1.4"
    line_height_heading: str = "1.2"
    line_height_table: str = "1.25"


# ── Spacing / margins ──────────────────────────────────────────

@dataclass(frozen=True)
class Spacing:
    """Immutable page margins (2.54cm on every side) and per-element spacing."""

    page_margin_top: str = "2.54cm"
    page_margin_right: str = "2.54cm"
    page_margin_bottom: str = "2.54cm"
    page_margin_left: str = "2.54cm"
    paragraph_margin: str = "2px 0 8px"
    heading_margin_h1: str = "12px 0 6px"
    heading_margin_h2: str = "10px 0 4px"
    list_margin: str = "2px 0 8px 18px"
    note_padding: str = "6px 8px"
    note_margin: str = "6px 0 8px"
    table_margin: str = "6px 0 10px"
    table_cell_padding: str = "5px 6px"
    benefits_margin: str = "4px 0 4px"
    school_top_summary_width: str = "58%"
    school_top_campus_width: str = "42%"


# ── Table column widths ────────────────────────────────────────

@dataclass(frozen=True)
class ProgramTableColumns:
    """Percentage widths for the programs table columns."""

    # NOTE(review): the earlier docstring described a 5-column table, but only
    # four widths are defined here (summing to 100%) — confirm which is right.
    program: str = "30%"
    designation: str = "20%"
    entrance_exam: str = "20%"
    funding: str = "30%"


# ── Bullet characters ──────────────────────────────────────────

@dataclass(frozen=True)
class Bullets:
    """Bullet glyphs and list-style keywords used by the handbook."""

    primary: str = "\u27A2"   # ➢
    benefit: str = "\u2022"   # •
    career: str = "disc"      # CSS list-style-type for career lists


# ── Render-block type registry ──────────────────────────────────

BLOCK_TYPES = (
    "heading_1",
    "heading_2",
    "paragraph",
    "bullet_list",
    "note",
    "table",
    "enrollment_steps",
    "school_profile",
    "university_summary",
    "toc",
    "cover",
    "full_page_image",
)


# ── Composed theme object ──────────────────────────────────────

@dataclass(frozen=True)
class HandbookTheme:
    """Bundle of every token group, for injection into renderers and templates."""

    colors: Colors = field(default_factory=Colors)
    typography: Typography = field(default_factory=Typography)
    spacing: Spacing = field(default_factory=Spacing)
    program_columns: ProgramTableColumns = field(default_factory=ProgramTableColumns)
    bullets: Bullets = field(default_factory=Bullets)

    def css_vars(self) -> dict[str, str]:
        """Return the theme as a mapping of ``--hb-*`` CSS custom properties.

        Every colour is exported (underscores become hyphens), followed by a
        fixed selection of typography and page-margin values and the primary
        bullet wrapped in double quotes.
        """
        type_, space = self.typography, self.spacing
        palette = {
            "--hb-" + name.replace("_", "-"): getattr(self.colors, name)
            for name in Colors.__dataclass_fields__
        }
        return {
            # Colors
            **palette,
            # Typography
            "--hb-font-family": type_.font_family,
            "--hb-font-size-body": type_.font_size_body,
            "--hb-font-size-h1": type_.font_size_h1,
            "--hb-font-size-h2": type_.font_size_h2,
            "--hb-line-height-body": type_.line_height_body,
            # Spacing
            "--hb-page-margin-top": space.page_margin_top,
            "--hb-page-margin-right": space.page_margin_right,
            "--hb-page-margin-bottom": space.page_margin_bottom,
            "--hb-page-margin-left": space.page_margin_left,
            # Bullet
            "--hb-bullet-char": '"' + self.bullets.primary + '"',
        }


# Module-level singleton
THEME = HandbookTheme()
@app.on_event("shutdown")
async def shutdown_event():
    """Close the Playwright browser, then log that shutdown has finished."""
    # NOTE(review): FastAPI documents ``on_event`` as deprecated in favour of
    # lifespan handlers; migrating would also touch the ``FastAPI(...)`` call.
    # Imported here rather than at module level, as in the original.
    from app.services.pdf_renderer import shutdown_browser

    closing = shutdown_browser()
    await closing
    logger.info("Application shutdown complete")
class HandbookRequest(BaseModel):
    """Request body for handbook generation.

    Every field has a default, so an empty JSON object is a valid request.
    """

    # Catalogue identifier; 0 is used when the caller does not send one.
    catalog_id: int = 0
    # NOTE(review): presumably forwarded to the HTML builder's parameter of the
    # same name, which keeps programs whose active flag is false — confirm in the route.
    include_inactive_programs: bool = False
    debug: bool = False
    # NOTE(review): declared as a plain ``str``, so this model accepts values
    # other than 'pdf' / 'html'; confirm the route validates it.
    output_format: str = Field(
        default="pdf", description="'pdf' or 'html'"
    )
a/app/services/data_fetcher.py b/app/services/data_fetcher.py new file mode 100644 index 0000000000000000000000000000000000000000..a174edd281088cbb03857a6392b5d49179387b22 --- /dev/null +++ b/app/services/data_fetcher.py @@ -0,0 +1,242 @@ +"""Data fetcher service — mirrors PHP fetchers.php. + +Fetches handbook data from the two external JSON APIs (source of truth), +normalises the payloads, and returns typed dicts identical to what the +PHP code produced. +""" + +from __future__ import annotations + +import json +import logging +from typing import Any + +import httpx + +from app.core.config import get_settings + +logger = logging.getLogger(__name__) + + +def _normalize_section_json(raw: Any, context: str, sid: Any = None) -> dict | list: + """Mirrors PHP handbook_normalize_section_json.""" + if isinstance(raw, dict) or isinstance(raw, list): + return raw + if isinstance(raw, str): + raw = raw.strip() + if not raw: + return {} + try: + decoded = json.loads(raw) + if isinstance(decoded, (dict, list)): + return decoded + except (json.JSONDecodeError, ValueError): + logger.warning( + "section_json parse failed ctx=%s id=%s snippet=%.180s", + context, sid, raw, + ) + return {} + return {} + + +def _is_truthy(val: Any) -> bool: + """Mirrors PHP handbook_true.""" + if isinstance(val, bool): + return val + if isinstance(val, int): + return val != 0 + s = str(val).lower().strip() + return s not in ("0", "false", "") + + +def _tier_section_rank(section_key: str) -> int: + """Return sort priority for tier-related section keys. + + Tier One sections sort before Tier Two; non-tier sections get 99 (neutral). 
+ """ + k = section_key.lower().replace("-", "_").replace(" ", "_") + if "tier_one" in k or "non_cosigner" in k: + return 0 + if "tier_two" in k or k in ("cosigner_schools", "cosigner"): + return 1 + return 99 + + +def _sort_sections_stable(sections: list[dict]) -> list[dict]: + """Mirrors PHP sortHandbookSectionsStable with tier-aware tiebreaker.""" + for i, s in enumerate(sections): + s.setdefault("_i", i) + + def sort_key(s: dict): + so = s.get("sort_order") + sid = s.get("id") + # None values sort after numeric values + so_key = (0, so) if so is not None else (1, 0) + # Tier-aware tiebreaker: Tier One before Tier Two when sort_order ties + tier_rank = _tier_section_rank(str(s.get("section_key", ""))) + sid_key = (0, sid) if sid is not None else (1, 0) + return (so_key, tier_rank, sid_key, s.get("_i", 0)) + + sections.sort(key=sort_key) + for s in sections: + s.pop("_i", None) + return sections + + +async def fetch_global_sections(catalog_id: int = 0) -> list[dict[str, Any]]: + """Fetch and normalise global handbook sections from the external API. + + Mirrors PHP fetchGlobalSections(). + """ + settings = get_settings() + url = settings.general_endpoint_url + if catalog_id: + sep = "&" if "?" in url else "?" 
+ url += f"{sep}catalog_id={catalog_id}" + + try: + async with httpx.AsyncClient(verify=False, timeout=settings.http_timeout) as client: + resp = await client.get(url) + resp.raise_for_status() + payload = resp.json() + except Exception as exc: + logger.error("Global sections fetch failed: %s url=%s", exc, url) + return [] + + if not payload.get("ok"): + logger.warning("Global sections API returned ok=false: %s", payload) + return [] + + # Accept common shapes + sections_raw = ( + payload.get("general_sections") + or payload.get("sections") + or payload.get("globals") + or payload.get("data") + or [] + ) + if not isinstance(sections_raw, list): + sections_raw = [] + + out: list[dict[str, Any]] = [] + for i, s in enumerate(sections_raw): + if not isinstance(s, dict): + continue + + k = str(s.get("section_key", "")) + t = str(s.get("section_title", "")) + j = _normalize_section_json(s.get("section_json", {}), "global", s.get("id")) + + sort_raw = s.get("sort_order") or s.get("sortOrder") + sort_val = int(sort_raw) if sort_raw is not None and str(sort_raw).lstrip("-").isdigit() else None + + if not k and not t and (not j or j == {}): + continue + + out.append({ + "section_key": k, + "section_title": t, + "section_json": j, + "sort_order": sort_val, + "id": int(s["id"]) if s.get("id") is not None else None, + "_i": i, + }) + + out = _sort_sections_stable(out) + + logger.info( + "Global sections fetched catalog_id=%d count=%d keys=%s", + catalog_id, + len(out), + [s.get("section_key") for s in out], + ) + return out + + +async def fetch_university_sections() -> dict[int, dict[str, Any]]: + """Fetch and normalise university handbook sections. + + Returns dict keyed by university_id. + Mirrors PHP fetchUniversitySections(). 
+ """ + settings = get_settings() + url = settings.university_endpoint_url + + try: + async with httpx.AsyncClient(verify=False, timeout=settings.http_timeout) as client: + resp = await client.get(url) + resp.raise_for_status() + payload = resp.json() + except Exception as exc: + logger.error("University sections fetch failed: %s url=%s", exc, url) + return {} + + if not payload.get("ok"): + logger.warning("University sections API returned ok=false") + return {} + + universities = payload.get("universities", []) + if not isinstance(universities, list): + universities = [] + + by_uni: dict[int, dict[str, Any]] = {} + for u in universities: + if not isinstance(u, dict): + continue + uid = int(u.get("university_id", 0)) + if uid <= 0: + continue + + name = str(u.get("university_name", f"University #{uid}")) + is_active_raw = u.get("is_active", u.get("isActive", 1)) + website = str(u.get("website", u.get("website_url", ""))) + is_active = _is_truthy(is_active_raw) + + sections_raw = u.get("sections", []) + if not isinstance(sections_raw, list): + sections_raw = [] + + norm_sections: list[dict[str, Any]] = [] + for s in sections_raw: + if not isinstance(s, dict): + continue + k = str(s.get("section_key", "")) + t = str(s.get("section_title", "")) + j = _normalize_section_json(s.get("section_json", {}), "university", s.get("id")) + if not k and not t and (not j or j == {}): + continue + norm_sections.append({ + "section_key": k, + "section_title": t, + "section_json": j, + }) + + # Derive tier from school_category (backward-compatible — older APIs may omit these) + school_category = str(u.get("school_category", "")).strip() + tier = u.get("tier") + tier_label = u.get("tier_label", "") + if tier is None and school_category: + # Derive from school_category if tier not explicitly provided + if school_category == "non_cosigner": + tier, tier_label = 1, "Tier One" + elif school_category == "cosigner": + tier, tier_label = 2, "Tier Two" + + by_uni[uid] = { + "university_name": 
name, + "sections": norm_sections, + "is_active": is_active, + "website": website, + "school_category": school_category, + "tier": tier, + "tier_label": tier_label or "", + } + + # Sort: Tier One (non_cosigner) first, then Tier Two (cosigner), then by name + def _uni_sort_key(item: tuple[int, dict]) -> tuple: + uid, data = item + t = data.get("tier") + tier_rank = t if isinstance(t, int) else 99 + return (tier_rank, data.get("university_name", "").lower(), uid) + + return dict(sorted(by_uni.items(), key=_uni_sort_key)) diff --git a/app/services/html_builder.py b/app/services/html_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..61299398c7e4f9a01bdabdcbe1228bc9611bf20c --- /dev/null +++ b/app/services/html_builder.py @@ -0,0 +1,650 @@ +"""HTML builder — assembles the full ISP Handbook HTML document. + +Uses Jinja2 templates for HTML generation. Data preparation logic is +preserved from the original string-concatenation approach. The output +is a self-contained HTML suitable for Playwright Chromium PDF export. 
def _get_jinja_env() -> Environment:
    """Build a Jinja2 environment that loads templates from ``_TEMPLATES_DIR``.

    Autoescaping is configured via ``select_autoescape(["html"])``; block
    trimming and left-stripping are both switched on.
    """
    template_loader = FileSystemLoader(str(_TEMPLATES_DIR))
    escape_policy = select_autoescape(["html"])
    return Environment(
        loader=template_loader,
        autoescape=escape_policy,
        trim_blocks=True,
        lstrip_blocks=True,
    )
+ """ + return "" + + +# Section class map +SECTION_CLASS_MAP = { + "overview": "sec-overview", + "how_the_program_works": "sec-how", + "qualification_requirements": "sec-qualification", + "enrolment_steps": "sec-steps", + "withdrawal_refund_policy": "sec-policy", + "refund_guidelines": "sec-refund", + "program_contributions": "sec-contributions", + "program_features_breakdown": "sec-breakdown", + "funding_options_available": "sec-funding", + "summary_of_universities": "sec-summary", + "summary_of_universities_cosigner": "sec-summary-cosigner", +} + +PAGE_BREAK_KEYS = { + "overview", + "how_the_program_works", + "qualification_requirements", + "enrolment_steps", + "withdrawal_refund_policy", + "refund_guidelines", + "program_contributions", + "program_features_breakdown", + "funding_options_available", + "summary_of_universities", + "summary_of_universities_cosigner", +} + + +def _collect_program_option_inconsistencies(value: Any, path: str, hits: list[str]) -> None: + """Collect paths where only REGULAR or PRIME appears.""" + if isinstance(value, dict): + for k, v in value.items(): + _collect_program_option_inconsistencies(v, f"{path}.{k}" if path else str(k), hits) + return + if isinstance(value, list): + for i, v in enumerate(value): + _collect_program_option_inconsistencies(v, f"{path}[{i}]", hits) + return + if value is None: + return + + text = str(value) + has_regular = bool(re.search(r"\bREGULAR\b", text, flags=re.IGNORECASE)) + has_prime = bool(re.search(r"\bPRIME\b", text, flags=re.IGNORECASE)) + if has_regular ^ has_prime: + hits.append(path) + + +def _prepare_university_data( + uni_raw: dict[str, Any], + allow_remote: bool, + include_inactive_programs: bool, + debug: bool, + stats: dict[str, Any], +) -> dict[str, Any]: + """Prepare a single university's template data. + + Extracts overview, campus image, benefits, programs, and extra sections + from the raw sections list. 
This moves the logic that was in + render_university_section into a data-preparation step so that the + Jinja2 template handles the HTML. + """ + uni_name = uni_raw["name"] + sections = uni_raw.get("sections", []) + is_first = uni_raw.get("_is_first", False) + + stats["universities"] = stats.get("universities", 0) + 1 + + # Build section map; merge duplicate "programs" + sec_map: dict[str, dict] = {} + for s in sections: + if not isinstance(s, dict): + continue + k = str(s.get("section_key", "")) + if not k: + continue + if k == "programs" and k in sec_map: + existing = sec_map["programs"].get("section_json", {}) + incoming = s.get("section_json", {}) + if not isinstance(existing, dict): + existing = {} + if not isinstance(incoming, dict): + incoming = {} + a = existing.get("programs", []) + b = incoming.get("programs", []) + if not isinstance(a, list): + a = [] + if not isinstance(b, list): + b = [] + existing["programs"] = a + b + sec_map["programs"]["section_json"] = existing + continue + sec_map[k] = s + + # Campus image + img_section = sec_map.get("campus_image") or sec_map.get("image") + campus_image = "" + campus_caption = "" + if img_section: + j = img_section.get("section_json", {}) + if isinstance(j, dict): + campus_url = str(j.get("image_url", "")).strip() + campus_caption = str(j.get("caption", "")).strip() + if allow_remote and campus_url: + embedded = fetch_image_data_uri(campus_url) + if embedded: + campus_image = embedded + stats["images_embedded"] = stats.get("images_embedded", 0) + 1 + else: + stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1 + else: + stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1 + + # Overview and website + resolved_website = (uni_raw.get("website") or "").strip() + overview_data = None + + if "overview" in sec_map: + overview_json = sec_map["overview"].get("section_json", {}) + if not isinstance(overview_json, dict): + overview_json = {} + + site_from_overview = get_any( + 
overview_json, + ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"], + ) + if not resolved_website and site_from_overview: + resolved_website = site_from_overview + + overview_data = { + "founded": get_any(overview_json, ["founded", "Founded"]), + "total_students": get_any(overview_json, ["total_students", "Total Students"]), + "undergraduates": get_any(overview_json, ["undergraduates", "Undergraduate Students", "undergraduate_students"]), + "postgraduates": get_any(overview_json, ["postgraduate_students", "Postgraduate Students"]), + "acceptance_rate": get_any(overview_json, ["acceptance_rate", "Acceptance Rate"]), + "location": get_any(overview_json, ["location", "Location"]), + "tuition": format_money_figures(str(get_any(overview_json, [ + "tuition_out_of_state_yearly", + "Yearly Out of State Tuition Fees", + "Yearly Out-of-State Tuition Fees", + "Yearly Tuition Fees", + "Yearly Out-of-State Tuition Fees:", + ]) or "")) or None, + } + + if resolved_website: + stats["university_links"] = stats.get("university_links", 0) + 1 + stats["website_rows"] = stats.get("website_rows", 0) + 1 + + # Benefits + # Benefits + Funding + benefits = [] + funding_heading = "Funding Available" + funding_items: list[str] = [] + + if "benefits" in sec_map: + j = sec_map["benefits"].get("section_json", {}) + if not isinstance(j, dict): + j = {} + + raw_benefits = j.get("benefits", []) + if isinstance(raw_benefits, list): + benefits = [str(b).strip() for b in raw_benefits if str(b).strip()] + else: + benefits = [] + + funding_heading, funding_items = _extract_university_funding( + j, + { + "school_category": uni_raw.get("school_category"), + "status": "in" if is_truthy(uni_raw.get("is_active", True)) else "out", + }, + ) + + # Programs + programs = None + if "programs" in sec_map: + j = sec_map["programs"].get("section_json", {}) + if not isinstance(j, dict): + j = {} + programs_raw = j.get("programs", []) + if not isinstance(programs_raw, 
list): + programs_raw = [] + + if not include_inactive_programs: + programs_raw = [ + p for p in programs_raw + if isinstance(p, dict) and is_truthy( + p.get("program_active", p.get("is_active", p.get("active", 1))) + ) + ] + + programs = [] + seen_names = set() + for p in programs_raw: + if not isinstance(p, dict): + continue + program_name = str(p.get("program_name", "")).strip() + # Deduplicate by lowercase program name + key = program_name.lower() + if key in seen_names: + continue + seen_names.add(key) + link = str(p.get("program_link", "")).strip() + if not link and isinstance(p.get("program_links"), dict): + link = str(p["program_links"].get("web_link", "")).strip() + + programs.append({ + "name": program_name, + "link": link, + "designation": str(p.get("designation", "")), + "entrance": str(p.get("entrance_exam", p.get("entrance_examination", ""))), + }) + + # Extra sections + skip_keys = {"campus_image", "image", "overview", "benefits", "programs"} + extra_sections = [] + for s in sections: + if not isinstance(s, dict): + continue + k = str(s.get("section_key", "")) + if not k or k in skip_keys: + continue + title = str(s.get("section_title", "")) + j = s.get("section_json", {}) + if not isinstance(j, dict): + j = {} + rendered = render_global_blocks(k, title, j, debug) + extra_sections.append({"rendered_html": Markup(rendered)}) + + classes = ["uni"] + if not is_first: + classes.append("page-break") + + return { + "name": uni_name, + "anchor": uni_raw.get("anchor"), + "sort_order": uni_raw.get("sort_order"), + "website": resolved_website, + "classes": classes, + "overview": overview_data, + "campus_image": campus_image, + "campus_caption": campus_caption, + "benefits": benefits, + "funding_heading": funding_heading, + "funding_items": funding_items, + "programs": programs, + "extra_sections": extra_sections, + } + + +def build_handbook_html( + globals_data: list[dict[str, Any]], + by_uni: dict[int, dict[str, Any]], + images: dict[str, Any], + 
allow_remote: bool, + include_inactive_programs: bool = False, + debug: bool = False, +) -> str: + """Build the full handbook HTML document using Jinja2 templates. + + Preserves the same data preparation logic from the original version. + Rendering is delegated to Jinja2 templates with Playwright-compatible + HTML/CSS output. + """ + env = _get_jinja_env() + template = env.get_template("handbook.html") + + font_meta = select_font_family() + font_css = font_face_css(font_meta) + + # Base URL for static assets (CSS, images, etc.) + base_url = _static_base_url() + + stats: dict[str, Any] = { + "universities": 0, + "images_embedded": 0, + "images_placeholder": 0, + "program_links_total": 0, + "program_missing_links_total": 0, + "missing_program_links": {}, + "university_links": 0, + "website_rows": 0, + "program_option_warnings": [], + } + + # ── Cover Image ── + cover_image = images.get("coverImage", "") + if cover_image and os.path.isfile(cover_image): + cover_image = Path(cover_image).as_uri() + else: + cover_image = "" + + # ── TOC Image ── + toc_image = images.get("tocImage", "") + if toc_image and os.path.isfile(toc_image): + toc_image = Path(toc_image).as_uri() + else: + toc_image = "" + + # ── Header Image (repeating page header) ── + header_image = images.get("headerImage", "") + if header_image and os.path.isfile(header_image): + mime = mimetypes.guess_type(header_image)[0] or "image/jpeg" + with open(header_image, "rb") as f: + header_image = f"data:{mime};base64,{base64.b64encode(f.read()).decode()}" + else: + header_image = "" + + # ── Label Image (repeating right-side label) ── + label_image = images.get("labelImage", "") + if label_image and os.path.isfile(label_image): + mime = mimetypes.guess_type(label_image)[0] or "image/png" + with open(label_image, "rb") as f: + label_image = f"data:{mime};base64,{base64.b64encode(f.read()).decode()}" + else: + logger.warning("Label image not found locally: %s", label_image) + label_image = "" + + # ── Prepare 
active universities (sorted: Tier One first, Tier Two second) ── + active_universities: list[dict[str, Any]] = [] + for uid, uni in by_uni.items(): + if not isinstance(uni, dict): + continue + if not is_truthy(uni.get("is_active", True)): + continue + name = str(uni.get("university_name", f"University #{uid}")) + anchor = handbook_anchor("uni", name, int(uid)) + school_category = str(uni.get("school_category", "")).strip() + tier = uni.get("tier") + tier_label = str(uni.get("tier_label", "")).strip() + active_universities.append({ + "id": int(uid), + "anchor": anchor, + "name": name, + "sections": uni.get("sections", []) if isinstance(uni.get("sections"), list) else [], + "website": str(uni.get("website", "")), + "sort_order": int(uni["sort_order"]) if uni.get("sort_order") is not None and str(uni.get("sort_order", "")).lstrip("-").isdigit() else None, + "school_category": school_category, + "tier": tier, + "tier_label": tier_label, + }) + + # Stable tier ordering: Tier One (non_cosigner) → Tier Two (cosigner) → others, then alphabetical + def _tier_sort(u: dict) -> tuple: + t = u.get("tier") + rank = t if isinstance(t, int) else 99 + return (rank, (u.get("name") or "").lower(), u.get("id", 0)) + active_universities.sort(key=_tier_sort) + + # ── Normalise globals ── + globals_data = sort_sections_stable(globals_data) + + required_keys = [ + "table_of_contents", + "overview", + "how_the_program_works", + ] + existing_keys = {str(g.get("section_key", "")).lower() for g in globals_data if isinstance(g, dict)} + missing = [k for k in required_keys if k not in existing_keys] + if missing: + msg = f"Handbook required sections missing: {','.join(missing)}" + logger.error(msg) + raise RuntimeError(msg) + + general_sections: list[dict[str, Any]] = [] + toc_sort_order = None + toc_title = "Table of Contents" + + for idx, g in enumerate(globals_data): + if not isinstance(g, dict): + continue + key_raw = str(g.get("section_key", "")) + key = key_raw.lower() + sort_order = 
int(g["sort_order"]) if g.get("sort_order") is not None and str(g.get("sort_order", "")).lstrip("-").isdigit() else None + + if key == "table_of_contents" and toc_sort_order is None: + toc_sort_order = sort_order if sort_order is not None else (idx + 1) + toc_title = str(g.get("section_title", "Table of Contents")) + continue + + section_hits: list[str] = [] + _collect_program_option_inconsistencies( + g.get("section_json", {}), + f"global.{key_raw}", + section_hits, + ) + for hit in section_hits: + if hit not in stats["program_option_warnings"]: + stats["program_option_warnings"].append(hit) + + anchor = handbook_anchor("g", str(g.get("section_title", g.get("section_key", "section"))), idx) + general_sections.append({ + "anchor": anchor, + "data": g, + "sort_order": sort_order, + }) + + # ── Build TOC items ── + toc_items: list[dict[str, Any]] = [] + for gs in general_sections: + # Prefer the JSON-level title (display-ready) over the DB section_title + gs_json = gs["data"].get("section_json", {}) + if isinstance(gs_json, dict) and gs_json.get("title", "").strip(): + title = gs_json["title"].strip() + else: + title = str(gs["data"].get("section_title", gs["data"].get("section_key", "Section"))) + toc_items.append({ + "title": title, + "target": "#" + gs["anchor"], + "level": 0, + "bold": True, + "sort": gs["sort_order"], + }) + + for u in active_universities: + toc_items.append({ + "title": u["name"], + "target": "#" + u["anchor"], + "level": 1, + "bold": False, + "sort": u.get("sort_order"), + }) + + # ── Prepare sorted TOC items for template ── + sorted_toc = sort_toc(list(toc_items)) + toc_items_sorted = [] + for e in sorted_toc: + if not isinstance(e, dict): + continue + title = str(e.get("title", "")).strip() + if not title: + continue + level = max(0, min(3, int(e.get("level", 0)))) + bold = bool(e.get("bold", False)) + upper = bool(e.get("upper", False)) + if level == 0: + bold = True + upper = True + display_title = title.upper() if upper else title + page 
= str(e.get("page", "")).strip() + + toc_items_sorted.append({ + "title": title, + "display_title": display_title, + "target": str(e.get("target", e.get("anchor", ""))).strip(), + "level": level, + "bold": bold, + "upper": upper, + "page": page, + }) + + # ── Prepare general sections with rendered HTML and typed blocks ── + template_sections = [] + for gs in general_sections: + data = gs["data"] + key_lower = str(data.get("section_key", "")).lower() + + sec_class = SECTION_CLASS_MAP.get(key_lower) + if sec_class is None: + sec_class = "sec-" + re.sub(r"[^a-z0-9]+", "-", key_lower) + + section_json = data.get("section_json", {}) + if not isinstance(section_json, dict): + section_json = {} + + # Typed blocks for the new rendering path + blocks = normalize_section( + str(data.get("section_key", "")), + str(data.get("section_title", "")), + section_json, + debug=debug, + ) + + # Legacy HTML fallback + section_html = render_global_blocks( + str(data.get("section_key", "")), + str(data.get("section_title", "")), + section_json, + debug, + ) + + if not section_html.strip() and not blocks: + logger.warning( + "Empty section render key=%s sort_order=%s", + data.get("section_key"), + data.get("sort_order"), + ) + + template_sections.append({ + "anchor": gs["anchor"], + "data": data, + "page_break": key_lower in PAGE_BREAK_KEYS, + "sec_class": sec_class, + "blocks": blocks, + "rendered_html": Markup(section_html), + }) + + # ── Prepare university data for templates (both old + new paths) ── + # Group by tier for tier heading insertion in the PDF output + university_template_data = [] + university_block_data = [] + # Track which tier label was last emitted so we can insert tier divider headings + _seen_tier_labels: set[str] = set() + + for idx, uni_raw in enumerate(active_universities): + uni_raw["_is_first"] = (idx == 0) + + # Insert tier group heading when tier changes + current_tier_label = str(uni_raw.get("tier_label", "")).strip() + if current_tier_label and 
current_tier_label not in _seen_tier_labels: + _seen_tier_labels.add(current_tier_label) + # Mark this university as starting a new tier group + uni_raw["_tier_group_start"] = True + uni_raw["_tier_group_label"] = f"{current_tier_label} Schools" + + uni_hits: list[str] = [] + _collect_program_option_inconsistencies( + uni_raw.get("sections", []), + f"university.{uni_raw.get('name', idx)}", + uni_hits, + ) + for hit in uni_hits: + if hit not in stats["program_option_warnings"]: + stats["program_option_warnings"].append(hit) + + # Legacy path + uni_data = _prepare_university_data( + uni_raw, allow_remote, include_inactive_programs, debug, stats, + ) + # Carry tier metadata to template data + uni_data["tier"] = uni_raw.get("tier") + uni_data["tier_label"] = uni_raw.get("tier_label", "") + uni_data["tier_group_start"] = uni_raw.get("_tier_group_start", False) + uni_data["tier_group_label"] = uni_raw.get("_tier_group_label", "") + university_template_data.append(uni_data) + # New block path + uni_block = normalize_university( + uni_raw, allow_remote, include_inactive_programs, debug, stats, + ) + university_block_data.append(uni_block) + + # ── Bottom pages ── + bottom_pages_urls = [] + raw_bottom = images.get("bottomPages", []) + if isinstance(raw_bottom, list): + for img_path in raw_bottom: + if os.path.isfile(str(img_path)): + bottom_pages_urls.append(Path(str(img_path)).as_uri()) + + # ── Render template ── + if stats["program_option_warnings"]: + logger.warning( + "Program option consistency warnings (missing REGULAR or PRIME pair): %s", + stats["program_option_warnings"], + ) + + html = template.render( + font_css=Markup(font_css), + base_url=base_url, + extra_css="", + header_image=header_image, + label_image=label_image, + cover_image=cover_image, + toc_image=toc_image, + toc_items=toc_items, + toc_items_sorted=toc_items_sorted, + toc_title=toc_title, + toc_sort_order=toc_sort_order, + general_sections=template_sections, + summary_block=None, + 
def normalize_section(
    section_key: str,
    section_title: str,
    section_json: dict | list,
    *,
    universities: list[dict] | None = None,
    debug: bool = False,
) -> list[RenderBlock]:
    """Convert a single global section payload into a list of RenderBlocks.

    This is the single translation point between the database schema
    and the rendering layer.

    The payload shape decides which blocks are produced; the first
    matching shape wins and ends processing:

    1. ``steps`` (list)                      -> one ``enrollment_steps`` block
    2. ``bullets`` / ``items`` (list)        -> ``bullet_list`` (+ optional ``note``)
    3. ``columns`` + ``rows`` (lists)        -> basic ``table``
    4. ``layout == "table_v2"``              -> comparison ``table``
    5. ``layout == "doc_v1"`` with ``blocks`` -> doc_v1 blocks (post-processed
       for the breakdown and cosigner sections)
    6. ``text``                              -> single ``paragraph``

    A ``heading_1`` block is emitted first whenever a title is available,
    except for the ``table_of_contents`` section.

    Args:
        section_key: Section identifier; compared case-insensitively.
        section_title: Title stored alongside the section; used only when
            the JSON payload carries no usable ``title``.
        section_json: Decoded section payload. Anything that is not a dict
            is treated as an empty payload.
        universities: Accepted for interface compatibility; not read here.
        debug: Accepted for interface compatibility; not read here.

    Returns:
        The ordered list of render blocks for the section (possibly empty).
    """
    from markupsafe import Markup

    def as_text(value):
        # JSON null must read as "missing", never as the literal string "None".
        return "" if value is None else str(value)

    blocks: list[RenderBlock] = []
    key_norm = section_key.lower().strip()

    if not isinstance(section_json, dict):
        section_json = {}

    layout_norm = as_text(section_json.get("layout")).lower().strip()

    # ── Section heading ──
    # Prefer the JSON-level title (display-ready) over the DB section_title.
    # A null/blank JSON title falls back to the DB title.
    title = as_text(section_json.get("title")).strip() or section_title.strip()
    if title and key_norm != "table_of_contents":
        blocks.append(RenderBlock(
            block_type="heading_1",
            css_class="hb-heading-1",
            data={"text": title},
        ))

    # ── Steps → enrollment_steps ──
    steps = section_json.get("steps")
    if isinstance(steps, list):
        blocks.append(RenderBlock(
            block_type="enrollment_steps",
            css_class="hb-enrollment-steps",
            data={"steps": _normalize_steps(steps)},
        ))
        return blocks

    # ── Bullets ──
    bullets = section_json.get("bullets")
    items = section_json.get("items")
    has_bullets = isinstance(bullets, list)
    has_items = isinstance(items, list)
    if has_bullets or (layout_norm == "bullets_with_note" and has_items):
        # "items" takes precedence over "bullets" when both are lists.
        source = items if has_items else bullets
        stripped = [as_text(entry).strip() for entry in source]
        entries = [_normalize_text_content(entry) for entry in stripped if entry]
        blocks.append(RenderBlock(
            block_type="bullet_list",
            css_class="hb-bullet-list",
            data={
                "entries": [Markup(emphasize_keywords(entry)) for entry in entries],
                "html_entries": True,
            },
        ))

        # "footnote" is only consulted when "note" is absent (or null).
        raw_note = section_json.get("note")
        if raw_note is None:
            raw_note = section_json.get("footnote")
        note = _normalize_text_content(as_text(raw_note).strip())
        if note:
            blocks.append(RenderBlock(
                block_type="note",
                css_class="hb-note",
                data={"text": note},
            ))
        return blocks

    # ── Basic table ──
    cols = section_json.get("columns")
    rows = section_json.get("rows")
    if isinstance(cols, list) and isinstance(rows, list):
        blocks.append(_normalize_basic_table(cols, rows))
        return blocks

    # ── table_v2 ──
    if layout_norm == "table_v2":
        blocks.append(_normalize_table_v2(section_json))
        return blocks

    # ── doc_v1 ──
    if layout_norm == "doc_v1" and isinstance(section_json.get("blocks"), list):
        blocks.extend(_normalize_doc_v1(section_json["blocks"], skip_title=title))
        # Post-process breakdown section for Relocation Cost layout
        if key_norm == "program_features_breakdown":
            blocks = _postprocess_breakdown(blocks, section_json["blocks"])
        # Post-process Tier 2 section for sub-bullet styling
        if key_norm == "summary_of_universities_cosigner":
            blocks = _postprocess_tier2(blocks)
        return blocks

    # ── Fallback ──
    if "text" in section_json:
        text = _normalize_text_content(as_text(section_json["text"]))
        if text.strip():
            blocks.append(RenderBlock(
                block_type="paragraph",
                css_class="hb-paragraph",
                data={
                    "text": text,
                    "html": Markup(emphasize_keywords(text)),
                },
            ))

    return blocks
merge duplicate "programs" sections + sec_map: dict[str, dict] = {} + for s in sections: + if not isinstance(s, dict): + continue + k = str(s.get("section_key", "")) + if not k: + continue + if k == "programs" and k in sec_map: + existing = sec_map["programs"].get("section_json", {}) + incoming = s.get("section_json", {}) + if not isinstance(existing, dict): + existing = {} + if not isinstance(incoming, dict): + incoming = {} + a = existing.get("programs", []) + b = incoming.get("programs", []) + if not isinstance(a, list): + a = [] + if not isinstance(b, list): + b = [] + existing["programs"] = a + b + sec_map["programs"]["section_json"] = existing + continue + sec_map[k] = s + + # Campus image + img_section = sec_map.get("campus_image") or sec_map.get("image") + campus_image = "" + campus_caption = "" + if img_section: + j = img_section.get("section_json", {}) + if isinstance(j, dict): + campus_url = str(j.get("image_url", "")).strip() + campus_caption = str(j.get("caption", "")).strip() + if allow_remote and campus_url: + embedded = fetch_image_data_uri(campus_url) + if embedded: + campus_image = embedded + stats["images_embedded"] = stats.get("images_embedded", 0) + 1 + else: + stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1 + else: + stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1 + + # Overview and website + resolved_website = (uni_raw.get("website") or "").strip() + overview_data = None + + if "overview" in sec_map: + overview_json = sec_map["overview"].get("section_json", {}) + if not isinstance(overview_json, dict): + overview_json = {} + + site_from_overview = get_any( + overview_json, + ["university_website", "university_website_url", "website", + "site", "url", "homepage", "web_url"], + ) + if not resolved_website and site_from_overview: + resolved_website = site_from_overview + + overview_data = { + "founded": get_any(overview_json, ["founded", "Founded"]), + "total_students": get_any(overview_json, 
["total_students", "Total Students"]), + "undergraduates": get_any(overview_json, [ + "undergraduates", "Undergraduate Students", "undergraduate_students", + ]), + "postgraduates": get_any(overview_json, [ + "postgraduate_students", "Postgraduate Students", + ]), + "acceptance_rate": get_any(overview_json, ["acceptance_rate", "Acceptance Rate"]), + "location": get_any(overview_json, ["location", "Location"]), + "tuition": format_money_figures(str(get_any(overview_json, [ + "tuition_out_of_state_yearly", + "Yearly Out of State Tuition Fees", + "Yearly Out-of-State Tuition Fees", + "Yearly Tuition Fees", + "Yearly Out-of-State Tuition Fees:", + ]) or "")) or None, + } + + if resolved_website: + stats["university_links"] = stats.get("university_links", 0) + 1 + stats["website_rows"] = stats.get("website_rows", 0) + 1 + + # Benefits + Funding + benefits: list[str] | None = [] + funding_heading = "Funding Available" + funding_items: list[str] = [] + if "benefits" in sec_map: + j = sec_map["benefits"].get("section_json", {}) + if not isinstance(j, dict): + j = {} + raw_benefits = j.get("benefits", []) + if isinstance(raw_benefits, list): + benefits = [ + _normalize_text_content(str(b).strip()) + for b in raw_benefits + if str(b).strip() + ] + else: + benefits = [] + + funding_heading, funding_items = _extract_university_funding( + j, + { + "school_category": uni_raw.get("school_category"), + "status": "in" if is_truthy(uni_raw.get("is_active", True)) else "out", + }, + ) + # Normalize money formatting in funding items + funding_items = [_normalize_text_content(item) for item in funding_items] + + # Programs + programs = None + if "programs" in sec_map: + j = sec_map["programs"].get("section_json", {}) + if not isinstance(j, dict): + j = {} + programs_raw = j.get("programs", []) + if not isinstance(programs_raw, list): + programs_raw = [] + + if not include_inactive_programs: + programs_raw = [ + p for p in programs_raw + if isinstance(p, dict) and is_truthy( + 
def _normalize_steps(steps: list) -> list[dict]:
    """Normalise enrollment steps into structured dicts.

    Non-dict entries are skipped and do not consume a step number.

    Step-specific behaviour:

    * Step 1 carries the Telegram group: the Telegram URL is taken from the
      step's ``links`` (first ``t.me`` / ``telegram.me`` URL) or, failing
      that, from the body text. When found it is exposed as
      ``telegram_url`` and, if the step has no QR image of its own, a QR
      code URL is generated for it. The raw URL and the boilerplate
      follow-up sentence are removed from the body.
    * Step 2 always ends up with a link to internationalscholarsprogram.com;
      scheme-less links to that domain get ``https://`` prefixed.

    Args:
        steps: Raw step entries from the section payload.

    Returns:
        One dict per valid step with keys ``number``, ``title``, ``body``,
        ``body_html``, ``links``, ``plain_links``, ``qr_url`` and
        ``telegram_url``. ``plain_links`` is kept for consumers of the old
        shape and is always empty, since every link is emitted in ``links``.
    """
    from markupsafe import Markup

    telegram_pattern = r"https?://(?:t\.me|telegram\.me)/[^\s<)]+"
    isp_domain = "internationalscholarsprogram.com"

    def first_text(mapping, *keys, default=""):
        # First key that is present with a non-null value wins; JSON null is
        # treated as missing so it never renders as the string "None".
        for key in keys:
            if key in mapping and mapping[key] is not None:
                return str(mapping[key])
        return default

    result = []
    step_num = 0
    for s in steps:
        if not isinstance(s, dict):
            continue
        step_num += 1

        step_title = first_text(s, "title", "step_title").strip()
        body = _normalize_text_content(first_text(s, "body", "description").strip())

        links = []
        telegram_link = ""
        raw_links = s.get("links", [])
        if isinstance(raw_links, list):
            for lnk in raw_links:
                if not isinstance(lnk, dict):
                    continue
                label = first_text(lnk, "label", default="Link").strip()
                url = first_text(lnk, "url").strip()
                if not url:
                    continue

                low_url = url.lower()
                is_telegram = "telegram" in label.lower() or "t.me" in low_url
                if (
                    step_num == 2
                    and isp_domain in low_url
                    and not re.match(r"^https?://", url, flags=re.IGNORECASE)
                ):
                    url = "https://" + url

                # All links (including Telegram) are rendered as clickable anchors.
                # For Telegram use the full URL as visible label so readers can see/type it.
                links.append({"label": url if is_telegram else label, "url": url})

                # Remember the first real Telegram URL so step 1 can build its QR code
                # even when the URL is not repeated in the body text.
                if not telegram_link and re.match(telegram_pattern, url, flags=re.IGNORECASE):
                    telegram_link = url

        if step_num == 2 and not any(
            isp_domain in str(link.get("url", "")).lower() for link in links
        ):
            links.append({
                "label": "www.internationalscholarsprogram.com",
                "url": "https://www.internationalscholarsprogram.com",
            })

        qr = first_text(s, "qr_url", "qr_image").strip()
        telegram_url = ""
        if step_num == 1:
            telegram_ref = telegram_link
            if not telegram_ref:
                found = re.search(telegram_pattern, body, flags=re.IGNORECASE)
                if found:
                    telegram_ref = found.group(0)
            if telegram_ref:
                telegram_url = telegram_ref
                if not qr:
                    qr = (
                        "https://api.qrserver.com/v1/create-qr-code/?size=160x160&data="
                        + quote_plus(telegram_ref)
                    )
            # Strip the raw telegram URL and the follow-up description from body
            body = re.sub(telegram_pattern, "", body, flags=re.IGNORECASE)
            body = re.sub(r"This telegram group will help you interact with program administrators and other prospective students where you can ask any questions you may have about the program\.?", "", body, flags=re.IGNORECASE)
            body = re.sub(r"\n{2,}", "\n", body).strip()

        # Pre-format the final body with bold emphasis on REGULAR, PRIME, $ amounts
        body_html = Markup(emphasize_keywords(body)) if body else ""

        result.append({
            "number": step_num,
            "title": step_title,
            "body": body,
            "body_html": body_html,
            "links": links,
            "plain_links": [],
            "qr_url": qr,
            "telegram_url": telegram_url,
        })
    return result
def _normalize_table_v2(json_data: dict) -> RenderBlock:
    """Normalise table_v2 (comparison table with header groups).

    Column definitions are normalised exactly once, so the keys exposed in
    ``base_columns``, ``header_groups`` and ``all_columns`` are always the
    same strings used to key each row dict.

    Args:
        json_data: Section payload with optional ``base_columns``,
            ``header_groups`` and ``rows`` lists. Values of the wrong type
            are treated as empty lists; non-dict entries are skipped.

    Returns:
        A ``table`` RenderBlock with ``variant == "comparison"``. Each row
        is a dict mapping column key to keyword-emphasised cell text.
    """
    def as_list(value):
        return value if isinstance(value, list) else []

    def as_text(value):
        # JSON null becomes an empty string rather than the text "None".
        return "" if value is None else str(value)

    def columns_of(raw):
        return [
            {"key": as_text(col.get("key")), "label": as_text(col.get("label"))}
            for col in as_list(raw)
            if isinstance(col, dict)
        ]

    base_columns = columns_of(json_data.get("base_columns"))
    header_groups = [
        {
            "label": as_text(group.get("label")),
            "columns": columns_of(group.get("columns")),
        }
        for group in as_list(json_data.get("header_groups"))
        if isinstance(group, dict)
    ]

    # Flat column order: base columns first, then each group's columns in turn.
    all_cols: list[dict] = [dict(col) for col in base_columns]
    for group in header_groups:
        all_cols.extend(dict(col) for col in group["columns"])

    norm_rows = []
    for raw_row in as_list(json_data.get("rows")):
        if not isinstance(raw_row, dict):
            continue
        row = {}
        for col in all_cols:
            key = col["key"]
            val = raw_row.get(key, "")
            # Cells may be given as {"text": ...} objects.
            if isinstance(val, dict):
                val = val.get("text", "")
            row[key] = emphasize_keywords(_normalize_text_content(as_text(val)))
        norm_rows.append(row)

    return RenderBlock(
        block_type="table",
        css_class="hb-table hb-table-comparison",
        data={
            "base_columns": base_columns,
            "header_groups": header_groups,
            "all_columns": all_cols,
            "rows": norm_rows,
            "variant": "comparison",
        },
    )
table with spanning variant that has merged cell + if i < len(blocks) and blocks[i].block_type == "table" and raw_reloc_table: + raw_rows = raw_reloc_table.get("rows", []) + # Build the note text for the merged right cell + note_text = "" + if raw_note_after_table: + note_text = str(raw_note_after_table.get("text", "")) + + spanning_rows = _build_relocation_spanning_rows(raw_rows, note_text) + result.append(RenderBlock( + block_type="table", + css_class="hb-table hb-relocation-table", + data={"rows": spanning_rows, "variant": "spanning"}, + )) + i += 1 # skip the original table + + # Skip the paragraph that was moved into the merged cell + if (i < len(blocks) + and blocks[i].block_type == "paragraph" + and note_text): + i += 1 + continue + + # ── "ISP FINANCING" heading → inline note with mixed formatting ── + if (blk.block_type == "heading_2" + and "isp financing" in blk.data.get("text", "").lower()): + # Next block should be the interest rate paragraph + rate_text = "" + if i + 1 < len(blocks) and blocks[i + 1].block_type == "paragraph": + rate_text = blocks[i + 1].data.get("text", "") + result.append(RenderBlock( + block_type="note", + css_class="hb-note hb-isp-financing", + data={ + "parts": [ + {"text": "ISP FINANCING", "style": "bold"}, + {"text": " (" + _extract_rate_italic(rate_text) + "): " if rate_text else "", "style": "italic"}, + {"text": _extract_rate_amount(rate_text), "style": "bold"}, + ], + "inline": True, + }, + )) + i += 1 # skip the heading + if rate_text: + i += 1 # skip the paragraph + continue + + # ── "NB: CREDIT FACILITY" note → green styling ── + if (blk.block_type == "note" + and "credit facility" in blk.data.get("text", "").lower()): + result.append(RenderBlock( + block_type="note", + css_class="hb-note hb-credit-note", + data=blk.data, + )) + i += 1 + continue + + result.append(blk) + i += 1 + + return result + + +def _build_relocation_spanning_rows( + raw_rows: list, note_text: str, +) -> list[list[dict]]: + """Build spanning rows for 
the relocation cost table. + + Row 0: normal 2-column (consultation fees | Covered in the contribution) + Rows 1-7: left cell per row, right cell merged (rowspan) with italic note + Rows 8+: left cell only, empty right + """ + from markupsafe import Markup + + if not raw_rows: + return [] + + rows: list[list[dict]] = [] + + # Row 0 — has "Covered in the contribution" + first = raw_rows[0] if raw_rows else ["", ""] + rows.append([ + {"text": Markup(emphasize_keywords(str(first[0] if len(first) > 0 else ""))), "colspan": 1, "rowspan": 1}, + {"text": Markup("" + h(str(first[1] if len(first) > 1 else "")) + ""), "colspan": 1, "rowspan": 1}, + ]) + + # Rows 1-7: items with dollar amounts that get the merged right cell + # These are the visa/fee/rent/ticket rows (have parenthetical dollar amounts) + merged_start = 1 + merged_end = min(8, len(raw_rows)) # Visa Integrity through Air ticket + + for idx in range(merged_start, len(raw_rows)): + cell_text = str(raw_rows[idx][0] if len(raw_rows[idx]) > 0 else "") + left = {"text": Markup(emphasize_keywords(cell_text)), "colspan": 1, "rowspan": 1} + + if idx == merged_start and note_text: + # First merged row gets the rowspan cell + span_count = merged_end - merged_start + note_html = note_text.replace("\n\n", "

") + right = { + "text": Markup('' + h(note_html).replace("<br/><br/>", "

") + "
"), + "colspan": 1, + "rowspan": span_count, + } + rows.append([left, right]) + elif idx < merged_end: + # Subsequent merged rows — no right cell (covered by rowspan) + rows.append([left]) + else: + # Remaining rows — empty right cell + rows.append([ + left, + {"text": "", "colspan": 1, "rowspan": 1}, + ]) + + return rows + + +def _extract_rate_italic(text: str) -> str: + """Extract the italic portion: 'Interest rate of 12% – 15% Market Rate PA'.""" + # Text is like: "Interest rate of 12% – 15% Market Rate: UP TO USD 10,000" + m = re.match(r"(Interest rate.*?(?:Market Rate|PA))", text, re.IGNORECASE) + if m: + return m.group(1).rstrip(": ") + # Fallback: everything before the colon + if ":" in text: + return text.split(":")[0].strip() + return text + + +def _extract_rate_amount(text: str) -> str: + """Extract the amount portion: 'UP TO USD 10,000'.""" + m = re.search(r"(UP TO.*)", text, re.IGNORECASE) + if m: + return m.group(1).strip() + if ":" in text: + return text.split(":", 1)[1].strip() + return "" + + +# ─────────────────────────────────────────────────────────────── +# Tier 2 (cosigner) section post-processor +# ─────────────────────────────────────────────────────────────── + +def _postprocess_tier2(blocks: list[RenderBlock]) -> list[RenderBlock]: + """Style the Tier 2 section to match the reference layout. + + - Second consecutive bullet_list (sub-bullets under Sources of Funds) + gets checkmark styling instead of arrows. 
def _normalize_doc_v1(blocks: list, *, skip_title: str = "") -> list[RenderBlock]:
    """Normalise doc_v1 blocks into typed RenderBlocks.

    Supported block ``type`` values: ``paragraph``, ``subheading``,
    ``bullets``, ``numbered_list``, ``note``, ``note_inline``, ``table_v1``,
    ``table``, ``table_v3`` and ``table_v4``. Non-dict entries and unknown
    types are ignored. Text blocks that are empty after normalisation and
    lists without any non-blank item produce no output.

    Args:
        blocks: Raw doc_v1 block dicts from the section payload.
        skip_title: When set, every ``heading``/``subheading`` block whose
            text equals this title (case-insensitive, surrounding whitespace
            ignored) is dropped to avoid duplicating the section heading
            already emitted by the caller.

    Returns:
        The render blocks, in input order.
    """
    from markupsafe import Markup

    def as_text(value):
        # JSON null becomes an empty string rather than the text "None".
        return "" if value is None else str(value)

    def as_list(value):
        return value if isinstance(value, list) else []

    def clean(value):
        return _normalize_text_content(as_text(value))

    def cell_html(value):
        return emphasize_keywords(clean(value))

    def span(value):
        # Only plain ASCII positive integers are valid spans. Anything else
        # (missing, negative, "0", "2.5", characters like "²" that pass
        # str.isdigit() but make int() raise) falls back to 1.
        digits = as_text(value)
        if not (digits.isascii() and digits.isdigit()):
            return 1
        return max(1, int(digits))

    def list_entries(raw_items):
        stripped = [as_text(item).strip() for item in as_list(raw_items)]
        normalized = [_normalize_text_content(item) for item in stripped if item]
        return [Markup(emphasize_keywords(item)) for item in normalized]

    skip_norm = skip_title.strip().lower() if skip_title else ""
    result: list[RenderBlock] = []

    for b in blocks:
        if not isinstance(b, dict):
            continue
        btype = as_text(b.get("type"))

        # Skip heading/subheading blocks that duplicate the section title
        if skip_norm and btype in ("heading", "subheading"):
            if as_text(b.get("text")).strip().lower() == skip_norm:
                continue

        if btype == "paragraph":
            t = clean(b.get("text"))
            if t.strip():
                result.append(RenderBlock(
                    block_type="paragraph",
                    css_class="hb-paragraph",
                    data={
                        "text": t,
                        "html": Markup(emphasize_keywords(t)),
                    },
                ))

        elif btype == "subheading":
            t = clean(b.get("text"))
            if t.strip():
                result.append(RenderBlock(
                    block_type="heading_2",
                    css_class="hb-heading-2",
                    data={"text": t},
                ))

        elif btype == "bullets":
            html_items = list_entries(b.get("items"))
            if html_items:
                result.append(RenderBlock(
                    block_type="bullet_list",
                    css_class="hb-bullet-list",
                    data={"entries": html_items, "html_entries": True},
                ))

        elif btype == "numbered_list":
            html_items = list_entries(b.get("items"))
            if html_items:
                result.append(RenderBlock(
                    block_type="bullet_list",
                    css_class="hb-bullet-list hb-numbered-list",
                    data={"entries": html_items, "ordered": True, "html_entries": True},
                ))

        elif btype == "note":
            t = clean(b.get("text"))
            if t.strip():
                result.append(RenderBlock(
                    block_type="note",
                    css_class="hb-note",
                    data={"text": t},
                ))

        elif btype == "note_inline":
            normalized_parts = []
            for part in as_list(b.get("parts")):
                if not isinstance(part, dict):
                    continue
                t = clean(part.get("text"))
                if t:
                    normalized_parts.append({
                        "text": t,
                        "style": as_text(part.get("style")),
                    })
            if normalized_parts:
                result.append(RenderBlock(
                    block_type="note",
                    css_class="hb-note",
                    data={"parts": normalized_parts, "inline": True},
                ))

        elif btype == "table_v1":
            norm_rows = [
                [cell_html(cell) for cell in row]
                for row in as_list(b.get("rows"))
                if isinstance(row, list)
            ]
            result.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={
                    "columns": [str(c) for c in as_list(b.get("columns"))],
                    "rows": norm_rows,
                    "variant": "standard",
                },
            ))

        elif btype == "table":
            # Generic table (columns may be objects or strings, rows may be dicts or lists)
            col_labels = []
            col_keys = []
            for c in as_list(b.get("columns")):
                if isinstance(c, dict):
                    col_labels.append(str(c.get("label", c.get("key", ""))))
                    col_keys.append(str(c.get("key", "")))
                else:
                    # String columns: derive the row key from the label.
                    col_labels.append(str(c))
                    col_keys.append(re.sub(r"[^a-z0-9]+", "_", str(c).lower()))
            norm_rows = []
            for row in as_list(b.get("rows")):
                if isinstance(row, dict):
                    norm_rows.append([cell_html(row.get(k, "")) for k in col_keys])
                elif isinstance(row, list):
                    norm_rows.append([cell_html(cell) for cell in row])
            result.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={"columns": col_labels, "rows": norm_rows, "variant": "standard"},
            ))

        elif btype in ("table_v3", "table_v4"):
            norm_rows = []
            for row in as_list(b.get("rows")):
                if not isinstance(row, list):
                    continue
                norm_row = []
                for cell in row:
                    if isinstance(cell, dict):
                        norm_row.append({
                            "text": cell_html(cell.get("text", "")),
                            "colspan": span(cell.get("colspan")),
                            "rowspan": span(cell.get("rowspan")),
                        })
                    else:
                        norm_row.append({
                            "text": cell_html(cell),
                            "colspan": 1,
                            "rowspan": 1,
                        })
                norm_rows.append(norm_row)
            result.append(RenderBlock(
                block_type="table",
                css_class="hb-table",
                data={"rows": norm_rows, "variant": "spanning"},
            ))

    return result
# Playwright driver that owns ``_browser``. Kept so the driver process can be
# stopped on relaunch and on shutdown instead of being leaked.
_playwright = None


async def _get_browser():
    """Get or create a persistent Chromium browser instance.

    Uses ``channel="chrome"`` so Playwright drives the system-installed
    Google Chrome (or Chromium) instead of requiring a separate browser
    download from the Playwright CDN.  Falls back to the default
    bundled Chromium if the system browser is not found.

    The browser is created lazily under ``_browser_lock`` and reused until
    it disconnects. When a replacement has to be launched, the Playwright
    driver of the dead browser is stopped first. If neither browser can be
    launched, the freshly started driver is stopped and the launch error
    propagates to the caller.

    Returns:
        The connected Playwright ``Browser`` instance.
    """
    global _browser, _playwright
    async with _browser_lock:
        if _browser is None or not _browser.is_connected():
            from playwright.async_api import async_playwright

            # The previous browser (if any) is gone; release its driver too.
            if _playwright is not None:
                try:
                    await _playwright.stop()
                except Exception:
                    logger.debug("Stopping stale Playwright driver failed", exc_info=True)
                _playwright = None
            _browser = None

            pw = await async_playwright().start()

            launch_args = [
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--font-render-hinting=none",
            ]

            try:
                # Try system Chrome first, then fall back to bundled Chromium
                try:
                    browser = await pw.chromium.launch(
                        channel="chrome",
                        headless=True,
                        args=launch_args,
                    )
                    logger.info("System Chrome launched for PDF rendering")
                except Exception:
                    logger.warning(
                        "System Chrome not available, falling back to bundled Chromium"
                    )
                    browser = await pw.chromium.launch(
                        headless=True,
                        args=launch_args,
                    )
                    logger.info("Bundled Chromium launched for PDF rendering")
            except Exception:
                # No browser could be started: do not leave the driver running.
                await pw.stop()
                raise

            _playwright = pw
            _browser = browser
        return _browser


async def shutdown_browser():
    """Gracefully close the browser on application shutdown.

    Closes the browser if it is still connected, then stops the Playwright
    driver that was started for it. Safe to call when nothing was launched.
    """
    global _browser, _playwright
    async with _browser_lock:
        if _browser and _browser.is_connected():
            await _browser.close()
            logger.info("Chromium browser closed")
        _browser = None
        if _playwright is not None:
            await _playwright.stop()
            _playwright = None
*, + format: str = "A4", + print_background: bool = True, + prefer_css_page_size: bool = True, + wait_timeout: int = 30000, +) -> bytes: + """Render HTML string to PDF bytes using Playwright Chromium. + + Generates a base PDF (content only, no decorative header/label), + then creates a one-page overlay with the header image and right-side + label, and stamps the overlay onto content pages (page 3 → last + content page) using pypdf. Pages 1-2 (cover/TOC) and trailing + full-page image pages get no overlay. + + Args: + html_content: Complete HTML document string. + format: Page format (default A4). + print_background: Include background colors/images. + prefer_css_page_size: Use @page CSS rules for sizing. + wait_timeout: Max time (ms) to wait for page load. + + Returns: + PDF file bytes. + """ + browser = await _get_browser() + context = await browser.new_context( + viewport={"width": 794, "height": 1123}, # A4 at 96dpi + device_scale_factor=1, + java_script_enabled=True, + ) + page = await context.new_page() + + try: + # Write HTML to a temp file so Chromium can load local file:// resources + with tempfile.NamedTemporaryFile( + mode="w", + suffix=".html", + delete=False, + encoding="utf-8", + ) as tmp: + tmp.write(html_content) + tmp_path = tmp.name + + try: + file_url = Path(tmp_path).as_uri() + await page.goto(file_url, wait_until="load", timeout=wait_timeout) + + # Wait for fonts and images to be fully loaded + await page.evaluate("() => document.fonts.ready") + await page.evaluate(""" + () => { + const images = Array.from(document.querySelectorAll('img')); + return Promise.all(images.map(img => { + if (img.complete) return Promise.resolve(); + return new Promise(r => { + img.addEventListener('load', r); + img.addEventListener('error', r); + }); + })); + } + """) + + # ── Collect info from DOM before hiding elements ── + header_src = await page.evaluate(""" + () => { + const img = document.querySelector('.page-header img'); + return img ? 
img.src : ''; + } + """) + label_src = await page.evaluate(""" + () => { + const img = document.querySelector('.hb-right-label img'); + return img ? img.src : ''; + } + """) + num_bottom_pages = await page.evaluate(""" + () => document.querySelectorAll('.fullpage-img-wrap').length + """) + # Cover page count: cover + TOC image (each is a .cover-page) + num_cover_pages = await page.evaluate(""" + () => document.querySelectorAll('.cover-page').length + """) + + logger.info( + "Overlay info: header=%s, label=%s, covers=%d, bottoms=%d", + bool(header_src), bool(label_src), + num_cover_pages, num_bottom_pages, + ) + + # ── Hide header, footer, and label from the base PDF ── + await page.evaluate(""" + () => { + document.querySelectorAll('.page-header, .page-footer, .hb-right-label') + .forEach(el => el.style.display = 'none'); + } + """) + + # ── Render BASE PDF (no header, no label) ── + base_pdf = await page.pdf( + format=format, + print_background=print_background, + prefer_css_page_size=prefer_css_page_size, + margin={ + "top": "2.54cm", + "right": "2.54cm", + "bottom": "2.54cm", + "left": "2.54cm", + }, + display_header_footer=True, + header_template='', + footer_template=( + '
' + '
' + ), + ) + logger.info("Base PDF rendered, size=%d bytes", len(base_pdf)) + + finally: + os.unlink(tmp_path) + + # ── Build overlay (header + label) and stamp onto content pages ── + if not header_src and not label_src: + logger.info("No header or label to overlay, returning base PDF") + return base_pdf + + overlay_pdf = await _build_overlay_pdf( + page, header_src, label_src, format, wait_timeout + ) + + merged = _stamp_overlay( + base_pdf, overlay_pdf, + skip_front=num_cover_pages, + skip_back=num_bottom_pages, + ) + logger.info("Final PDF with overlay, size=%d bytes", len(merged)) + return merged + + finally: + await context.close() + + +async def _build_overlay_pdf( + page, header_src: str, label_src: str, + format: str, timeout: int, +) -> bytes: + """Render a single-page transparent overlay PDF with header + label.""" + parts = [] + if header_src: + parts.append( + f'
' + f'
' + ) + if label_src: + # Word doc: 3.0cm × 22.7cm container, ~0.35cm bleeds past right edge. + # Scaled proportionally to A4: 2.9cm × 24.1cm, right:-0.3cm to + # let part bleed off-page just like the Word original. + parts.append( + f'
' + f'
' + ) + + overlay_html = ( + '' + '' + + '\n'.join(parts) + + '
' + '' + ) + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".html", delete=False, encoding="utf-8", + ) as tmp: + tmp.write(overlay_html) + tmp_path = tmp.name + + try: + await page.goto( + Path(tmp_path).as_uri(), + wait_until="load", + timeout=timeout, + ) + await page.evaluate("() => document.fonts.ready") + await page.evaluate(""" + () => { + const images = Array.from(document.querySelectorAll('img')); + return Promise.all(images.map(img => { + if (img.complete) return Promise.resolve(); + return new Promise(r => { + img.addEventListener('load', r); + img.addEventListener('error', r); + }); + })); + } + """) + + overlay_bytes = await page.pdf( + format=format, + print_background=True, + prefer_css_page_size=True, + margin={"top": "0", "right": "0", "bottom": "0", "left": "0"}, + display_header_footer=False, + ) + logger.info("Overlay PDF rendered, size=%d bytes", len(overlay_bytes)) + return overlay_bytes + finally: + os.unlink(tmp_path) + + +def _stamp_overlay( + base_pdf: bytes, + overlay_pdf: bytes, + skip_front: int = 2, + skip_back: int = 4, +) -> bytes: + """Merge overlay onto content pages of the base PDF. + + Pages 0..(skip_front-1) and (total-skip_back)..(total-1) are left + untouched. All other pages get the overlay stamped on top. + """ + import io + from pypdf import PdfReader, PdfWriter + + base = PdfReader(io.BytesIO(base_pdf)) + overlay_reader = PdfReader(io.BytesIO(overlay_pdf)) + overlay_page = overlay_reader.pages[0] + writer = PdfWriter() + + total = len(base.pages) + first_content = skip_front # e.g. page index 2 + last_content = total - skip_back - 1 # e.g. 
total-5 + + for i, pg in enumerate(base.pages): + if first_content <= i <= last_content: + pg.merge_page(overlay_page) + writer.add_page(pg) + + buf = io.BytesIO() + writer.write(buf) + return buf.getvalue() diff --git a/app/services/pdf_service.py b/app/services/pdf_service.py new file mode 100644 index 0000000000000000000000000000000000000000..7fceed01fa8be01805cd3461d28538e2e237a1ea --- /dev/null +++ b/app/services/pdf_service.py @@ -0,0 +1,213 @@ +"""PDF generation service — Playwright Chromium PDF export. + +Uses headless Chromium via Playwright to render the handbook HTML +and export a print-quality PDF. Replaces the previous WeasyPrint +approach for better CSS support and visual fidelity. +""" + +from __future__ import annotations + +import asyncio +import logging +import os + +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +# No in-memory cache — every request fetches fresh data from the database + + +def _get_images_config() -> dict[str, Any]: + """Build images config from the images directory. + + Mirrors PHP getImagesConfig(). 
+ """ + from app.core.config import get_settings + + settings = get_settings() + images_dir = os.path.realpath(settings.images_dir) + + if not os.path.isdir(images_dir): + raise RuntimeError(f"Handbook images directory not found: {images_dir}") + + cover_image = os.path.join(images_dir, "first_page.jpg") + toc_image = os.path.join(images_dir, "toc.jpg") + header_image = os.path.join(images_dir, "kenya_airlift_header.jpg") + label_image = os.path.join(images_dir, "label.png") + + # Auto-discover bottom page images (sorted by filename, prefer .jpg over .png dupes) + _bottom_candidates = sorted( + f for f in os.listdir(images_dir) + if f.lower().endswith((".jpg", ".png")) + and "page-000" in f + and f != "first_page.jpg" + ) + # Deduplicate: if both .jpg and .png exist for same stem, keep .jpg + _seen_stems: set[str] = set() + bottom_pages: list[str] = [] + for f in _bottom_candidates: + stem = os.path.splitext(f)[0] + if stem in _seen_stems: + continue + _seen_stems.add(stem) + bottom_pages.append(os.path.join(images_dir, f)) + + # Validate required images exist + required = [cover_image, toc_image, header_image, label_image] + bottom_pages + missing = [p for p in required if not os.path.isfile(p)] + if missing: + logger.warning("Missing handbook images: %s", missing) + # Don't crash — generate with what we have + + return { + "imagesDir": images_dir, + "chroot": os.path.dirname(images_dir), + "coverImage": cover_image if os.path.isfile(cover_image) else "", + "tocImage": toc_image if os.path.isfile(toc_image) else "", + "headerImage": header_image if os.path.isfile(header_image) else "", + "labelImage": label_image if os.path.isfile(label_image) else "", + "bottomPages": [p for p in bottom_pages if os.path.isfile(p)], + "httpsBaseHandbook": "", + } + + +def render_pdf(html: str) -> bytes: + """Render HTML to PDF bytes using Playwright Chromium. + + This is a synchronous wrapper around the async Playwright renderer. 
+ For async contexts, use render_pdf_async() instead. + """ + import asyncio + + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop and loop.is_running(): + # Already in an async context — create a new task + import concurrent.futures + with concurrent.futures.ThreadPoolExecutor() as pool: + future = pool.submit(asyncio.run, _render_pdf_async(html)) + return future.result() + else: + return asyncio.run(_render_pdf_async(html)) + + +async def _render_pdf_async(html: str) -> bytes: + """Async: render HTML to PDF bytes via Playwright Chromium.""" + from app.services.pdf_renderer import render_pdf_from_html + return await render_pdf_from_html(html) + + +async def generate_handbook_pdf( + catalog_id: int = 0, + include_inactive_programs: bool = False, + debug: bool = False, +) -> bytes: + """Full pipeline: fetch data -> Jinja2 HTML -> Playwright Chromium -> PDF. + + Mirrors the PHP download.php flow with Playwright as the rendering engine. + + Optimizations over the naive sequential approach: + - Parallel API fetches (global + university sections concurrently) + - Parallel campus image prefetching (async batch instead of serial) + """ + from app.services.data_fetcher import fetch_global_sections, fetch_university_sections + from app.services.html_builder import build_handbook_html + from app.services.pdf_renderer import render_pdf_from_html + from app.services.renderers import prefetch_images + + images = _get_images_config() + + # Fetch global sections and university sections in parallel + globals_data, by_uni = await asyncio.gather( + fetch_global_sections(catalog_id), + fetch_university_sections(), + ) + + # Collect all campus image URLs and prefetch them in parallel + campus_urls: list[str] = [] + for uid, uni in by_uni.items(): + if not isinstance(uni, dict): + continue + sections = uni.get("sections", []) + if not isinstance(sections, list): + continue + for s in sections: + if not isinstance(s, dict): + continue + k = 
str(s.get("section_key", "")) + if k in ("campus_image", "image"): + j = s.get("section_json", {}) + if isinstance(j, dict): + url = str(j.get("image_url", "")).strip() + if url: + campus_urls.append(url) + + if campus_urls: + await prefetch_images(campus_urls) + + html = build_handbook_html( + globals_data, + by_uni, + images, + allow_remote=True, + include_inactive_programs=include_inactive_programs, + debug=debug, + ) + + pdf_bytes = await render_pdf_from_html(html) + + return pdf_bytes + + +async def generate_handbook_html( + catalog_id: int = 0, + include_inactive_programs: bool = False, + debug: bool = False, +) -> str: + """Full pipeline: fetch data -> Jinja2 HTML (no PDF conversion).""" + from app.services.data_fetcher import fetch_global_sections, fetch_university_sections + from app.services.html_builder import build_handbook_html + from app.services.renderers import prefetch_images + + images = _get_images_config() + + globals_data, by_uni = await asyncio.gather( + fetch_global_sections(catalog_id), + fetch_university_sections(), + ) + + # Prefetch campus images in parallel for the HTML build + campus_urls: list[str] = [] + for uid, uni in by_uni.items(): + if not isinstance(uni, dict): + continue + sections = uni.get("sections", []) + if not isinstance(sections, list): + continue + for s in sections: + if not isinstance(s, dict): + continue + k = str(s.get("section_key", "")) + if k in ("campus_image", "image"): + j = s.get("section_json", {}) + if isinstance(j, dict): + url = str(j.get("image_url", "")).strip() + if url: + campus_urls.append(url) + + if campus_urls: + await prefetch_images(campus_urls) + + return build_handbook_html( + globals_data, + by_uni, + images, + allow_remote=True, + include_inactive_programs=include_inactive_programs, + debug=debug, + ) diff --git a/app/services/renderers.py b/app/services/renderers.py new file mode 100644 index 0000000000000000000000000000000000000000..1f795e226248e019479daf36b085790509c861cf --- /dev/null 
+++ b/app/services/renderers.py @@ -0,0 +1,1097 @@ +"""Renderers — mirrors PHP renderers.php. + +Contains functions for rendering: +- Table of Contents (TOC) +- Global section blocks (overview, steps, bullets, tables, doc_v1, etc.) +- University section blocks (overview, benefits, programs) +- Remote image fetching as data URIs +""" + +from __future__ import annotations + +import base64 +import logging +import re +from typing import Any + +import httpx + +from app.services.utils import ( + emphasize_keywords, + format_money_figures, + get_any, + h, + hb_slug, + is_assoc, + is_truthy, +) + +logger = logging.getLogger(__name__) + + +# ========================================= +# Image fetching (with in-memory cache + async batch support) +# ========================================= + +_image_cache: dict[str, str] = {} + + +def _detect_image_mime(data: bytes, content_type: str) -> str: + """Detect image MIME type from headers or magic bytes.""" + if "image/" in content_type: + return content_type.split(";")[0].strip() + if data[:8].startswith(b"\x89PNG"): + return "image/png" + if data[:3] == b"\xff\xd8\xff": + return "image/jpeg" + if data[:4] == b"GIF8": + return "image/gif" + if data[:4] == b"RIFF" and data[8:12] == b"WEBP": + return "image/webp" + return "" + + +def fetch_image_data_uri(url: str) -> str: + """Fetch a remote image and return as data:... URI. 
Mirrors PHP fetchImageDataUri.""" + url = url.strip() + if not url: + return "" + + # Check cache first (populated by prefetch_images) + if url in _image_cache: + return _image_cache[url] + + try: + with httpx.Client(verify=False, timeout=12, follow_redirects=True) as client: + resp = client.get(url) + if resp.status_code < 200 or resp.status_code >= 300 or not resp.content: + logger.warning("Image fetch failed for %s status=%d", url, resp.status_code) + _image_cache[url] = "" + return "" + data = resp.content + except Exception as exc: + logger.warning("Image fetch error for %s: %s", url, exc) + _image_cache[url] = "" + return "" + + mime = _detect_image_mime(data, resp.headers.get("content-type", "")) + if not mime.startswith("image/"): + logger.warning("Invalid image mime %s for %s", mime, url) + _image_cache[url] = "" + return "" + + b64 = base64.b64encode(data).decode("ascii") + result = f"data:{mime};base64,{b64}" + _image_cache[url] = result + return result + + +async def prefetch_images(urls: list[str]) -> dict[str, str]: + """Fetch all images in parallel using async HTTP and populate the cache. + + This is the key optimization: instead of fetching ~30 campus images + serially (30-60s), we fetch them all concurrently (~3-5s). 
+ """ + import asyncio + + unique_urls = list({u.strip() for u in urls if u.strip() and u.strip() not in _image_cache}) + if not unique_urls: + return {u: _image_cache.get(u.strip(), "") for u in urls} + + async def _fetch_one(client: httpx.AsyncClient, url: str) -> tuple[str, str]: + try: + resp = await client.get(url) + if resp.status_code < 200 or resp.status_code >= 300 or not resp.content: + logger.warning("Prefetch image failed for %s status=%d", url, resp.status_code) + return url, "" + mime = _detect_image_mime(resp.content, resp.headers.get("content-type", "")) + if not mime.startswith("image/"): + logger.warning("Prefetch invalid mime %s for %s", mime, url) + return url, "" + b64 = base64.b64encode(resp.content).decode("ascii") + return url, f"data:{mime};base64,{b64}" + except Exception as exc: + logger.warning("Prefetch image error for %s: %s", url, exc) + return url, "" + + logger.info("Prefetching %d campus images in parallel...", len(unique_urls)) + async with httpx.AsyncClient(verify=False, timeout=15, follow_redirects=True) as client: + results = await asyncio.gather(*[_fetch_one(client, u) for u in unique_urls]) + + fetched = 0 + for url, data_uri in results: + _image_cache[url] = data_uri + if data_uri: + fetched += 1 + + logger.info("Prefetched %d/%d images successfully", fetched, len(unique_urls)) + return {u: _image_cache.get(u.strip(), "") for u in urls} + + +# ========================================= +# Funding extraction +# ========================================= + +def _extract_university_funding( + j: dict, + school_meta: dict | None = None, +) -> tuple[str, list[str]]: + """Extract funding heading + items from benefits section JSON. + + Priority: + 1. section_json.funding.options + 2. section_json.funding_available + 3. fallback from pth_ref_schools.school_category + """ + if not isinstance(j, dict): + j = {} + + heading = "Funding Available" + items: list[str] = [] + + # 1. 
Preferred normalized shape + funding = j.get("funding", {}) + if isinstance(funding, dict): + subheading = str(funding.get("subheading", "")).strip() + if subheading: + heading = subheading + + options = funding.get("options", []) + if isinstance(options, list): + for opt in options: + if not isinstance(opt, dict): + continue + name = str(opt.get("name", "")).strip() + amount = str(opt.get("amount", "")).strip() + + if name and amount: + items.append(f"{name} - {amount}") + elif name: + items.append(name) + elif amount: + items.append(amount) + + # 2. Legacy fallback shape + if not items: + funding_available = j.get("funding_available", []) + if isinstance(funding_available, list): + for item in funding_available: + text = str(item).strip() + if text: + items.append(text) + + # 3. School-category fallback + if not items and isinstance(school_meta, dict): + school_category = str(school_meta.get("school_category", "")).strip().lower() + status = str(school_meta.get("status", "")).strip().lower() + + if status == "in": + if school_category == "non_cosigner": + items = [ + "ISP Study Loan - $10,000", + "Partner 1 (Unsecured Loan) - Up to $50,000 per academic year", + "Partner 3 (Credit Option) - Up to $15,000", + ] + elif school_category == "cosigner": + items = [ + "ISP Study Loan - $10,000", + "Partner 2 (A Cosigned Loan) - Full Coverage Support", + "Partner 3 (Credit Option) - Up to $15,000", + ] + + return (heading, items) + + +# ========================================= +# TOC sorting and rendering +# ========================================= + +def sort_toc(items: list[dict]) -> list[dict]: + """Mirrors PHP sortHandbookToc — sort by sort_order/sort, stable fallback.""" + for idx, e in enumerate(items): + e.setdefault("_i", idx) + + def key_fn(e: dict): + so = e.get("sort_order", e.get("sort")) + if so is not None: + try: + so_num = float(so) + return (0, so_num, e.get("_i", 0)) + except (ValueError, TypeError): + pass + return (1, 0.0, e.get("_i", 0)) + + 
items.sort(key=key_fn) + for e in items: + e.pop("_i", None) + return items + + +def render_toc(items: list[dict], debug: bool = False, show_pages: bool = True) -> str: + """Render Table of Contents HTML (DOMPDF-safe). + + Mirrors PHP renderToc(). + """ + sorted_items = sort_toc(items) + + out = '' + out += '
' + out += '
Table of Contents
' + out += ( + '' + '' + ) + + for e in sorted_items: + if not isinstance(e, dict): + continue + title = str(e.get("title", "")).strip() + target = str(e.get("target", e.get("anchor", ""))).strip() + if not title: + continue + + level = max(0, min(3, int(e.get("level", 0)))) + bold = bool(e.get("bold", False)) + upper = bool(e.get("upper", False)) + if level == 0: + bold = True + upper = True + + row_class = "toc-row--major" if level == 0 else "toc-row--sub" + if level >= 2: + row_class += " toc-row--deep" + + text = title.upper() if upper else title + title_inner = h(text) + if target: + title_inner = f'{title_inner}' + if bold: + title_inner = f"{title_inner}" + + page = str(e.get("page", "")).strip() + if show_pages and page: + page_cell = f"{h(page)}" + else: + page_cell = " " + + indent = "" + if level == 1: + indent = "padding-left:16px;" + elif level >= 2: + indent = "padding-left:30px;" + + title_style = ( + "vertical-align:bottom; padding:1px 4px 1px 0; font-size:10px; " + "line-height:1.15; color:#111;" + + (" font-weight:700;" if bold else " font-weight:400;") + + (" text-transform:uppercase; letter-spacing:0.1px;" if upper else "") + + (f" {indent}" if indent else "") + ) + + out += f'' + out += f'' + out += '' + out += ( + f'' + ) + out += "" + + out += "
{title_inner} {page_cell}
" + return out + + +def render_toc_hardcoded( + items: list[dict], + debug: bool = False, + page_start: int = 3, + page_offset: int = 0, +) -> str: + """Mirrors PHP renderTocHardcoded — sort, assign sequential pages, render.""" + sorted_items = sort_toc(items) + + seq = max(1, page_start) + for item in sorted_items: + p = str(item.get("page", "")).strip() + if p and p.lstrip("-").isdigit(): + display = int(p) + page_offset + item["page"] = str(display) + if display >= seq: + seq = display + 1 + else: + item["page"] = str(seq) + seq += 1 + + out = "\n" + out += '
' + out += '

Table of Contents

' + out += ( + '' + '' + ) + + for e in sorted_items: + if not isinstance(e, dict): + continue + title = str(e.get("title", "")).strip() + target = str(e.get("target", e.get("anchor", ""))).strip() + if not title: + continue + + level = max(0, min(3, int(e.get("level", 0)))) + bold = bool(e.get("bold", False)) + upper = bool(e.get("upper", False)) + if level == 0: + bold = True + upper = True + + row_class = "toc-row--major" if level == 0 else "toc-row--sub" + if level >= 2: + row_class += " toc-row--deep" + + text = title.upper() if upper else title + title_inner = h(text) + if target: + title_inner = f'{title_inner}' + if bold: + title_inner = f"{title_inner}" + + page = str(e.get("page", "")).strip() + page_html = f"{h(page)}" if page else " " + + indent = "" + if level == 1: + indent = "padding-left:16px;" + elif level >= 2: + indent = "padding-left:30px;" + + title_style = ( + "vertical-align:bottom;padding:1px 4px 1px 0;font-size:10px;" + "line-height:1.15;color:#111;" + + ("font-weight:700;" if bold else "font-weight:400;") + + ("text-transform:uppercase;letter-spacing:0.1px;" if upper else "") + + indent + ) + + out += f'' + out += f'' + out += '' + out += ( + f'' + ) + out += "" + + out += "
{title_inner} {page_html}
" + return out + + +# ========================================= +# table_v3 / table_v4 cell helpers +# ========================================= + +# Mapping of style names → inline CSS strings for table_v3/v4 cells +_V3_STYLE_MAP: dict[str, str] = { + "band_teal": "text-align:center;font-weight:700;color:#fff;background:#199970;", + "band_navy": "text-align:center;font-weight:700;color:#fff;background:#0263A3;", + "bold_amounts": "font-weight:600;", + "green_center_bold": "text-align:center;font-weight:700;color:#199970;", + "center_bold_multiline": "text-align:center;font-weight:600;vertical-align:middle;", + "footer_center_bold": "text-align:center;font-weight:700;background:#f5f5f5;", + "covered_merged": "vertical-align:top;font-size:9pt;line-height:1.5;", +} + + +def _parse_v3_cell(cell: Any) -> tuple[str, str, str]: + """Parse a table_v3/v4 cell dict into (attr_str, style_str, html_content).""" + if not isinstance(cell, dict): + text = format_money_figures(str(cell)) if cell else "" + return ("", "", h(text)) + + colspan = 1 + rowspan = 1 + text_val = str(cell.get("text", "")) + cs = cell.get("colspan") + rs = cell.get("rowspan") + if cs is not None and str(cs).isdigit(): + colspan = int(cs) + if rs is not None and str(rs).isdigit(): + rowspan = int(rs) + + attr = "" + if colspan > 1: + attr += f' colspan="{colspan}"' + if rowspan > 1: + attr += f' rowspan="{rowspan}"' + + style_name = str(cell.get("style", "")) + inline_css = _V3_STYLE_MAP.get(style_name, "") + style_str = f' style="{inline_css}"' if inline_css else "" + + # Rich parts within cell (merged cells with multiple text blocks) + parts = cell.get("parts") + if isinstance(parts, list) and parts: + html_parts: list[str] = [] + for p in parts: + if not isinstance(p, dict): + continue + pt = format_money_figures(str(p.get("text", ""))) + if not pt: + continue + if p.get("bold"): + html_parts.append(f"{h(pt)}") + else: + html_parts.append(h(pt)) + content = "

".join(html_parts) if html_parts else h(format_money_figures(text_val)) + else: + content = h(format_money_figures(text_val)) + + return (attr, style_str, content) + + +# ========================================= +# Global blocks renderer +# ========================================= + +def render_global_blocks( + section_key: str, + section_title: str, + json_data: dict | list, + debug: bool = False, + *, + universities: list[dict] | None = None, +) -> str: + """Render a single global section's content. + + Mirrors PHP renderGlobalBlocks() — handles steps, bullets, tables, + doc_v1, table_v2, summary_of_universities, etc. + """ + html_out = "" + key_norm = section_key.lower().strip() + + if not isinstance(json_data, dict): + json_data = {} + + layout_norm = str(json_data.get("layout", "")).lower().strip() + + # ── Section title ── + # Prefer the JSON-level title (display-ready) over the DB section_title + json_title = str(json_data.get("title", "")).strip() if isinstance(json_data, dict) else "" + title = json_title or section_title.strip() + if title and key_norm != "table_of_contents": + html_out += f'

{h(title)}

' + _title_norm = title.lower() + + # ── Steps ── + steps = json_data.get("steps") + if isinstance(steps, list): + step_num = 0 + for s in steps: + if not isinstance(s, dict): + continue + step_num += 1 + step_title = str(s.get("title", s.get("step_title", ""))).strip() + body = format_money_figures(str(s.get("body", s.get("description", ""))).strip()) + + html_out += '
' + if step_title: + html_out += f'
Step {step_num}: {h(step_title)}
' + if body: + html_out += f'

{emphasize_keywords(body)}

' + + links = s.get("links", []) + if isinstance(links, list) and links: + html_out += '" + + qr = str(s.get("qr_url", s.get("qr_image", ""))).strip() + if qr: + html_out += f'QR' + + html_out += "
" + return html_out + + # ── Bullets ── + has_bullets = isinstance(json_data.get("bullets"), list) + has_items = isinstance(json_data.get("items"), list) + if has_bullets or (layout_norm == "bullets_with_note" and has_items): + lst = json_data.get("items") if has_items else json_data.get("bullets") + html_out += '" + + note = format_money_figures(str(json_data.get("note", json_data.get("footnote", ""))).strip()) + if note: + html_out += f'
{h(note)}
' + return html_out + + # ── Basic table ── + cols = json_data.get("columns") + rows = json_data.get("rows") + if isinstance(cols, list) and isinstance(rows, list): + html_out += '' + if cols: + html_out += "" + for c in cols: + html_out += f"" + html_out += "" + html_out += "" + + for r in rows: + if not isinstance(r, (list, dict)): + continue + html_out += "" + if isinstance(r, dict): + for col_label in cols: + key_guess = re.sub(r"[^a-z0-9]+", "_", str(col_label).lower()) + cell = r.get(key_guess, "") + html_out += f"" + else: + for cell in r: + html_out += f"" + html_out += "" + + html_out += "
{h(str(c))}
{h(format_money_figures(str(cell)))}{h(format_money_figures(str(cell)))}
" + return html_out + + # ── table_v2 ── + if layout_norm == "table_v2": + base_cols = json_data.get("base_columns", []) + groups = json_data.get("header_groups", []) + rows = json_data.get("rows", []) + if not isinstance(base_cols, list): + base_cols = [] + if not isinstance(groups, list): + groups = [] + if not isinstance(rows, list): + rows = [] + + all_cols: list[dict] = [] + for c in base_cols: + if isinstance(c, dict): + all_cols.append({"key": str(c.get("key", "")), "label": str(c.get("label", ""))}) + for g in groups: + if not isinstance(g, dict): + continue + g_cols = g.get("columns", []) + if not isinstance(g_cols, list): + g_cols = [] + for c in g_cols: + if isinstance(c, dict): + all_cols.append({"key": str(c.get("key", "")), "label": str(c.get("label", ""))}) + + html_out += '' + has_group_row = bool(groups) + if has_group_row: + html_out += "" + for c in base_cols: + if isinstance(c, dict): + html_out += f'' + for g in groups: + if not isinstance(g, dict): + continue + g_cols = g.get("columns", []) + if not isinstance(g_cols, list): + g_cols = [] + span = max(1, len(g_cols)) + html_out += f'' + html_out += "" + for g in groups: + if not isinstance(g, dict): + continue + g_cols = g.get("columns", []) + if not isinstance(g_cols, list): + g_cols = [] + for c in g_cols: + if isinstance(c, dict): + html_out += f'' + html_out += "" + else: + html_out += "" + for c in all_cols: + html_out += f'' + html_out += "" + + html_out += "" + for r in rows: + if not isinstance(r, dict): + continue + html_out += "" + for c in all_cols: + k = c.get("key", "") + val = r.get(k, "") + if isinstance(val, dict): + val = val.get("text", "") + html_out += f"" + html_out += "" + html_out += "
{h(str(c.get("label", "")))}{h(str(g.get("label", "")))}
{h(str(c.get("label", "")))}
{h(c.get("label", ""))}
{h(format_money_figures(str(val)))}
" + return html_out + + # ── doc_v1 ── + if layout_norm == "doc_v1" and isinstance(json_data.get("blocks"), list): + for b in json_data["blocks"]: + if not isinstance(b, dict): + continue + btype = str(b.get("type", "")) + + # Skip heading/subheading blocks that duplicate the section title + if btype in ("heading", "subheading"): + block_text = str(b.get("text", "")).strip().lower() + if block_text == _title_norm: + continue + + if btype == "paragraph": + t = format_money_figures(str(b.get("text", ""))) + if t.strip(): + html_out += f'

{emphasize_keywords(t)}

' + + elif btype == "subheading": + t = format_money_figures(str(b.get("text", ""))) + if t.strip(): + html_out += f'

{h(t)}

' + + elif btype == "bullets": + items = b.get("items", []) + if not isinstance(items, list): + items = [] + html_out += '" + + elif btype == "numbered_list": + items = b.get("items", []) + if not isinstance(items, list): + items = [] + html_out += '
    ' + for it in items: + it_str = format_money_figures(str(it).strip()) + if it_str: + html_out += f"
  1. {emphasize_keywords(it_str)}
  2. " + html_out += "
" + + elif btype == "note": + t = format_money_figures(str(b.get("text", ""))) + if t.strip(): + html_out += f'
{h(t)}
' + + elif btype == "note_inline": + parts = b.get("parts", []) + if not isinstance(parts, list): + parts = [] + txt = "" + for p in parts: + if not isinstance(p, dict): + continue + t = format_money_figures(str(p.get("text", ""))) + if not t: + continue + style = str(p.get("style", "")) + if style == "red_bold": + txt += f"{h(t)}" + else: + txt += h(t) + if re.sub(r"<[^>]+>", "", txt).strip(): + html_out += f'
{txt}
' + + elif btype == "table_v1": + t_cols = b.get("columns", []) + t_rows = b.get("rows", []) + if not isinstance(t_cols, list): + t_cols = [] + if not isinstance(t_rows, list): + t_rows = [] + html_out += '' + if t_cols: + html_out += "" + for c in t_cols: + html_out += f"" + html_out += "" + html_out += "" + for r in t_rows: + if not isinstance(r, list): + continue + html_out += "" + for cell in r: + html_out += f"" + html_out += "" + html_out += "
{h(str(c))}
{h(format_money_figures(str(cell)))}
" + + elif btype == "table": + # Generic table (columns may be objects or strings, rows may be dicts or lists) + t_cols = b.get("columns", []) + t_rows = b.get("rows", []) + if not isinstance(t_cols, list): + t_cols = [] + if not isinstance(t_rows, list): + t_rows = [] + col_labels = [] + col_keys = [] + for c in t_cols: + if isinstance(c, dict): + col_labels.append(str(c.get("label", c.get("key", "")))) + col_keys.append(str(c.get("key", ""))) + else: + col_labels.append(str(c)) + col_keys.append(re.sub(r"[^a-z0-9]+", "_", str(c).lower())) + html_out += '' + if col_labels: + html_out += "" + for lbl in col_labels: + html_out += f"" + html_out += "" + html_out += "" + for r in t_rows: + html_out += "" + if isinstance(r, dict): + for k in col_keys: + cell = r.get(k, "") + html_out += f"" + elif isinstance(r, list): + for cell in r: + html_out += f"" + html_out += "" + html_out += "
{h(lbl)}
{h(format_money_figures(str(cell)))}{h(format_money_figures(str(cell)))}
" + + elif btype in ("table_v3", "table_v4"): + t_rows = b.get("rows", []) + h_rows = b.get("header_rows", []) + col_widths = b.get("col_width_pct", []) + if not isinstance(t_rows, list): + t_rows = [] + if not isinstance(h_rows, list): + h_rows = [] + if not isinstance(col_widths, list): + col_widths = [] + + html_out += '' + + # optional col widths + if col_widths: + html_out += "" + for w in col_widths: + html_out += f'' + html_out += "" + + # header rows + if h_rows: + html_out += "" + for hr in h_rows: + if not isinstance(hr, list): + continue + html_out += "" + for cell in hr: + c_attr, c_style, c_text = _parse_v3_cell(cell) + html_out += f"{c_text}" + html_out += "" + html_out += "" + + # body rows + html_out += "" + for r in t_rows: + if not isinstance(r, list): + continue + html_out += "" + for cell in r: + c_attr, c_style, c_text = _parse_v3_cell(cell) + html_out += f"{c_text}" + html_out += "" + html_out += "
" + + return html_out + + # ── Fallback ── + if "text" in json_data: + html_out += f'

{h(format_money_figures(str(json_data["text"])))}

' + + if not html_out.strip(): + logger.warning( + "Empty section render for key=%s title=%s", + section_key, section_title, + ) + + return html_out + + +# ========================================= +# University section renderer +# ========================================= + +def render_university_section( + uni_name: str, + sections: list[dict], + allow_remote: bool, + is_first_uni: bool, + include_inactive_programs: bool = False, + website_url: str = "", + anchor_id: str | None = None, + debug: bool = False, + stats: dict | None = None, + sort_order: int | None = None, +) -> str: + """Render a single university section. Mirrors PHP renderUniversitySection.""" + classes = ["uni"] + if not is_first_uni: + classes.append("page-break") + + id_attr = f' id="{h(anchor_id)}"' if anchor_id else "" + sort_attr = f' data-sort="{h(str(sort_order))}"' if sort_order is not None else "" + + out = f'
' + + has_stats = isinstance(stats, dict) + if has_stats: + stats["universities"] = stats.get("universities", 0) + 1 + + # Build map; merge duplicate "programs" sections + sec_map: dict[str, dict] = {} + for s in sections: + if not isinstance(s, dict): + continue + k = str(s.get("section_key", "")) + if not k: + continue + if k == "programs" and k in sec_map: + existing = sec_map["programs"].get("section_json", {}) + incoming = s.get("section_json", {}) + if not isinstance(existing, dict): + existing = {} + if not isinstance(incoming, dict): + incoming = {} + a = existing.get("programs", []) + b = incoming.get("programs", []) + if not isinstance(a, list): + a = [] + if not isinstance(b, list): + b = [] + existing["programs"] = a + b + sec_map["programs"]["section_json"] = existing + continue + sec_map[k] = s + + # Campus image + img_section = sec_map.get("campus_image") or sec_map.get("image") + campus_url = "" + campus_cap = "" + if img_section: + j = img_section.get("section_json", {}) + if isinstance(j, dict): + campus_url = str(j.get("image_url", "")).strip() + campus_cap = str(j.get("caption", "")).strip() + + # Overview data + website + overview_json: dict | None = None + resolved_website = (website_url or "").strip() + + if "overview" in sec_map: + overview_json = sec_map["overview"].get("section_json", {}) + if not isinstance(overview_json, dict): + overview_json = {} + site_from_overview = get_any( + overview_json, + ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"], + ) + if not resolved_website and site_from_overview: + resolved_website = site_from_overview + + # 1. University title + if resolved_website: + if has_stats: + stats["university_links"] = stats.get("university_links", 0) + 1 + out += ( + f'
{h(uni_name)}
' + ) + else: + out += f'
{h(uni_name)}
' + + # 2-3. Two-column: Summary + Campus image + image_embedded = False + campus_cell = "" + if allow_remote and campus_url: + embedded = fetch_image_data_uri(campus_url) + if embedded: + image_embedded = True + campus_cell = f'Campus Image' + if campus_cap: + campus_cell += f'
{h(campus_cap)}
' + else: + campus_cell = '
Campus image unavailable
' + else: + campus_cell = '
Campus image unavailable
' + + if has_stats: + if image_embedded: + stats["images_embedded"] = stats.get("images_embedded", 0) + 1 + else: + stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1 + + summary_cell = "" + if overview_json is not None: + j = overview_json + founded = get_any(j, ["founded", "Founded"]) + total = get_any(j, ["total_students", "Total Students"]) + undergrad = get_any(j, ["undergraduates", "Undergraduate Students", "undergraduate_students"]) + postgrad = get_any(j, ["postgraduate_students", "Postgraduate Students"]) + acc_rate = get_any(j, ["acceptance_rate", "Acceptance Rate"]) + location = get_any(j, ["location", "Location"]) + tuition = get_any(j, [ + "tuition_out_of_state_yearly", + "Yearly Out of State Tuition Fees", + "Yearly Out-of-State Tuition Fees", + "Yearly Tuition Fees", + "Yearly Out-of-State Tuition Fees:", + ]) + + summary_cell += '
Summary info
' + summary_cell += '" + + if resolved_website: + if has_stats: + stats["website_rows"] = stats.get("website_rows", 0) + 1 + summary_cell += ( + f'
Website: ' + f'' + f'{h(resolved_website)}
' + ) + + out += ( + '' + f'' + f'' + "
{summary_cell}{campus_cell}
" + ) + + # 4. Benefits + if "benefits" in sec_map: + j = sec_map["benefits"].get("section_json", {}) + if not isinstance(j, dict): + j = {} + benefits = j.get("benefits", []) + if not isinstance(benefits, list): + benefits = [] + + out += '
' + out += '
Benefits for ISP students at this school
' + if benefits: + out += '" + else: + out += '
No benefits listed.
' + out += "
" + + # 5. Programs + if "programs" in sec_map: + j = sec_map["programs"].get("section_json", {}) + if not isinstance(j, dict): + j = {} + programs = j.get("programs", []) + if not isinstance(programs, list): + programs = [] + + # Filter inactive + if not include_inactive_programs: + def _is_active(p: dict) -> bool: + flag = p.get("program_active", p.get("is_active", p.get("active", 1))) + return is_truthy(flag) + + programs = [p for p in programs if isinstance(p, dict) and _is_active(p)] + + out += ( + '
To qualify for The International Scholars Program at ' + f"{h(uni_name)}, you must be willing to study any of the following programs:
" + ) + + if programs: + out += '' + out += ( + '' + '' + '' + ) + + for p in programs: + if not isinstance(p, dict): + continue + + program_name = str(p.get("program_name", "")).strip() + link = str(p.get("program_link", "")).strip() + if not link and isinstance(p.get("program_links"), dict): + link = str(p["program_links"].get("web_link", "")).strip() + + program_name_html = h(program_name) + if link: + program_name_html = f'{program_name_html}' + + + + entrance = str(p.get("entrance_exam", p.get("entrance_examination", ""))) + designation = str(p.get("designation", "")) + out += ( + f"" + f"" + f"" + f"" + f"" + ) + + out += "
ProgramDesignationEntrance Examination
{program_name_html}{h(designation)}{h(entrance)}
" + else: + out += '
No programs listed.
' + + # Extra sections + skip_keys = {"campus_image", "image", "overview", "benefits", "programs"} + for s in sections: + if not isinstance(s, dict): + continue + k = str(s.get("section_key", "")) + if not k or k in skip_keys: + continue + title = str(s.get("section_title", "")) + j = s.get("section_json", {}) + if not isinstance(j, dict): + j = {} + out += render_global_blocks(k, title, j, debug) + + out += "
" + return out diff --git a/app/services/utils.py b/app/services/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..435b5da819c3089325f5489b43d25ac8a25ca1c5 --- /dev/null +++ b/app/services/utils.py @@ -0,0 +1,259 @@ +"""Utility functions shared across renderers. + +Mirrors PHP helpers: h(), formatMoneyFigures(), handbook_anchor(), etc. +""" + +from __future__ import annotations + +import html +import re + + +def h(s: str) -> str: + """HTML-escape (mirrors PHP h()).""" + return html.escape(str(s), quote=True) + + +def is_assoc(a: list | dict) -> bool: + """Check if an array is associative (dict-like) vs sequential list.""" + return isinstance(a, dict) + + +def hb_slug(s: str) -> str: + """Slug helper for anchors.""" + tmp = s.lower().strip() + tmp = re.sub(r"[^a-z0-9]+", "_", tmp, flags=re.IGNORECASE) + tmp = re.sub(r"_+", "_", tmp) + return tmp.strip("_") + + +def handbook_anchor(prefix: str, text: str, idx: int) -> str: + """Normalise a string into a safe anchor id. Mirrors PHP handbook_anchor.""" + base = text.lower().strip() + base = re.sub(r"[^a-z0-9]+", "-", base, flags=re.IGNORECASE) + base = base.strip("-") + if not base: + base = f"{prefix}-{idx}" + return f"{prefix}-{base}-{idx}" + + +def is_truthy(val) -> bool: + """Mirrors PHP handbook_true.""" + if isinstance(val, bool): + return val + if isinstance(val, int): + return val != 0 + v = str(val).lower().strip() + return v not in ("0", "false", "") + + +def format_money_figures(text: str) -> str: + """Normalize all monetary figures to "USD X,XXX" format. 
+ + - Converts existing $X,XXX → USD X,XXX + - Normalizes bare large numbers (1,000+) → USD X,XXX + - Formats with commas + - Currency type is always USD (no $ symbol) + """ + if not text: + return text + + # Step 1: Convert "$X" → "USD X" directly (preserves ALL dollar amounts) + def _dollar_to_usd(m: re.Match) -> str: + num_str = m.group(1).replace(",", "") + try: + num = float(num_str) + except ValueError: + return m.group(0) + if "." in m.group(1): + dec_part = m.group(1).split(".")[-1] + formatted = f"{num:,.{len(dec_part)}f}" + elif num == int(num): + formatted = f"{int(num):,}" + else: + formatted = f"{num:,.2f}" + return "USD " + formatted + + text = re.sub(r'\$([\d,]+(?:\.\d+)?)', _dollar_to_usd, text) + + # Step 2: Normalize existing "USD X,XXX" for consistent comma formatting + def _normalize_usd(m: re.Match) -> str: + num_str = m.group(1).replace(",", "") + try: + num = float(num_str) + except ValueError: + return m.group(0) + if "." in m.group(1): + dec_part = m.group(1).split(".")[-1] + formatted = f"{num:,.{len(dec_part)}f}" + elif num == int(num): + formatted = f"{int(num):,}" + else: + formatted = f"{num:,.2f}" + return "USD " + formatted + + text = re.sub(r'\bUSD\s+([\d,]+(?:\.\d+)?)', _normalize_usd, text, flags=re.IGNORECASE) + + # Step 3: Add "USD " to bare large numbers (4+ digits or comma-formatted) + # that aren't already preceded by "USD " + def _format_bare_large(m: re.Match) -> str: + num_str = m.group(1).replace(",", "") + dec = m.group(2) if m.group(2) else "" + try: + num = float(num_str) + except ValueError: + return m.group(0) + if dec: + formatted = f"{num:,.{len(dec)}f}" + else: + formatted = f"{num:,.0f}" + return "USD " + formatted + + text = re.sub( + r"(? str: + """Ensure REGULAR/PRIME program options appear together when either appears. + + If only one of the two appears in text, append "(REGULAR and PRIME)" + to preserve source meaning while enforcing consistency. 
+ """ + if not text: + return text + + has_regular = bool(re.search(r"\bREGULAR\b", text, flags=re.IGNORECASE)) + has_prime = bool(re.search(r"\bPRIME\b", text, flags=re.IGNORECASE)) + + if has_regular ^ has_prime: + if re.search(r"\(\s*REGULAR\s+and\s+PRIME\s*\)", text, flags=re.IGNORECASE): + return text + return text.rstrip() + " (REGULAR and PRIME)" + + return text + + +def sort_sections_stable(sections: list[dict]) -> list[dict]: + """Stable sort: sort_order ASC, then id ASC, then insertion order.""" + for i, s in enumerate(sections): + s.setdefault("_i", i) + + def sort_key(s: dict): + so = s.get("sort_order") + sid = s.get("id") + so_key = (0, so) if so is not None else (1, 0) + sid_key = (0, sid) if sid is not None else (1, 0) + return (so_key, sid_key, s.get("_i", 0)) + + sections.sort(key=sort_key) + for s in sections: + s.pop("_i", None) + return sections + + +def get_any(d: dict, keys: list[str]) -> str: + """Return the first non-empty string value found for one of the keys.""" + for k in keys: + v = d.get(k) + if v is None or isinstance(v, (dict, list)): + continue + t = str(v).strip() + if t: + return t + return "" + + +def emphasize_keywords(text: str) -> str: + """Add bold HTML emphasis to key handbook terms in already-escaped text. + + Bolds: REGULAR, PRIME, dollar amounts ($X,XXX), and other critical terms. + Input must already be HTML-escaped. Returns HTML with tags. 
+ """ + if not text: + return text + + escaped = h(text) + + # Bold REGULAR and PRIME (case-insensitive, whole word) + escaped = re.sub( + r'\b(REGULAR|PRIME)\b', + r'\1', + escaped, + flags=re.IGNORECASE, + ) + + # Bold USD amounts like USD 1,000 or USD 500 + escaped = re.sub( + r'\b(USD\s+[\d,]+(?:\.\d+)?)', + r'\1', + escaped, + flags=re.IGNORECASE, + ) + + # Bold standalone USD + escaped = re.sub( + r'\b(USD)\b(?!\s*[\d,])', + r'\1', + escaped, + flags=re.IGNORECASE, + ) + + # Bold dollar-sign amounts like $20, $1,000, $1,000.00 + escaped = re.sub( + r'(\$[\d,]+(?:\.\d+)?)', + r'\1', + escaped, + ) + + # Bold specific GPA values 2.8, 3.4 and 4.0 + escaped = re.sub( + r'\b(2\.8|3\.4|4\.0)\b', + r'\1', + escaped, + ) + + # Bold key qualification and geo terms. + escaped = re.sub( + r'\b(GPA\s*\(\s*Undergraduate\s+Requirement\s*\)|GPA|High\s+School\s+grades|Global|Uganda|Kenya)\b', + r'\1', + escaped, + flags=re.IGNORECASE, + ) + + # Bold refund policy phrase. + escaped = re.sub( + r'\b(Refund\s+Policy)\b', + r'\1', + escaped, + flags=re.IGNORECASE, + ) + + return escaped + + +def linkify_urls(text: str) -> str: + """Convert URLs in text to clickable tags with target="_blank". + + Detects http/https URLs and converts them to proper anchor tags. + Input should be plain text or already HTML-escaped. + Returns HTML with tags. 
+ """ + if not text: + return text + + # Detect and convert http/https URLs to clickable links + # Pattern: http:// or https:// followed by domain and optional path + url_pattern = r'(https?://[^\s<)]+)' + + def make_link(match): + url = match.group(1) + # Clean up trailing punctuation that's likely not part of URL + url = url.rstrip('.,;:!?)\'\"') + return f'{h(url)}' + + return re.sub(url_pattern, make_link, text) diff --git a/app/static/css/print.css b/app/static/css/print.css new file mode 100644 index 0000000000000000000000000000000000000000..5070f49a60f7826c45ec829053df4910f108ffc5 --- /dev/null +++ b/app/static/css/print.css @@ -0,0 +1,1344 @@ +/* ========================================================= + ISP HANDBOOK - CORRECTED PRINT CSS + Uses real page margins (2.54 cm all sides) + Removes negative-margin / negative-offset layout tricks + Keeps decorative elements from breaking content flow + ========================================================= */ + +/* ------------------------------ + PAGE SETUP + ------------------------------ */ +@page { + size: A4; + margin: 2.54cm; +} + +/* Optional cover page: use only if your renderer supports named pages */ +@page cover { + size: A4; + margin: 0; +} + +/* ------------------------------ + RESET + ------------------------------ */ +*, +*::before, +*::after { + box-sizing: border-box; +} + +html, +body { + margin: 0; + padding: 0; + font-family: "Century Gothic", "Segoe UI", Tahoma, Geneva, Verdana, sans-serif; + font-size: 10pt; + line-height: 1.45; + color: #1a1a1a; + text-align: justify; + -webkit-print-color-adjust: exact; + print-color-adjust: exact; + background: #ffffff; +} + +/* ------------------------------ + EXTERNAL PLAYWRIGHT HEADER/FOOTER + Keep hidden in body flow if using templates + ------------------------------ */ +.page-header, +.page-footer { + display: none !important; +} + +.page-header img { + display: block; + width: 8.45in; + height: 1.03in; +} + +/* ------------------------------ + 
SAFE CONTENT WRAPPER + ------------------------------ */ +.page-content { + display: block; + width: 100%; + max-width: 100%; + margin: 0; + padding: 0; + position: relative; + z-index: 1; + overflow: visible; + word-wrap: break-word; + overflow-wrap: break-word; +} + +/* Prevent accidental blank page after cover */ +.cover-page+.page-content, +.cover-page+.toc-page, +.cover-page+.section-block { + page-break-before: auto; + break-before: auto; +} + +/* Prevent accidental blank page from a leading .page-break */ +.page-content> :first-child.page-break, +.page-content> :first-child.section-block.page-break { + page-break-before: auto; + break-before: auto; +} + +/* ------------------------------ + DECORATIVE RIGHT-SIDE LABEL + Hidden in the base HTML. Rendered as a PDF overlay + by pdf_renderer.py onto content pages only. + Kept here so the loads for src extraction. + Dimensions: 2.5cm × 24.6cm (vertical strip, right edge, matches Word image4.png) + ------------------------------ */ +.hb-right-label { + position: absolute; + top: -9999px; + left: -9999px; + width: 1px; + height: 1px; + overflow: hidden; + pointer-events: none; +} + +.hb-right-label img { + display: block; +} + +/* ------------------------------ + TYPOGRAPHY — matching Word handbook exactly + H1: 12pt, #0263A3, uppercase, no bold, no border + H2-H6: color varies by context, no bold, no border + ------------------------------ */ +.hb-heading-1, +.h2, +h1 { + font-size: 12pt; + font-weight: 700; + color: #0263A3; + margin: 4pt 0 10pt; + padding-bottom: 0; + border-bottom: none; + line-height: 1.25; + text-transform: uppercase; + text-align: left; + text-decoration: none; + page-break-after: avoid; + break-after: avoid; + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-heading-2, +.h3, +h2, +h3, +h4, +h5, +h6 { + font-size: 11pt; + font-weight: 700; + color: #199970; + margin: 12pt 0 6pt; + padding-bottom: 0; + border-bottom: none; + line-height: 1.25; + text-align: left; + text-decoration: 
none; + page-break-after: avoid; + break-after: avoid; + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-paragraph, +.p, +p { + margin: 6pt 0 2pt; + font-size: 10pt; + line-height: 1.5; + text-align: justify; + orphans: 3; + widows: 3; +} + +strong, +b { + font-weight: 700; +} + +em, +i { + font-style: italic; +} + +a, +a:visited { + color: #0263A3; + text-decoration: none; + border-bottom: 0.5pt solid #0263A3; + word-break: break-word; +} + +/* ------------------------------ + LISTS + ------------------------------ */ +.hb-bullet-list, +.ul, +ul.hb-bullet-list { + list-style: none !important; + margin: 4pt 0 10pt 16pt; + padding: 0; + font-size: 9.5pt; +} + +.hb-bullet-list li, +.ul li, +ul.hb-bullet-list li { + position: relative; + margin: 0 0 5pt; + padding-left: 16pt; + line-height: 1.45; + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-bullet-list li::before, +.ul li::before, +ul.hb-bullet-list li::before { + content: "\27A4"; + position: absolute; + left: 0; + top: 0; + color: #0263A3; + font-size: 8pt; + font-weight: 700; +} + +/* ordered lists */ +.hb-numbered-list, +.ol, +ol.hb-numbered-list { + list-style: decimal !important; + margin: 4pt 0 10pt 18pt; + padding: 0; + font-size: 9.5pt; +} + +.hb-numbered-list li, +.ol li, +ol.hb-numbered-list li { + margin: 0 0 5pt; + padding-left: 2pt; + line-height: 1.45; + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-numbered-list li::before, +.ol li::before, +ol.hb-numbered-list li::before { + content: none !important; +} + +/* Sub-bullets — checkmarks, indented under parent bullet */ +.hb-sub-bullets { + margin-left: 32pt !important; +} + +.hb-sub-bullets li::before { + content: "\2713" !important; + color: #199970; + font-size: 9pt; + font-weight: 700; +} + +/* ------------------------------ + NOTES / EMPHASIS + Guideline requires full bold text; + NOTE and ONLY IF in red. 
+ ------------------------------ */ +.hb-note, +.note { + margin: 10pt 0 12pt; + padding: 8pt 10pt 8pt 14pt; + border-left: 3.5pt solid #C00000; + background: #FFF8F8; + border-radius: 0 2pt 2pt 0; + font-size: 10pt; + line-height: 1.45; + font-weight: 700; + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-note-keyword, +.note-keyword, +.note .keyword, +.only-if { + color: #C00000; + font-weight: 700; +} + +/* ------------------------------ + BOLD EMPHASIS FOR KEY INFORMATION + Payment amounts, REGULAR, PRIME, critical terms + ------------------------------ */ +.hb-bold-keyword { + font-weight: 700; +} + +.hb-red-keyword { + font-weight: 700; + color: #C00000; +} + +/* ------------------------------ + SCHOOL INFO - Green color for school information + ------------------------------ */ +.hb-school-info { + color: #199970; + font-size: 9.5px; +} + +/* ------------------------------ + HELPERS + ------------------------------ */ +.page-break { + page-break-before: always; + break-before: page; +} + +.avoid-break { + page-break-inside: avoid; + break-inside: avoid; +} + +.keep-with-next { + page-break-after: avoid; + break-after: avoid; +} + +.clearfix::after { + content: ""; + display: block; + clear: both; +} + +.debug-block { + border: 2px dashed #cc0000; + padding: 12px; + background: #fffbe6; + page-break-inside: avoid; + break-inside: avoid; +} + +/* ------------------------------ + COVER PAGE + Prefer named page. + If unsupported by renderer, cover will still render safely. + ------------------------------ */ +.cover-page { + page: cover; + page-break-after: always; + break-after: page; + margin: 0; + padding: 0; + position: relative; + overflow: hidden; +} + +.cover-img { + display: block; + width: 100%; + height: auto; +} + +/* ------------------------------ + FULL-PAGE IMAGE PAGES + Only use when intentionally full-page. 
+ ------------------------------ */ +.fullpage-img-wrap { + page: cover; + page-break-before: always; + break-before: page; + page-break-after: always; + break-after: page; + margin: 0; + padding: 0; + position: relative; + overflow: hidden; +} + +.fullpage-img { + display: block; + width: 100%; + height: auto; +} + +img { + max-width: 100%; + height: auto; +} + +/* ------------------------------ + TABLES - GLOBAL + Clean styling matching Word handbook: white cells, + bold headers, thin black borders, no colored fills. + ------------------------------ */ +table { + width: 100%; + border-collapse: collapse; + table-layout: fixed; + max-width: 100%; + overflow: hidden; + page-break-inside: auto; + break-inside: auto; +} + +thead { + display: table-header-group; +} + +tfoot { + display: table-row-group; +} + +tr { + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-table, +.tbl { + width: 100%; + border-collapse: collapse; + table-layout: fixed; + max-width: 100%; + margin: 10pt 0 14pt; + font-size: 9.5pt; + line-height: 1.35; + border: 0.75pt solid #000000; +} + +.hb-table th, +.hb-table td, +.tbl th, +.tbl td { + border: 0.75pt solid #000000; + padding: 6pt 8pt; + vertical-align: top; + word-wrap: break-word; + overflow-wrap: break-word; + font-size: 9.5pt; + line-height: 1.35; + background: #ffffff; + color: #1a1a1a; + text-align: left; +} + +.hb-table th, +.tbl th { + font-weight: 700; + text-transform: uppercase; +} + +/* comparison table */ +.hb-table-comparison, +.tbl-comparison { + font-size: 9pt; +} + +.hb-table-comparison th, +.tbl-comparison th { + text-align: left; + font-size: 9pt; + font-weight: 700; + text-transform: uppercase; +} + +/* Section-divider row: teal background matching Word doc #31849B */ +.hb-table th.is-regular-col, +.hb-table td.is-regular-col, +.tbl th.is-regular-col, +.tbl td.is-regular-col { + color: #ffffff; + font-weight: 700; + background: #31849B; +} + +.hb-table td.is-regular-col, +.tbl td.is-regular-col { + color: 
#31849B; + background: transparent; +} + +.hb-table th.is-prime-col, +.hb-table td.is-prime-col, +.tbl th.is-prime-col, +.tbl td.is-prime-col { + color: #ffffff; + font-weight: 700; + background: #1F497D; +} + +.hb-table td.is-prime-col, +.tbl td.is-prime-col { + color: #1F497D; + background: transparent; +} + +/* ------------------------------ + PROGRAM TABLES + Matching Word handbook: bold green (#199970) header text, + white cells, thin black borders, no colored fills. + ------------------------------ */ +.hb-programs, +table.programs { + width: 100%; + border-collapse: collapse; + table-layout: fixed; + max-width: 100%; + margin: 10pt 0 14pt; + border: 0.75pt solid #000000; + font-family: "Century Gothic", "Segoe UI", sans-serif; +} + +.hb-programs th, +.hb-programs td, +table.programs th, +table.programs td { + border: 0.75pt solid #000000; + padding: 5pt 6pt; + vertical-align: top; + word-wrap: break-word; + overflow-wrap: break-word; + font-size: 8.75pt; + line-height: 1.3; + background: #ffffff; + color: #1a1a1a; +} + +.hb-programs th, +table.programs th { + font-weight: 700; + background: #ffffff; + color: #199970; + text-transform: uppercase; + text-align: left; + letter-spacing: 0.3px; + font-size: 8.5pt; + padding: 6pt 6pt; +} + +.hb-programs td, +table.programs td { + text-align: left; +} + +.hb-programs td a, +table.programs td a { + text-decoration: none; + border-bottom: none; + color: #1c75bc; + font-weight: 700; +} + +.hb-programs th:nth-child(1), +.hb-programs td:nth-child(1), +table.programs th:nth-child(1), +table.programs td:nth-child(1) { + width: 22%; +} + +.hb-programs th:nth-child(2), +.hb-programs td:nth-child(2), +table.programs th:nth-child(2), +table.programs td:nth-child(2) { + width: 14%; +} + +.hb-programs th:nth-child(3), +.hb-programs td:nth-child(3), +table.programs th:nth-child(3), +table.programs td:nth-child(3) { + width: 16%; +} + +.hb-programs th:nth-child(4), +.hb-programs td:nth-child(4), +table.programs th:nth-child(4), 
+table.programs td:nth-child(4) { + width: 30%; +} + +.hb-programs th:nth-child(5), +.hb-programs td:nth-child(5), +table.programs th:nth-child(5), +table.programs td:nth-child(5) { + width: 18%; +} + +/* career list inside programs table */ +.hb-career-list, +.career-list { + margin: 0; + padding-left: 12pt; + list-style-type: disc; +} + +.hb-career-list li, +.career-list li { + margin: 0 0 2pt; + padding-left: 0; + line-height: 1.2; + font-size: 8.75pt; +} + +.hb-career-list li::before, +.career-list li::before { + content: none; +} + +/* ------------------------------ + TABLE OF CONTENTS + Stable within content width + ------------------------------ */ +/* TOC wrapper block: full width, kept together, and followed by a forced page break. This class selector also matches the span.toc-page page-number elements styled further down, so the declarations here cascade onto them unless overridden there. */ +.toc-page { + display: block; + width: 100%; + margin: 0; + padding: 0; + page-break-after: always; + break-after: page; + page-break-inside: avoid; + break-inside: avoid; +} + +.toc { + width: 100%; + margin: 0; + padding: 0; +} + +.toc-heading { + display: block; + font-size: 14pt; + font-weight: 700; + text-transform: uppercase; + color: #0263A3; + margin: 0 0 6pt; + padding-bottom: 3pt; + border-bottom: 2.5pt solid #0263A3; + line-height: 1.2; +} + +/* Each TOC row: flex row, single line, with dot leader filling gap */ +.toc-entry { + display: flex; + flex-direction: row; + align-items: baseline; + margin: 0; + padding: 1.5pt 0; + line-height: 1.2; + font-size: 9pt; + color: #1a1a1a; + overflow: hidden; + white-space: nowrap; +} + +/* Major section rows: bold, uppercase */ +.toc-entry--major { + font-weight: 700; + text-transform: uppercase; + font-size: 9pt; +} + +/* Sub-items (universities): regular weight */ +.toc-entry--sub { + font-weight: 400; + text-transform: none; + font-size: 8.5pt; +} + +/* Indented entries (universities) */ +.toc-entry--indent { + padding-left: 16pt; +} + +/* Title text: never grows and never wraps; it may shrink (flex-shrink is 1), in which case the overflow is clipped */ +.toc-label { + flex: 0 1 auto; + overflow: hidden; + text-overflow: clip; + white-space: nowrap; +} + +/* Dot leader: fills remaining 
space between title and page number */ +.toc-leader { + flex: 1 1 auto; + overflow: hidden; + white-space: nowrap; + margin: 0 3pt; + min-width: 16pt; +} + +.toc-leader::after { + content: " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."; + display: inline; + font-size: 7.5pt; + letter-spacing: 0pt; + color: #999; + word-spacing: 0; +} + +/* Page number: fixed width, right-aligned, no shrink */ +span.toc-page { + flex: 0 0 auto; + text-align: right; + min-width: 20pt; + font-weight: 400; + color: #1a1a1a; + white-space: nowrap; + font-style: normal; + font-size: inherit; +} + +/* Links inside TOC */ +.toc-entry a, +.toc-entry a:visited { + color: inherit; + text-decoration: none; + border-bottom: none; +} + +/* ------------------------------ + SECTION BLOCKS + ------------------------------ */ +.section-block { + display: block; + width: 100%; + margin: 0 0 12pt; + padding: 0; + border: none; +} + +.section-block> :first-child { + margin-top: 0; +} + +.summary-section { + display: block; +} + +/* Tier group divider heading (Tier One, Tier Two, etc.) 
*/ +.tier-group-heading { + page-break-before: always; + break-before: page; +} + +.tier-group-heading .hb-heading-1, +.tier-group-heading .h1 { + font-size: 12pt; + font-weight: normal; + color: #199970; + text-align: center; + border-bottom: none; + border-top: none; + padding: 10pt 0; + margin: 20pt 0 12pt; +} + +/* ------------------------------ + ENROLMENT STEPS + Dedicated page block + ------------------------------ */ +.hb-enrollment-steps, +.sec-steps { + page-break-before: always; + break-before: page; + page-break-after: always; + break-after: page; +} + +.hb-step { + margin: 0 0 12pt; + padding: 8pt 0 8pt 12pt; + border-left: 3pt solid #199970; + background: #F6FBF9; + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-step-title { + font-size: 10.5pt; + font-weight: 700; + color: #199970; + margin: 0 0 4pt; + line-height: 1.25; + page-break-after: avoid; + break-after: avoid; +} + +.hb-step-qr-wrap { + margin: 4pt 0 6pt; + text-align: center; +} + +.hb-step-qr { + display: inline-block; + width: 72pt; + height: 72pt; + margin: 4pt 0 6pt; +} + +.hb-telegram-link { + margin: 4pt 0 2pt; + text-align: center; +} + +.hb-telegram-link a { + color: #0263A3; + text-decoration: underline; + font-size: 9pt; +} + +.hb-plain-url { + font-weight: 400; + color: #0263A3; + text-decoration: underline; +} + +/* legacy step support */ +.sec-steps .h3 { + margin: 0 0 4pt; + font-size: 10pt; + color: #199970; +} + +.sec-steps .p { + margin: 0 0 6pt; +} + +.sec-steps .ul { + margin: 0 0 6pt 16pt; +} + +/* ------------------------------ + UNIVERSITY / SCHOOL PAGE + ------------------------------ */ +.uni, +.hb-school-profile { + page-break-before: always; + break-before: page; + padding: 0; +} + +.hb-uni-name, +.uni-name { + font-size: 12pt; + font-weight: 700; + margin: 0 0 3pt; + padding-bottom: 0; + border-bottom: none; + color: #0263A3; + line-height: 1.2; + text-transform: none; + text-decoration: underline; + text-underline-offset: 2pt; +} + +.hb-uni-name a, 
+.hb-uni-name-link, +.uni-name a.uni-name-link { + color: #0263A3; + text-decoration: underline; + text-underline-offset: 2pt; + border-bottom: none; + font-weight: 700; +} + +/* School info directly below school name in green */ +.hb-summary-title, +.summary-title { + font-size: 10pt; + font-weight: 700; + text-transform: uppercase; + margin: 6pt 0 4pt; + color: #199970; + letter-spacing: 0.2px; +} + +.hb-summary-ul, +.summary-ul { + list-style: none; + margin: 0 0 8pt; + padding: 0; +} + +.hb-summary-ul li, +.summary-ul li { + margin: 0 0 3pt; + padding: 0; + font-size: 10pt; + line-height: 1.3; + color: #199970; + font-weight: 700; +} + +.hb-summary-ul li::before, +.summary-ul li::before { + content: none; +} + +.hb-lbl, +.lbl { + font-weight: 700; + color: #199970; +} + +.hb-uni-website, +.uni-website { + margin: 4pt 0 8pt; + font-size: 10pt; + color: #199970; +} + +/* top school layout */ +.hb-school-top-table, +.school-top-table { + width: 100%; + border-collapse: collapse; + table-layout: fixed; + margin: 0 0 6pt; + border: none; +} + +.hb-school-top-table td, +.school-top-table td { + border: none; + padding: 0; + vertical-align: top; +} + +.hb-school-top-summary, +.school-top-summary { + width: 52%; + padding-right: 10pt !important; +} + +.hb-school-top-campus, +.school-top-campus { + width: 48%; + text-align: right; +} + +.hb-campus-img, +.campus-top-img { + display: block; + width: 100%; + max-height: 180pt; + height: 180pt; + border: 0.75pt solid #CBD5E1; + object-fit: cover; + box-shadow: 0 1pt 4pt rgba(0, 0, 0, 0.10); +} + +.hb-campus-caption, +.campus-top-cap { + display: none; +} + +.hb-campus-placeholder, +.campus-placeholder-cell { + width: 100%; + min-height: 120pt; + border: 1pt dashed #CBD5E1; + color: #94A3B8; + font-size: 10pt; + padding: 10pt; + background: #F8FAFC; +} + +/* ------------------------------ + BENEFITS SECTION + Guideline colors retained + ------------------------------ */ +.hb-benefits-section, +.benefits-section { + clear: 
both; + margin: 8pt 0 8pt; + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-benefits-heading { + margin: 0 0 4pt; + line-height: 1.3; +} + +.hb-benefits-bar, +.benefits-bar { + display: inline-block; + background: linear-gradient(135deg, #199970 0%, #00C853 100%); + color: #ffffff; + font-weight: 700; + padding: 4pt 12pt; + font-size: 10pt; + line-height: 1.35; + letter-spacing: 0.3px; +} + +.hb-benefits-ul, +.benefits-ul { + list-style: none; + margin: 0; + padding: 0; +} + +.hb-benefits-ul li, +.hb-benefit-item, +.benefit-li { + margin: 3pt 0; + padding: 0; + font-size: 9.75pt; + line-height: 1.35; + background: none; + border: none; +} + +.hb-benefits-ul li::before, +.hb-benefit-item::before, +.benefit-li::before { + content: "\27A4"; + display: inline-block; + margin-right: 6pt; + color: #199970; + font-weight: 700; +} + +.hb-benefit-text, +.benefit-text { + display: inline-block; + background: #E0F7FA; + padding: 2pt 6pt; + border-left: 2pt solid #00BCD4; +} + +.benefit-bullet { + display: none; +} + +/* ------------------------------ + FUNDING SECTION + Matches benefits styling with distinct bar color + ------------------------------ */ +.hb-funding-section, +.funding-section { + clear: both; + margin: 6pt 0 8pt; + page-break-inside: avoid; + break-inside: avoid; +} + +.hb-funding-heading { + margin: 0 0 4pt; + line-height: 1.3; +} + +.hb-funding-bar, +.funding-bar { + display: inline-block; + background: linear-gradient(135deg, #0263A3 0%, #0288D1 100%); + color: #ffffff; + font-weight: 700; + padding: 4pt 12pt; + font-size: 10pt; + line-height: 1.35; + letter-spacing: 0.3px; +} + +.funding-ul { + list-style: none; + margin: 0; + padding: 0; +} + +.funding-li { + margin: 3pt 0; + padding: 0; + font-size: 9.75pt; + line-height: 1.35; +} + +.funding-li .benefit-bullet { + display: none; +} + +.funding-li::before { + content: "\27A4"; + display: inline-block; + margin-right: 6pt; + color: #0263A3; + font-weight: 700; +} + +.hb-funding-section 
.hb-benefit-item::before { + color: #0263A3; +} + +.hb-funding-section .hb-benefit-text { + background: #E3F2FD; + border-left: 2pt solid #0288D1; +} + +.funding-li .benefit-text { + display: inline-block; + background: #E3F2FD; + padding: 2pt 6pt; + border-left: 2pt solid #0288D1; +} + +/* ------------------------------ + UNIVERSITY SUMMARY LIST + Numbered (decimal) list with bold items. + The list's base color is #1a1a1a, but every li is overridden to blue #0263A3. + ------------------------------ */ +.hb-university-list { + margin: 4pt 0 12pt 18pt; + padding: 0; + list-style: decimal; + font-size: 10pt; + color: #1a1a1a; +} + +.hb-university-list li { + margin: 0 0 5pt; + padding-left: 0; + font-weight: 700; + line-height: 1.45; + color: #0263A3; +} + +.hb-university-list li::before { + content: none; +} + +/* ------------------------------ + QUALIFICATION / MUTED TEXT + NOTE(review): .hb-qualify combines width: 100% with horizontal padding; unless box-sizing: border-box is set elsewhere the box will be wider than its container - confirm + ------------------------------ */ +.hb-qualify, +.qualify { + margin: 10pt 0 8pt; + font-weight: 700; + font-size: 10pt; + font-style: normal; + color: #1a1a1a; + width: 100%; + padding: 8pt 10pt; + background: #F4F7FA; + border-left: 3pt solid #0263A3; +} + +.hb-muted, +.muted { + color: #666666; + font-size: 10pt; +} + +/* ------------------------------ + SECTION-SPECIFIC OVERRIDES + Scoped by a .sec-* ancestor class; each rule adjusts spacing or color of the shared components inside that section only + ------------------------------ */ +.sec-overview .hb-heading-1, +.sec-overview .h2 { + margin-top: 0; +} + +.sec-overview .hb-paragraph, +.sec-overview .p { + text-align: justify; + line-height: 1.4; +} + +.sec-qualification .hb-heading-1, +.sec-qualification .h2 { + color: #C00000; +} + +.sec-how .hb-heading-2 { + color: #C00000; +} + +.sec-qualification .hb-heading-2, +.sec-qualification .h3 { + margin-top: 10pt; +} + +.sec-qualification .hb-table, +.sec-qualification .tbl { + margin: 6pt 0 10pt; +} + +.sec-policy .hb-heading-2, +.sec-policy .h3 { + margin-top: 10pt; + padding-bottom: 0; +} + +.sec-policy .hb-note, +.sec-policy .note { + margin-top: 8pt; +} + +.sec-policy .hb-table, +.sec-policy .tbl { + margin: 6pt 0 10pt; +} + +.sec-refund .hb-table, +.sec-refund .tbl { + margin: 6pt 0 10pt; +} + 
+.sec-contributions .hb-paragraph, +.sec-contributions .p, +.sec-contributions .hb-bullet-list li, +.sec-contributions .ul li { + font-weight: 700; + color: #1a1a1a; +} + +.sec-contributions .hb-heading-1, +.sec-contributions .hb-heading-2, +.sec-contributions .h2, +.sec-contributions .h3 { + color: #199970; +} + +.sec-contributions .hb-table, +.sec-contributions .tbl { + margin: 6pt 0 10pt; +} + +.sec-contributions .hb-programs th, +.sec-contributions .hb-programs td, +.sec-contributions table.programs th, +.sec-contributions table.programs td, +.sec-funding .hb-programs th, +.sec-funding .hb-programs td, +.sec-funding table.programs th, +.sec-funding table.programs td { + text-align: left; +} + +.sec-contributions .hb-programs th, +.sec-contributions table.programs th, +.sec-funding .hb-programs th, +.sec-funding table.programs th { + color: #ffffff; + font-weight: 700; + font-size: 10pt; + line-height: 1.2; + background: #199970; +} + +.sec-funding .hb-table, +.sec-funding .tbl { + margin: 6pt 0 10pt; +} + +.sec-funding { + page-break-before: always; + break-before: page; +} + +.sec-funding .hb-table th, +.sec-funding .tbl th { + color: #ffffff; + font-weight: 700; + background: #199970; +} + +/* Images must stay inside margins */ +.page-content img { + max-width: 100%; + height: auto; +} + +/* ------------------------------ + BREAKDOWN — RELOCATION COST + ------------------------------ */ + +/* Banner heading (teal background bar) */ +.hb-banner-heading { + background: linear-gradient(135deg, #199970 0%, #0D7B55 100%); + color: #FFFFFF !important; + padding: 8pt 14pt; + margin: 12pt 0 8pt; + font-size: 11.5pt; + font-weight: 700; + letter-spacing: 0.4px; +} + +/* Page break before Relocation Cost */ +.sec-breakdown .page-break { + page-break-before: always; + break-before: page; +} + +/* Relocation cost table — merged note cell */ +.hb-relocation-table { + margin: 0 0 12pt; +} + +.hb-relocation-table td { + vertical-align: top; + padding: 5pt 8pt; + border: 
0.75pt solid #CBD5E1; + font-size: 9.5pt; +} + +.hb-merged-note { + font-style: italic; + font-weight: 400; + font-size: 9.5pt; + line-height: 1.4; + display: block; + padding: 4pt 2pt; +} + +/* ISP FINANCING line */ +.hb-isp-financing { + margin: 16pt 0 8pt; + padding: 8pt 0; + border-top: 2pt solid #0263A3; + border-bottom: 2pt solid #0263A3; + text-align: center; + font-size: 10.5pt; + letter-spacing: 0.3px; +} + +.hb-isp-financing strong { + font-weight: 700; + color: #0263A3; +} + +.hb-isp-financing em { + font-style: italic; + color: #199970; + font-weight: 700; +} + +/* NB: CREDIT FACILITY — green */ +.hb-credit-note { + text-align: center; + color: #199970; + font-size: 10.5pt; + font-weight: 700; + margin: 8pt 0; + letter-spacing: 0.2px; +} \ No newline at end of file diff --git a/app/templates/handbook.html b/app/templates/handbook.html new file mode 100644 index 0000000000000000000000000000000000000000..f2161a2049007d19c37d6546659c53d2581ffcb0 --- /dev/null +++ b/app/templates/handbook.html @@ -0,0 +1,126 @@ + + + + + + ISP Handbook + + + {% if extra_css %} + + {% endif %} + + + + + {# ── Page Header (hidden; JS extracts img src for Playwright header_template) ── #} + {% if header_image %} + + {% endif %} + + {# ── Right-Side Label (position:fixed, repeats on every page) ── #} + {% if label_image %} +
+ +
+ {% endif %} + + {# ── Footer (hidden; Playwright footer_template renders page numbers) ── #} + + + {# ── Cover Page ── #} + {% if cover_image %} + {% include "partials/cover.html" %} + {% endif %} + + {# ── TOC Image Page ── #} + {% if toc_image %} +
+ Table of Contents +
+ {% endif %} + + {# ── Page Content Wrapper (padding clears the fixed header/footer) ── #} +
+ + {# ── Dynamic TOC ── #} + {% if not toc_image and toc_items %} +
+ {% include "partials/toc.html" %} +
+ {% endif %} + + {# ── Global Sections ── #} + {% for gs in general_sections %} +
+ {% if gs.blocks %} + {% for block in gs.blocks %} + {% include "partials/blocks/render_block.html" %} + {% endfor %} + {% else %} + {{ gs.rendered_html }} + {% endif %} +
+ {% endfor %} + + {# ── Summary of Universities ── #} + {% if summary_block %} +
+ {% if summary_block.blocks %} + {% for block in summary_block.blocks %} + {% include "partials/blocks/render_block.html" %} + {% endfor %} + {% else %} + {{ summary_block.rendered_html }} + {% endif %} +
+ {% endif %} + + {# ── University Sections (grouped by tier with divider headings) ── #} + {% if university_blocks %} + {% for block in university_blocks %} + {% include "partials/blocks/render_block.html" %} + {% endfor %} + {% elif universities %} + {% for uni in universities %} + {% if uni.tier_group_start and uni.tier_group_label %} +
+

{{ uni.tier_group_label | e }} +

+
+ {% endif %} + {% include "partials/university.html" %} + {% endfor %} + {% endif %} + +
{# /page-content #} + + {# ── Bottom Image Pages ── #} + {% for img_path in bottom_pages %} +
+ Handbook Page Image +
+ {% endfor %} + + {# ── Debug Summary ── #} + {% if debug and stats %} +
+
+

PDF Debug Summary

+
{{ stats | tojson(indent=2) }}
+
+ {% endif %} + + + + \ No newline at end of file diff --git a/app/templates/partials/blocks/bullet_list.html b/app/templates/partials/blocks/bullet_list.html new file mode 100644 index 0000000000000000000000000000000000000000..8794dab32bab8938fc52e0c7c4bece3279de3b1f --- /dev/null +++ b/app/templates/partials/blocks/bullet_list.html @@ -0,0 +1,22 @@ +{# Block partial: bullet_list (ordered or unordered) — supports HTML-formatted entries #} +{% if block.data.ordered %} +
    + {% for item in block.data.entries %} + {% if block.data.html_entries %} +
  1. {{ item }}
  2. + {% else %} +
  3. {{ item | e }}
  4. + {% endif %} + {% endfor %} +
+{% else %} + +{% endif %} \ No newline at end of file diff --git a/app/templates/partials/blocks/enrollment_steps.html b/app/templates/partials/blocks/enrollment_steps.html new file mode 100644 index 0000000000000000000000000000000000000000..bab5da0f40a24778adb8048096f6caae0a658695 --- /dev/null +++ b/app/templates/partials/blocks/enrollment_steps.html @@ -0,0 +1,39 @@ +{# Block partial: enrollment_steps — each step visually separated #} +{% for step in block.data.steps %} +
+ {% if step.title %} +
Step {{ step.number }}: {{ step.title | e }}
+ {% endif %} + {% if step.body_html %} +

{{ step.body_html }}

+ {% elif step.body %} +

{{ step.body | e }}

+ {% endif %} + {% if step.links %} + + {% endif %} + {% if step.plain_links %} + + {% endif %} + {% if step.qr_url %} +
+ QR Code +
+ {% endif %} + {% if step.telegram_url %} + +

This telegram group will help you interact with program administrators and other prospective + students where you can ask any questions you may have about the program.

+ {% endif %} +
+{% endfor %} \ No newline at end of file diff --git a/app/templates/partials/blocks/heading.html b/app/templates/partials/blocks/heading.html new file mode 100644 index 0000000000000000000000000000000000000000..39c4a4ae33cb7aad26e9dcf6d581d7945d994e98 --- /dev/null +++ b/app/templates/partials/blocks/heading.html @@ -0,0 +1,6 @@ +{# Block partial: heading_1 / heading_2 #} +{% if block.block_type == 'heading_1' %} +

{{ block.data.text | e }}

+{% elif block.block_type == 'heading_2' %} +

{{ block.data.text | e }}

+{% endif %} \ No newline at end of file diff --git a/app/templates/partials/blocks/note.html b/app/templates/partials/blocks/note.html new file mode 100644 index 0000000000000000000000000000000000000000..0dd8023543ddd6619930b7ddf249a96b8ece25d4 --- /dev/null +++ b/app/templates/partials/blocks/note.html @@ -0,0 +1,34 @@ +{# Block partial: note (standalone or inline-parts) #} +{% if block.data.inline and block.data.parts %} +
+ {% for part in block.data.parts %} + {% if part.style == 'red_bold' %} + {{ part.text | e }} + {% elif part.style == 'bold' %} + {{ part.text | e }} + {% elif part.style == 'italic' %} + {{ part.text | e }} + {% else %} + {{ part.text | e }} + {% endif %} + {% endfor %} +
+{% else %} +
+ {% set text = block.data.text | default('') %} + {# Highlight NOTE / ONLY IF keywords in bold + red; rest stays bold via CSS #} + {% if text.upper().startswith('NOTE:') %} + NOTE: {{ text[5:] | e }} + {% elif text.upper().startswith('NOTE ') %} + NOTE {{ text[4:] | e }} + {% elif text.upper().startswith('NOTE') %} + NOTE{{ text[4:] | e }} + {% elif text.upper().startswith('ONLY IF:') %} + ONLY IF: {{ text[8:] | e }} + {% elif text.upper().startswith('ONLY IF') %} + ONLY IF {{ text[7:] | e }} + {% else %} + {{ text | e }} + {% endif %} +
+{% endif %} \ No newline at end of file diff --git a/app/templates/partials/blocks/paragraph.html b/app/templates/partials/blocks/paragraph.html new file mode 100644 index 0000000000000000000000000000000000000000..25ab402d53207b365affe46b4376f98b7a3484f9 --- /dev/null +++ b/app/templates/partials/blocks/paragraph.html @@ -0,0 +1,6 @@ +{# Block partial: paragraph — supports pre-formatted HTML for bold emphasis #} +{% if block.data.html %} +

{{ block.data.html }}

+{% else %} +

{{ block.data.text | e }}

+{% endif %} \ No newline at end of file diff --git a/app/templates/partials/blocks/render_block.html b/app/templates/partials/blocks/render_block.html new file mode 100644 index 0000000000000000000000000000000000000000..4488f4425577cc70f8808954ad54013803a96876 --- /dev/null +++ b/app/templates/partials/blocks/render_block.html @@ -0,0 +1,18 @@ +{# Universal block dispatcher — renders any RenderBlock via its type-specific partial #} +{% if block.block_type in ('heading_1', 'heading_2') %} +{% include "partials/blocks/heading.html" %} +{% elif block.block_type == 'paragraph' %} +{% include "partials/blocks/paragraph.html" %} +{% elif block.block_type == 'bullet_list' %} +{% include "partials/blocks/bullet_list.html" %} +{% elif block.block_type == 'note' %} +{% include "partials/blocks/note.html" %} +{% elif block.block_type == 'table' %} +{% include "partials/blocks/table.html" %} +{% elif block.block_type == 'enrollment_steps' %} +{% include "partials/blocks/enrollment_steps.html" %} +{% elif block.block_type == 'university_summary' %} +{% include "partials/blocks/university_summary.html" %} +{% elif block.block_type == 'school_profile' %} +{% include "partials/blocks/school_profile.html" %} +{% endif %} \ No newline at end of file diff --git a/app/templates/partials/blocks/school_profile.html b/app/templates/partials/blocks/school_profile.html new file mode 100644 index 0000000000000000000000000000000000000000..d4a0091cf337dd201207f2a565a6d29daae61133 --- /dev/null +++ b/app/templates/partials/blocks/school_profile.html @@ -0,0 +1,122 @@ +{# Block partial: school_profile — full university page #} +
+ + {# ── University Title ── #} + {% if block.data.website %} + + {% else %} +
{{ block.data.name | e }}
+ {% endif %} + + {# ── Two-column: Summary + Campus Image ── #} + + + + + +
+ {% if block.data.overview %} +
    + {% if block.data.overview.founded %}
  • Founded: {{ + block.data.overview.founded | e }}
  • {% endif %} + {% if block.data.overview.total_students %}
  • Total Students: {{ + block.data.overview.total_students | e }}
  • {% endif %} + {% if block.data.overview.undergraduates %}
  • Undergraduate Students: + {{ block.data.overview.undergraduates | e }}
  • {% endif %} + {% if block.data.overview.postgraduates %}
  • Postgraduate Students: {{ + block.data.overview.postgraduates | e }}
  • {% endif %} + {% if block.data.overview.acceptance_rate %} +
  • Acceptance Rate: {{ + block.data.overview.acceptance_rate | e }}
  • + {% endif %} + {% if block.data.overview.location %} +
  • Location: {{ + block.data.overview.location | e }}
  • + {% endif %} + {% if block.data.overview.tuition %}
  • Yearly Tuition/Out-of-State + Tuition: {{ block.data.overview.tuition | e }}
  • {% endif %} +
+ {% endif %} +
+ {% if block.data.campus_image %} + Campus Image + {% else %} +
Campus image unavailable
+ {% endif %} +
+ + {# ── Benefits ── #} + {% if block.data.benefits is not none %} +
+
Benefits for ISP Students +
+ {% if block.data.benefits %} +
    + {% for b in block.data.benefits %} + {% if b %} +
  • {{ b | e }}
  • + {% endif %} + {% endfor %} +
+ {% else %} +
No benefits listed.
+ {% endif %} +
+ {% endif %} + + {# ── Funding ── #} + {% if block.data.funding_items %} +
+
{{ block.data.funding_heading | default('Funding + Available') | e }}
+
    + {% for item in block.data.funding_items %} +
  • {{ item | e }}
  • + {% endfor %} +
+
+ {% endif %} + + {# ── Programs Table ── #} + {% if block.data.programs is not none %} +
To qualify for The International Scholars Program at {{ block.data.name | e }}, one must be + willing to study this course:
+ {% if block.data.programs %} + + + + + + + + + + {% for p in block.data.programs %} + + + + + + + {% endfor %} + +
ProgramDesignationEntrance Examination
{% if p.link %}{{ p.name | e + }}{% else %}{{ p.name | e }}{% endif %}{{ p.designation | e }}{{ p.entrance | e }}
+ {% else %} +
No programs listed.
+ {% endif %} + {% endif %} + + {# ── Extra Sections ── #} + {% for extra_list in block.data.extra_blocks %} + {% for block in extra_list %} + {% include "partials/blocks/render_block.html" %} + {% endfor %} + {% endfor %} + +
\ No newline at end of file diff --git a/app/templates/partials/blocks/table.html b/app/templates/partials/blocks/table.html new file mode 100644 index 0000000000000000000000000000000000000000..b331e76d9aa4859763f1bf227d9f4363abfa4d2e --- /dev/null +++ b/app/templates/partials/blocks/table.html @@ -0,0 +1,91 @@ +{# Block partial: table (standard, comparison, spanning variants) #} +{% set variant = block.data.variant | default('standard') %} + +{% if variant == 'comparison' %} +{# ── Comparison table (table_v2) ── #} + + + {% if block.data.header_groups %} + + {% for c in block.data.base_columns %} + + {% endfor %} + {% for g in block.data.header_groups %} + + {% endfor %} + + + {% for g in block.data.header_groups %} + {% for c in g.columns %} + + {% endfor %} + {% endfor %} + + {% else %} + + {% for c in block.data.all_columns %} + {% set col_label = c.label | default('') %} + {% set col_lc = col_label | lower %} + + {% endfor %} + + {% endif %} + + + {% for row in block.data.rows %} + + {% for c in block.data.all_columns %} + {% set col_label = c.label | default('') %} + {% set col_lc = col_label | lower %} + + {% endfor %} + + {% endfor %} + +
{{ c.label | e }}{{ g.label | e }}
{{ c.label | e }}
{{ + c.label | e }}
{{ + row[c.key] | default('') | safe }}
+ +{% elif variant == 'spanning' %} +{# ── Spanning table (table_v3 / table_v4) ── #} + + + {% for row in block.data.rows %} + + {% for cell in row %} + 1 %} colspan="{{ cell.colspan }}"{% endif %}{% if cell.rowspan > 1 %} rowspan="{{ + cell.rowspan }}"{% endif %}>{{ cell.text | safe }} + {% endfor %} + + {% endfor %} + +
+ +{% else %} +{# ── Standard table ── #} + + {% if block.data.columns %} + + + {% for col in block.data.columns %} + {% set col_lc = (col | lower) %} + + {% endfor %} + + + {% endif %} + + {% for row in block.data.rows %} + + {% for cell in row %} + {% set col = block.data.columns[loop.index0] if block.data.columns and loop.index0 < (block.data.columns | + length) else '' %} {% set col_lc=(col | lower) %} + {% endfor %} + + {% endfor %} + +
{{ + col | e }}
{{ + cell | safe }}
+{% endif %} \ No newline at end of file diff --git a/app/templates/partials/blocks/university_summary.html b/app/templates/partials/blocks/university_summary.html new file mode 100644 index 0000000000000000000000000000000000000000..db229ddfe7c2ffe2142298b20f51cd364416cf00 --- /dev/null +++ b/app/templates/partials/blocks/university_summary.html @@ -0,0 +1,6 @@ +{# Block partial: university_summary — numbered list of universities #} +
    + {% for name in block.data.universities %} +
  1. {{ name | e }}
  2. + {% endfor %} +
\ No newline at end of file diff --git a/app/templates/partials/cover.html b/app/templates/partials/cover.html new file mode 100644 index 0000000000000000000000000000000000000000..5e93f23116404f97fa741965b7ae130670d8cee4 --- /dev/null +++ b/app/templates/partials/cover.html @@ -0,0 +1,4 @@ +{# Cover page partial #} +
+ Cover +
\ No newline at end of file diff --git a/app/templates/partials/section.html b/app/templates/partials/section.html new file mode 100644 index 0000000000000000000000000000000000000000..16a1f85db2bd3601d8b6a9c54ccf87666f55a82a --- /dev/null +++ b/app/templates/partials/section.html @@ -0,0 +1,6 @@ +{# Generic section block partial — renders pre-built HTML from renderers.py #} +
+ {{ rendered_html }} +
\ No newline at end of file diff --git a/app/templates/partials/toc.html b/app/templates/partials/toc.html new file mode 100644 index 0000000000000000000000000000000000000000..a873f117f2e8cae098a4119d058e5b94b2e32f58 --- /dev/null +++ b/app/templates/partials/toc.html @@ -0,0 +1,17 @@ +{# Table of Contents — CSS dot-leader layout (single-line per entry) #} +
+
CONTENTS
+
+ {% for e in toc_items_sorted %} + {% if e.title %} +
+ {% if e.target %}{% endif %}{{ e.display_title | e }}{% + if e.target %}{% endif %} + + {% if e.page %}{{ e.page | e }}{% endif %} +
+ {% endif %} + {% endfor %} +
+
\ No newline at end of file diff --git a/app/templates/partials/university.html b/app/templates/partials/university.html new file mode 100644 index 0000000000000000000000000000000000000000..398632771fda9619a78d6c4f705ac8f533d2ed3d --- /dev/null +++ b/app/templates/partials/university.html @@ -0,0 +1,126 @@ +{# University section partial — iterates within handbook.html #} +
+ + {# ── University Title ── #} + {% if uni.website %} + + {% else %} +
{{ uni.name | e }}
+ {% endif %} + + {# ── Two-column: Summary + Campus Image ── #} + + + + + +
+ {% if uni.overview %} +
    + {% if uni.overview.founded %}
  • Founded: {{ uni.overview.founded | e }} +
  • {% endif %} + {% if uni.overview.total_students %}
  • Total Students: {{ + uni.overview.total_students | e }}
  • {% endif %} + {% if uni.overview.undergraduates %}
  • Undergraduate Students: {{ + uni.overview.undergraduates | e }}
  • {% endif %} + {% if uni.overview.postgraduates %}
  • Postgraduate Students: {{ + uni.overview.postgraduates | e }}
  • {% endif %} + {% if uni.overview.acceptance_rate %} +
  • Acceptance Rate: {{ uni.overview.acceptance_rate | e }}
  • + {% endif %} + {% if uni.overview.location %} +
  • Location: {{ uni.overview.location | e }}
  • + {% endif %} + {% if uni.overview.tuition %}
  • Yearly Tuition/Out-of-State Tuition: {{ + uni.overview.tuition | e }}
  • {% endif %} +
+ {% if uni.website %} +
+ Website: + {{ uni.website | e }} +
+ {% endif %} + {% endif %} +
+ {% if uni.campus_image %} + Campus Image + {% else %} +
Campus image unavailable
+ {% endif %} +
+ + {# ── Benefits ── #} + {% if uni.benefits is defined and uni.benefits is not none %} +
+
Benefits for ISP Students
+ {% if uni.benefits %} +
    + {% for b in uni.benefits %} + {% if b %} +
  • {{ b | e + }}
  • + {% endif %} + {% endfor %} +
+ {% else %} +
No benefits listed.
+ {% endif %} +
+ {% endif %} + + {# ── Funding ── #} + {% if uni.funding_items %} +
+
{{ uni.funding_heading | default('Funding Available') | e }}
+
    + {% for item in uni.funding_items %} +
  • {{ item | e + }}
  • + {% endfor %} +
+
+ {% endif %} + + {# ── Programs Table ── #} + {% if uni.programs is defined %} +
To qualify for The International Scholars Program at {{ uni.name | e }}, you must be willing to + study any of the following programs:
+ {% if uni.programs %} + + + + + + + + + + {% for p in uni.programs %} + + + + + + {% endfor %} + +
ProgramDesignationEntrance Examination
+ {% if p.link %}{{ p.name | e + }} + {% else %}{{ p.name | e }}{% endif %} + {{ p.designation | e }}{{ p.entrance | e }}
+ {% else %} +
No programs listed.
+ {% endif %} + {% endif %} + + {# ── Extra Sections ── #} + {% for extra in uni.extra_sections %} + {{ extra.rendered_html }} + {% endfor %} + +
\ No newline at end of file diff --git a/fonts/GOTHIC.TTF b/fonts/GOTHIC.TTF new file mode 100644 index 0000000000000000000000000000000000000000..4a5ec9f29ea79e294cd0a7d7f351abd1c0d18aca --- /dev/null +++ b/fonts/GOTHIC.TTF @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a9cbb5d75b2a2b0d22dc94571608e4e9dc7b88e825374985880c5722c1c9e5f +size 137568 diff --git a/fonts/GOTHICB.TTF b/fonts/GOTHICB.TTF new file mode 100644 index 0000000000000000000000000000000000000000..169c1ef80e01cde8dc362348de715437f7e30033 --- /dev/null +++ b/fonts/GOTHICB.TTF @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90cb613b492874a560c0ff18a3402b1d24fb7e846dff11295d5c4644d6c75e83 +size 129676 diff --git a/fonts/GOTHICBI.TTF b/fonts/GOTHICBI.TTF new file mode 100644 index 0000000000000000000000000000000000000000..4ab9a0fd861bbc63ff8151301a37a7962146b4b9 --- /dev/null +++ b/fonts/GOTHICBI.TTF @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc5949d57d2e172601fb6f5093c1fbf15a463e29ed47c4c8ff2434baf1c2b19 +size 139084 diff --git a/fonts/GOTHICI.TTF b/fonts/GOTHICI.TTF new file mode 100644 index 0000000000000000000000000000000000000000..e621e4812c3ac1aea7dfc9b2a3c23dbbfe57537d --- /dev/null +++ b/fonts/GOTHICI.TTF @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf57be6f9d0bd60bd5dc6eee7c11e87e5b19df210156495a524b974185b9fb9 +size 148520 diff --git a/images/ISP Handbook_Global-60-66_page-0001.jpg b/images/ISP Handbook_Global-60-66_page-0001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a99eb29099d7111fcd6395a845bd4f720d11b0c5 --- /dev/null +++ b/images/ISP Handbook_Global-60-66_page-0001.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1552f511ba168a226999a3b74fce3aac3acd9283262fe6859ff18dd631bf7f01 +size 705364 diff --git a/images/ISP Handbook_Global-60-66_page-0002.jpg b/images/ISP Handbook_Global-60-66_page-0002.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..adafbe02ca4bbed8160c9ffce0ce554585321e53 --- /dev/null +++ b/images/ISP Handbook_Global-60-66_page-0002.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f545312eccde68413a2c810a2d679ba039490ab59eaaf9d9ce11b608536570f +size 683160 diff --git a/images/IUP.webp b/images/IUP.webp new file mode 100644 index 0000000000000000000000000000000000000000..fdd4a58c3a97f4d5f57c51777632e17b7d785004 --- /dev/null +++ b/images/IUP.webp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93a30bb6da3631b8da5e108fdbe525e7b56a5ee119570ede48f4b89c068829f +size 304534 diff --git a/images/LOPY-61-65_page-0003.jpg b/images/LOPY-61-65_page-0003.jpg new file mode 100644 index 0000000000000000000000000000000000000000..99423edb0feba5ad2820bc73e5c17285fd7a5c52 --- /dev/null +++ b/images/LOPY-61-65_page-0003.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce0d68a58c1a667ab57a46b3f6068d385162e5faf2dd50ab12ef1b8a17da892 +size 922387 diff --git a/images/LOPY-61-65_page-0003.png b/images/LOPY-61-65_page-0003.png new file mode 100644 index 0000000000000000000000000000000000000000..e9b18d5fc6901a5db6593610d546d3b9974ae8f6 --- /dev/null +++ b/images/LOPY-61-65_page-0003.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f5f2973ed6417612fab9bf69914f807b184cd5e08ba580305f8a8e206878f6 +size 825619 diff --git a/images/LOPY-61-65_page-0004.jpg b/images/LOPY-61-65_page-0004.jpg new file mode 100644 index 0000000000000000000000000000000000000000..32755b14924bf585705ad5c9b63a6ad07867d37f --- /dev/null +++ b/images/LOPY-61-65_page-0004.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5ca2f61a2e24d9787820a184ba758dee6fa293aaf3a53eedad59ad447dc254 +size 984971 diff --git a/images/LOPY-61-65_page-0004.png b/images/LOPY-61-65_page-0004.png new file mode 100644 index 
0000000000000000000000000000000000000000..0b4201594cf74caa65e428151e4ee1bb730d9485 --- /dev/null +++ b/images/LOPY-61-65_page-0004.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa3ff9feb86a5f1a34515320923f020fe69848207833ed559156617f6ce32cae +size 886729 diff --git a/images/Ohio.png b/images/Ohio.png new file mode 100644 index 0000000000000000000000000000000000000000..fb0984fe2637b8f9b2f55ed55f5bd6454e88056a --- /dev/null +++ b/images/Ohio.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a86b4a6f8741f881f822f6737eccdeef924b788d94896f65d39c45ae1a2ff9 +size 224526 diff --git a/images/OklahomaCityUniversity.png b/images/OklahomaCityUniversity.png new file mode 100644 index 0000000000000000000000000000000000000000..8c38579edb786e2ecec12d3072fecef29e1723c0 --- /dev/null +++ b/images/OklahomaCityUniversity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e40c18823cde9591a56f6e566b92d88c8baf3f791a4d68ac3e1bd1fbf202d9e +size 890473 diff --git a/images/RochesterInstituteofTechnology.png b/images/RochesterInstituteofTechnology.png new file mode 100644 index 0000000000000000000000000000000000000000..e34d06febaf2dd3537ff24b8f47019f6cfd2b0b9 --- /dev/null +++ b/images/RochesterInstituteofTechnology.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f858c6882b039d5f072b8b8b484f9a1330a12925ea173bcd81f02c8918c3a2 +size 225086 diff --git a/images/Rockhurst University.png b/images/Rockhurst University.png new file mode 100644 index 0000000000000000000000000000000000000000..a6c97e6e29e82c7cab02a67b8406ac28cf084a41 --- /dev/null +++ b/images/Rockhurst University.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b0b541f6fec87c4e3bfe6a029184a50fe6b092929f305b1778d3a8f7357b87 +size 247856 diff --git a/images/SaintLouisUniversity.png b/images/SaintLouisUniversity.png new file mode 100644 index 
0000000000000000000000000000000000000000..90e1431dcc3e18ea0a64ae92f4d7ff4a3e3b8643 --- /dev/null +++ b/images/SaintLouisUniversity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12ea7fce14f6f026208ae57601e13af55d0eff9574f8242ecc070d420b950d1c +size 270592 diff --git a/images/Tulane.png b/images/Tulane.png new file mode 100644 index 0000000000000000000000000000000000000000..f5b6cda4dc4c12b305c6dcc99aeb91524cb3c082 --- /dev/null +++ b/images/Tulane.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25cb94640de3f76ae4d47926ce1eff4841a201826932a2c664b4d0fa6b7326f7 +size 887202 diff --git a/images/UniversityofAlabamaatBirmingham.png b/images/UniversityofAlabamaatBirmingham.png new file mode 100644 index 0000000000000000000000000000000000000000..d68f971bbdbd5c27c44f120ee902b4e5f40d569a --- /dev/null +++ b/images/UniversityofAlabamaatBirmingham.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442b280dd7eb93b3f460043c12a9dd9d474b3ad3282d19629c13c4c0b4c4043a +size 224470 diff --git a/images/UniversityofDayton.png b/images/UniversityofDayton.png new file mode 100644 index 0000000000000000000000000000000000000000..b30a188ac00bc25bb8f28aea08fd28e4fc305901 --- /dev/null +++ b/images/UniversityofDayton.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35bf1cf4fb626ccaf4d04bed8b2ff81d8d2beeee85bda76fc27a17682e122e3c +size 259496 diff --git a/images/UniversityofMissouriSaintLouis.png b/images/UniversityofMissouriSaintLouis.png new file mode 100644 index 0000000000000000000000000000000000000000..72e9740ebca0f689fd334693ea6d71ea2d145cff --- /dev/null +++ b/images/UniversityofMissouriSaintLouis.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:689ae3e1340c6fdf26da6da74cef8fd621af7224ac49e1d8f20436900053cae5 +size 204134 diff --git a/images/UniversityofVirginia.png b/images/UniversityofVirginia.png new file mode 100644 index 
0000000000000000000000000000000000000000..05e4ad9b6e3dcc4859fe66bb175b100a90c779a7 --- /dev/null +++ b/images/UniversityofVirginia.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b089b72cf9846bb3693dab70b51404650cb9746b5e4e1d0a76780f60ff2d541e +size 204131 diff --git a/images/a1db9ae3-7ba4-458b-a550-1ac5057c74b1.png b/images/a1db9ae3-7ba4-458b-a550-1ac5057c74b1.png new file mode 100644 index 0000000000000000000000000000000000000000..579ed9933cad83627830aaa3ea1832c4abe67bb6 --- /dev/null +++ b/images/a1db9ae3-7ba4-458b-a550-1ac5057c74b1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c646dcef7794d49e55da38fe2e19c13b3b221cd41ca46bd0d1b99f817309e4ab +size 961287 diff --git a/images/califoniabaptistuniversity.png b/images/califoniabaptistuniversity.png new file mode 100644 index 0000000000000000000000000000000000000000..6cef0e62cdf127f6e6a9c382eba4ed28733f48c1 --- /dev/null +++ b/images/califoniabaptistuniversity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da1b1e73d30b2e54a12e133f31346bdfc05d05aa4d594ea34ad872beccd6e5d +size 906112 diff --git a/images/dayton.png b/images/dayton.png new file mode 100644 index 0000000000000000000000000000000000000000..e67cf8ae7209b7854f9f97e947fc9096de1ecaa2 --- /dev/null +++ b/images/dayton.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8187a734bc3bb66504709c2a17ed41ac2e85a228b9c0ffe6649d9116ae43877 +size 826656 diff --git a/images/drewuniversity.png b/images/drewuniversity.png new file mode 100644 index 0000000000000000000000000000000000000000..fcf0aa45b4f337dc5005def6e06a14b35ed6f5c2 --- /dev/null +++ b/images/drewuniversity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4721e32bbb56aa75c71f17027a998cc9b4ee09c2a83ac740e068d4a858dfdbaa +size 970828 diff --git a/images/first_page.jpg b/images/first_page.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..5c22f3502022096e98e2981264318d15951b47c0 --- /dev/null +++ b/images/first_page.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196cc875931f278493d1495eaaca2a555c3bf11e0a44a267e2ad05407bc8b5d4 +size 674935 diff --git a/images/grand_valley_image.png b/images/grand_valley_image.png new file mode 100644 index 0000000000000000000000000000000000000000..f06026dfda8eb32b0800692c12eebe84c6bb46da --- /dev/null +++ b/images/grand_valley_image.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6acaf996f464726a6222c0a7ba861869e6aa0fa436b5f26e0c271407b3acc12 +size 747211 diff --git a/images/illinois_state_university.png b/images/illinois_state_university.png new file mode 100644 index 0000000000000000000000000000000000000000..0ac1cafe054a0942641e1213b7c2ad9007d98817 --- /dev/null +++ b/images/illinois_state_university.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4eec06cda316d4aba21cef186c9ef55b9e9adef175d22fc2b1656c86a31619d +size 935747 diff --git a/images/iup.jpg b/images/iup.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c376d9311909206db353a15c3e3ba9dc58403e35 --- /dev/null +++ b/images/iup.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd82d02497f5ec6c5324499232855a53774a5622ec6747af4ff91b8cc7244ca +size 895529 diff --git a/images/iup.png b/images/iup.png new file mode 100644 index 0000000000000000000000000000000000000000..56e79f3744b8babcaff3f77ef078020225033d47 --- /dev/null +++ b/images/iup.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db254cfc0697cddfe19cf580a0521ac1721fa734557e476e492e2da0ca65b6d +size 955053 diff --git a/images/kenya_airlift_header.jpg b/images/kenya_airlift_header.jpg new file mode 100644 index 0000000000000000000000000000000000000000..16015a99f9dd745724d856098d02847bfffbb982 Binary files /dev/null and b/images/kenya_airlift_header.jpg 
differ diff --git a/images/label.png b/images/label.png new file mode 100644 index 0000000000000000000000000000000000000000..8a057ef0808f97540412b615ff2e67107109a541 --- /dev/null +++ b/images/label.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3a090bbb5af02e89ffa809d2a77aee53ebcac6cbdc4da84d5299fc36c37b5f +size 205237 diff --git a/images/lewis_university.png b/images/lewis_university.png new file mode 100644 index 0000000000000000000000000000000000000000..c8640cd0bbaf1bdabd468422453306beecc405f5 --- /dev/null +++ b/images/lewis_university.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1c7248c0a0a12632da266c058e6c0ec2fd98a55b5d0c7d834e43e5a24ff485 +size 880258 diff --git a/images/missouri.png b/images/missouri.png new file mode 100644 index 0000000000000000000000000000000000000000..b91a9ffab40e4eb11319fd22c606c33155f69e79 --- /dev/null +++ b/images/missouri.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30570f5c1405928bd0893d75a90106d231c6cbbccb4ca12c8180148da3a1d890 +size 207018 diff --git a/images/missouristateuniversity.png b/images/missouristateuniversity.png new file mode 100644 index 0000000000000000000000000000000000000000..03f4aea282d5ac53f0b601d55c08ddae89f287cb --- /dev/null +++ b/images/missouristateuniversity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8b735e23e18d328e99362550b780454aa92c44b2336d93cd11a09d710aa4e0 +size 790957 diff --git a/images/missuri-saintluis.png b/images/missuri-saintluis.png new file mode 100644 index 0000000000000000000000000000000000000000..3739d5e1017422502fd8b2a668d43deb112e5157 --- /dev/null +++ b/images/missuri-saintluis.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfdde4617cd562cc83c9405065055ab98b7bcc1b547ea67f253645c3bf4d62f9 +size 712438 diff --git a/images/montana.png b/images/montana.png new file mode 100644 index 
0000000000000000000000000000000000000000..e316cfc8f63393f72c3021cf96c1af6c913eba2b --- /dev/null +++ b/images/montana.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d8a767e71d0c3cce59bb5477cacc8a91753a6ce537f43ecab773c2d7a81218 +size 883221 diff --git a/images/montana_state_university.png b/images/montana_state_university.png new file mode 100644 index 0000000000000000000000000000000000000000..f4137f3dc4768fce6dc573cabeede25c12e9aada --- /dev/null +++ b/images/montana_state_university.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc773387d8b51b6c5ec586a2130c9c2de43651b75033d3db46eec2cdd56dcda +size 908115 diff --git a/images/oklahoma.png b/images/oklahoma.png new file mode 100644 index 0000000000000000000000000000000000000000..d62035160850249e7b66a33930f694e59bac17c2 --- /dev/null +++ b/images/oklahoma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233f65febfbd1d7a6f709936ade8b25019b0b4ce30e9a39343574f54b6b873e6 +size 922004 diff --git a/images/oregon_state_university.png b/images/oregon_state_university.png new file mode 100644 index 0000000000000000000000000000000000000000..4e7a1dbeca75805d19e5e6bc9105295caf835f7d --- /dev/null +++ b/images/oregon_state_university.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb88df25de8ce41a6be2ae6bb799fc7ca84deb66775bc14eec76cce5e78ed20 +size 245690 diff --git a/images/quinnipiacuniversity.png b/images/quinnipiacuniversity.png new file mode 100644 index 0000000000000000000000000000000000000000..c4e3f58d1ec09c3f74959faec50264fdb97c0caa --- /dev/null +++ b/images/quinnipiacuniversity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0264b4634e94cc4a4c983f2b886180a1dc8db8a224184958cff5e0177a8d8e14 +size 878871 diff --git a/images/rutgers_camden_university.png b/images/rutgers_camden_university.png new file mode 100644 index 
0000000000000000000000000000000000000000..6240cbc881c3b99c19b5ff8e6bb620edd1482835 --- /dev/null +++ b/images/rutgers_camden_university.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b5577dd59e85d9c5da650672c2f1ecc9e207ef3ea2cdce5dd187516f010898 +size 222946 diff --git a/images/texas_state_university.png b/images/texas_state_university.png new file mode 100644 index 0000000000000000000000000000000000000000..3735988c03418ad685793fa1b85c6e8ab3e33726 --- /dev/null +++ b/images/texas_state_university.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e03428147dec582f239ad8935793b01d94e31cac21a611cc70c3ad5dcacb12e6 +size 694852 diff --git a/images/toc.jpg b/images/toc.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c274981133c7ef79d9a91f7cf4234e4756a1811d --- /dev/null +++ b/images/toc.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d766539ab20a6ef3806f6e23ebee1d2d8f0b5b4a8caca47c041005cb0f62d02c +size 815512 diff --git a/images/university_building_2 (1).png b/images/university_building_2 (1).png new file mode 100644 index 0000000000000000000000000000000000000000..2c7266563504b752d8c70f14caebc1b69b445240 --- /dev/null +++ b/images/university_building_2 (1).png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1243ef7a7ebdaa587ab34003781ecaab1220c00741455d38575d3efe574996c +size 255857 diff --git a/images/university_of_oklahoma.png b/images/university_of_oklahoma.png new file mode 100644 index 0000000000000000000000000000000000000000..59878c28c9b4e6bfce9ac59f91d8f8102fae3df7 --- /dev/null +++ b/images/university_of_oklahoma.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d14aa3c08aeb8235c50b2ae33c4e1cb0c1ec5dde8b1968ce1f89600e54f471 +size 225072 diff --git a/images/universityofdelaware.png b/images/universityofdelaware.png new file mode 100644 index 
0000000000000000000000000000000000000000..7aaa76e6189a91a5d2faccce31bc4f3660d751b9 --- /dev/null +++ b/images/universityofdelaware.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ba804399996d69ff3408ba2fc1492eaa8564df5f6e41e204189cbf41897e3ae +size 237343 diff --git a/images/universityoflouville.png b/images/universityoflouville.png new file mode 100644 index 0000000000000000000000000000000000000000..77d467d76fbe616d3042836f48e3796e75c62bdc --- /dev/null +++ b/images/universityoflouville.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c315cb3fa156b2ce3aca8da3cb93a4c5d517480b540d8a203ab4ab3cac34164 +size 264750 diff --git a/images/universityofsouthdakota.png b/images/universityofsouthdakota.png new file mode 100644 index 0000000000000000000000000000000000000000..8b897c32e0965466b44f6864285246082ea3171f --- /dev/null +++ b/images/universityofsouthdakota.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b659cf11af3518f4c4ed330e773ef568a7e6ee96b350ff3b9471e240454935 +size 226816 diff --git a/images/viginia.png b/images/viginia.png new file mode 100644 index 0000000000000000000000000000000000000000..05e4ad9b6e3dcc4859fe66bb175b100a90c779a7 --- /dev/null +++ b/images/viginia.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b089b72cf9846bb3693dab70b51404650cb9746b5e4e1d0a76780f60ff2d541e +size 204131 diff --git a/images/virginia_commonwealth_university.png b/images/virginia_commonwealth_university.png new file mode 100644 index 0000000000000000000000000000000000000000..1a26b85477554d23e4a14ef84fea1cccbdf8699a --- /dev/null +++ b/images/virginia_commonwealth_university.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b6bdd213b1a1ce81343ab4b7049827de970c097142e595968bcb59ca546420 +size 270195 diff --git a/images/webster University.png b/images/webster University.png new file mode 100644 index 
0000000000000000000000000000000000000000..99438d82e979f310e22a5c6e51584dcc733d5112 --- /dev/null +++ b/images/webster University.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d37d66f79bd4d24979dc1191a376f89b0f5fbdeb9eaf9ccafa557e2f61c563a +size 258918 diff --git a/images/wilkesuniversity.png b/images/wilkesuniversity.png new file mode 100644 index 0000000000000000000000000000000000000000..71c6de30de1271c6555a111cf1fe8e59af9b4f7b --- /dev/null +++ b/images/wilkesuniversity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a0d131f97233f13959b3d929b9a0d6764e460a55ec56b6c71ce065977b6761 +size 228296 diff --git a/images/william_jessup_university.png b/images/william_jessup_university.png new file mode 100644 index 0000000000000000000000000000000000000000..16b956872e8fceb4eeb97ab2a6bab2a31d21d470 --- /dev/null +++ b/images/william_jessup_university.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:533b3efff931424afa6eb8627666ee8f17650edf8f9a439380ebd560a7e9c469 +size 235729 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..274cf3be3783d0d9a1f299c139dd625e057ee2ea --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +fastapi>=0.135.0 +uvicorn[standard]>=0.34.0 +pydantic>=2.10.0 +pydantic-settings>=2.7.0 +httpx>=0.28.0 +jinja2>=3.1.4 +markupsafe>=3.0.0 +playwright>=1.50.0 +pypdf>=5.0.0 +python-dotenv>=1.0.1