Spaces:
Running
Running
| """ | |
| Merge DepEd lesson module PDFs and upload to Firebase Storage. | |
| Run: python backend/scripts/upload_lesson_modules.py | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import sys | |
| from pathlib import Path | |
| sys.path.insert(0, str(Path(__file__).resolve().parents[1])) | |
| from pypdf import PdfWriter, PdfReader | |
# Directory holding the per-module source PDFs. ``parents[1]`` is the directory
# two levels above this file (``backend/`` when the script lives at the path
# shown in the module docstring), so this resolves to
# ``<parent of that directory>/datasets/lesson_modules``.
LOCAL_MODULES_DIR = Path(__file__).resolve().parents[1].parent / "datasets" / "lesson_modules"

# Bucket name passed as ``storageBucket`` when the Firebase app is initialised;
# also echoed in the per-job success message.
FIREBASE_STORAGE_BUCKET = "mathpulse-ai-2026.firebasestorage.app"

# Upload plan
#
# One dict per merged PDF to produce and upload. Keys read by this script:
#   id            - names the merged output file (``<id>_merged.pdf``) and
#                   appears in log messages.
#   display_name  - human-readable label printed for the job.
#   storage_path  - destination object path handed to the upload step.
#   local_dir     - directory searched for the job's source PDFs.
#   filename      - template formatted with ``n=<module number>``; the
#                   gen-math entry contains a ``*`` wildcard and is resolved
#                   with ``Path.glob``.
#   modules       - module numbers to merge, in this order.
# ``subject``, ``subjectId`` and ``quarter`` are not read by the code visible
# in this script (presumably metadata for other consumers - TODO confirm).
UPLOAD_JOBS = [
    {
        "id": "basic-calc-q3",
        "display_name": "Basic Calculus Q3",
        "subject": "Basic Calculus",
        "subjectId": "basic-calc",
        "quarter": 3,
        "storage_path": "curriculum/basic_calc/SDO_Navotas_BasicCalc_SHS_Q3.FV.pdf",
        "local_dir": LOCAL_MODULES_DIR / "basic_calculus_q3",
        "filename": "Basic Calculus-Q3-Module-{n}.pdf",
        "modules": list(range(1, 9)),  # Modules 1-8
    },
    {
        "id": "gen-math-q2",
        "display_name": "General Mathematics Q2",
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "quarter": 2,
        "storage_path": "curriculum/gen_math_q2/SDO_Navotas_GenMath_SHS_Q2.FV.pdf",
        "local_dir": LOCAL_MODULES_DIR / "genmath_q2",
        "filename": "genmath_q2_mod{n}_*.pdf",
        "modules": [2, 3],  # Modules 2 and 3 only
    },
]
def _find_module_pdf(job: dict, mod_num: int) -> Path | None:
    """Locate the source PDF for module *mod_num* of *job*.

    ``job["filename"]`` is formatted with ``n=mod_num``. If the result contains
    glob wildcards it is matched inside ``job["local_dir"]`` and the
    lexicographically first hit is used (so the choice is deterministic);
    otherwise it is treated as a literal file name.

    Returns the path, or ``None`` (after printing a warning) when no file
    exists for that module.
    """
    local_dir = job["local_dir"]
    name_or_pattern = job["filename"].format(n=mod_num)

    if any(ch in name_or_pattern for ch in "*?["):
        matches = sorted(local_dir.glob(name_or_pattern))
        if not matches:
            print(f" [WARN] Could not find file for module {mod_num}")
            return None
        return matches[0]

    src_path = local_dir / name_or_pattern
    if not src_path.exists():
        print(f" [WARN] File not found: {src_path}")
        return None
    return src_path


def merge_pdfs(job: dict) -> Path | None:
    """Merge the module PDFs of *job* into one file.

    The modules listed in ``job["modules"]`` are appended in order; modules
    whose source file is missing are skipped with a warning. The result is
    written to ``LOCAL_MODULES_DIR / "merged" / "<job id>_merged.pdf"``.

    Returns:
        The path of the merged PDF, or ``None`` when no pages could be
        collected (nothing is written in that case, so an empty PDF is never
        handed to the upload step).
    """
    output_dir = LOCAL_MODULES_DIR / "merged"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{job['id']}_merged.pdf"

    writer = PdfWriter()
    for mod_num in job["modules"]:
        src_path = _find_module_pdf(job, mod_num)
        if src_path is None:
            continue
        reader = PdfReader(str(src_path))
        print(f" + {src_path.name} ({len(reader.pages)} pages)")
        for page in reader.pages:
            writer.add_page(page)

    # Every module was missing: signal failure instead of writing (and later
    # uploading) a zero-page PDF.
    if len(writer.pages) == 0:
        print(f" [WARN] No pages merged for {job['id']}")
        return None

    print(f" Writing {output_path.name} ({len(writer.pages)} total pages)")
    with open(output_path, "wb") as f:
        writer.write(f)
    return output_path
def upload_to_firebase(local_path: Path, storage_path: str) -> bool:
    """Upload a PDF file to Firebase Storage.

    Args:
        local_path: PDF on disk to upload.
        storage_path: Destination object path inside the bucket.

    Returns:
        ``True`` when the upload finished. ``False`` when ``firebase-admin``
        is not installed, the service-account file is missing, or
        initialisation/upload raised; the reason is printed and the caller can
        carry on with the next job.
    """
    try:
        import firebase_admin
        from firebase_admin import credentials, storage
    except ImportError:
        print(" ERROR: firebase-admin not installed")
        return False

    sa_file = Path(__file__).resolve().parents[1].parent / ".secrets" / "firebase-service-account.json"
    if not sa_file.exists():
        print(f" ERROR: Service account not found at {sa_file}")
        return False

    try:
        # get_app() raises ValueError when the default app has not been
        # initialised yet; this avoids reading the private ``_apps`` registry.
        try:
            firebase_admin.get_app()
        except ValueError:
            cred = credentials.Certificate(str(sa_file))
            firebase_admin.initialize_app(cred, {"storageBucket": FIREBASE_STORAGE_BUCKET})

        bucket = storage.bucket()
        blob = bucket.blob(storage_path)
        print(f" Uploading to gs://{bucket.name}/{storage_path}")
        blob.upload_from_filename(str(local_path), content_type="application/pdf")
    except Exception as exc:  # boundary: report and let the caller continue
        print(f" ERROR: Upload failed: {exc}")
        return False

    print(" Upload complete!")
    return True
def main():
    """Run every job in UPLOAD_JOBS: merge its PDFs, upload, and report."""
    rule = "=" * 60
    print(rule)
    print("MathPulse AI — Lesson Module PDF Uploader")
    print(rule)

    for job in UPLOAD_JOBS:
        print(f"\n[{job['display_name']}]")
        print("-" * 40)

        # Merge the module PDFs; skip the job if nothing usable came back.
        merged = merge_pdfs(job)
        if merged is None or not merged.exists():
            print(f" [FAIL] Merge failed for {job['id']}")
            continue

        # Push the merged file to its storage destination.
        uploaded = upload_to_firebase(merged, job["storage_path"])
        if not uploaded:
            print(f" [FAIL] Upload failed for {job['id']}")
            continue

        print(f"\n SUCCESS: {job['display_name']}")
        print(f" Storage path: gs://{FIREBASE_STORAGE_BUCKET}/{job['storage_path']}")
        page_count = len(PdfReader(str(merged)).pages)
        print(f" Pages: {page_count}")

    print("\n" + rule)
    print("Done!")


if __name__ == "__main__":
    main()