"""
Merge DepEd lesson module PDFs and upload to Firebase Storage.
Run: python backend/scripts/upload_lesson_modules.py
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from pypdf import PdfWriter, PdfReader
# Source PDFs live in <three levels up from this file>/datasets/lesson_modules.
LOCAL_MODULES_DIR = (
    Path(__file__).resolve().parents[1].parent.joinpath("datasets", "lesson_modules")
)

# Bucket that receives the merged PDFs.
FIREBASE_STORAGE_BUCKET = "mathpulse-ai-2026.firebasestorage.app"

# Upload plan: one entry per merged PDF.
#   id, modules, filename, local_dir -> consumed by merge_pdfs()
#   display_name, id, storage_path   -> consumed by main()
# "filename" is a str.format template; "{n}" is replaced by the module number
# and the result may contain glob wildcards.
UPLOAD_JOBS = [
    {
        "id": "basic-calc-q3",
        "display_name": "Basic Calculus Q3",
        "subject": "Basic Calculus",
        "subjectId": "basic-calc",
        "quarter": 3,
        "storage_path": "curriculum/basic_calc/SDO_Navotas_BasicCalc_SHS_Q3.FV.pdf",
        "local_dir": LOCAL_MODULES_DIR / "basic_calculus_q3",
        "filename": "Basic Calculus-Q3-Module-{n}.pdf",
        # Modules 1 through 8.
        "modules": [1, 2, 3, 4, 5, 6, 7, 8],
    },
    {
        "id": "gen-math-q2",
        "display_name": "General Mathematics Q2",
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "quarter": 2,
        "storage_path": "curriculum/gen_math_q2/SDO_Navotas_GenMath_SHS_Q2.FV.pdf",
        "local_dir": LOCAL_MODULES_DIR / "genmath_q2",
        "filename": "genmath_q2_mod{n}_*.pdf",
        # Only modules 2 and 3 are merged for this job.
        "modules": [2, 3],
    },
]
def merge_pdfs(job: dict) -> Path | None:
    """Merge a job's module PDFs into one file under ``LOCAL_MODULES_DIR / "merged"``.

    For every number in ``job["modules"]`` the source name is built from
    ``job["filename"]`` with ``{n}`` replaced by the module number. A name
    containing glob wildcards is resolved against ``job["local_dir"]`` (first
    match in sorted order); any other name is used literally. Modules whose
    file cannot be found are skipped with a warning.

    Args:
        job: One upload-plan entry; the ``id``, ``modules``, ``filename`` and
            ``local_dir`` keys are read.

    Returns:
        Path of the merged PDF, or ``None`` when no pages were collected, so
        callers never go on to upload an empty document.
    """
    output_dir = LOCAL_MODULES_DIR / "merged"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{job['id']}_merged.pdf"

    glob_chars = "*?["
    writer = PdfWriter()
    for mod_num in job["modules"]:
        name = job["filename"].format(n=mod_num)
        if any(ch in name for ch in glob_chars):
            # Directory iteration order is arbitrary; sort so the chosen
            # file is the same on every run.
            matches = sorted(job["local_dir"].glob(name))
            if not matches:
                print(f" [WARN] Could not find file for module {mod_num}")
                continue
            src_path = matches[0]
        else:
            src_path = job["local_dir"] / name
            if not src_path.exists():
                print(f" [WARN] File not found: {src_path}")
                continue

        reader = PdfReader(str(src_path))
        print(f" + {src_path.name} ({len(reader.pages)} pages)")
        for page in reader.pages:
            writer.add_page(page)

    total_pages = len(writer.pages)
    if total_pages == 0:
        # Nothing was merged: report failure instead of writing an empty PDF.
        print(f" [WARN] No pages merged for {job['id']}; nothing written")
        return None

    print(f" Writing {output_path.name} ({total_pages} total pages)")
    with open(output_path, "wb") as f:
        writer.write(f)
    return output_path
def upload_to_firebase(local_path: Path, storage_path: str) -> bool:
    """Upload a PDF file to Firebase Storage.

    Args:
        local_path: PDF on disk to upload.
        storage_path: Destination object name inside the bucket named by
            ``FIREBASE_STORAGE_BUCKET``.

    Returns:
        ``True`` when the upload call completed. ``False`` when the local file
        is missing, ``firebase-admin`` is not installed, the service-account
        file is missing, or initialisation/upload raised.
    """
    if not local_path.is_file():
        print(f" ERROR: Local file not found: {local_path}")
        return False

    try:
        import firebase_admin
        from firebase_admin import credentials, storage
    except ImportError:
        print(" ERROR: firebase-admin not installed")
        return False

    sa_file = Path(__file__).resolve().parents[1].parent / ".secrets" / "firebase-service-account.json"
    if not sa_file.exists():
        print(f" ERROR: Service account not found at {sa_file}")
        return False

    # Use the public API rather than the private ``_apps`` registry:
    # get_app() raises ValueError when the default app does not exist yet.
    try:
        firebase_admin.get_app()
    except ValueError:
        default_app_ready = False
    else:
        default_app_ready = True

    # Script boundary: report any SDK/network failure and return False so the
    # caller can move on to the next job instead of aborting the whole run.
    try:
        if not default_app_ready:
            cred = credentials.Certificate(str(sa_file))
            firebase_admin.initialize_app(cred, {"storageBucket": FIREBASE_STORAGE_BUCKET})
        bucket = storage.bucket()
        blob = bucket.blob(storage_path)
        print(f" Uploading to gs://{bucket.name}/{storage_path}")
        blob.upload_from_filename(str(local_path), content_type="application/pdf")
    except Exception as exc:
        print(f" ERROR: Upload failed: {exc}")
        return False

    print(" Upload complete!")
    return True
def main():
    """Process every entry of ``UPLOAD_JOBS``: merge its PDFs, then upload.

    A job whose merge or upload step fails is reported and skipped; the
    remaining jobs still run.
    """
    banner = "=" * 60
    print(banner)
    print("MathPulse AI — Lesson Module PDF Uploader")
    print(banner)

    for spec in UPLOAD_JOBS:
        title = spec["display_name"]
        print(f"\n[{title}]")
        print("-" * 40)

        # Merge first; without a merged file on disk there is nothing to upload.
        merged = merge_pdfs(spec)
        if not (merged and merged.exists()):
            print(f" [FAIL] Merge failed for {spec['id']}")
            continue

        # Push the merged file to its destination in the bucket.
        if not upload_to_firebase(merged, spec["storage_path"]):
            print(f" [FAIL] Upload failed for {spec['id']}")
            continue

        print(f"\n SUCCESS: {spec['display_name']}")
        print(f" Storage path: gs://{FIREBASE_STORAGE_BUCKET}/{spec['storage_path']}")
        # Page count is read back from the file that was actually written.
        page_total = len(PdfReader(str(merged)).pages)
        print(f" Pages: {page_total}")

    print(f"\n{banner}")
    print("Done!")
# Run the uploader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()