File size: 6,568 Bytes
de4b0cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
Firebase Storage PDF loader for curriculum ingestion.
Downloads PDFs from Firebase Storage and extracts text for ChromaDB indexing.
"""

from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote

# Module logger. The name is a fixed string rather than ``__name__`` so the
# logger keeps the same identity however this module is imported.
logger = logging.getLogger("mathpulse.fb_storage_loader")

# Updated by _init_firebase_storage() to record whether a Firebase app has
# been found or created in this process.
_FIREBASE_INITIALIZED: bool = False


def _init_firebase_storage() -> Tuple[Optional[Any], Optional[Any]]:
    """Return the ``firebase_admin.storage`` module and the default bucket.

    The default Firebase app is reused when it already exists. Otherwise it
    is created with credentials taken from, in order of preference:

    1. ``FIREBASE_SERVICE_ACCOUNT_JSON`` - service-account JSON as a string,
    2. ``FIREBASE_SERVICE_ACCOUNT_FILE`` - path to a service-account file,
    3. application default credentials.

    ``FIREBASE_STORAGE_BUCKET`` supplies the bucket name and is only applied
    when this function creates the app itself.

    Returns:
        ``(storage_module, bucket)`` on success, or ``(None, None)`` when
        ``firebase_admin`` is not installed, app initialization fails, or the
        bucket cannot be obtained. Failures are logged as warnings and never
        raised to the caller.
    """
    global _FIREBASE_INITIALIZED

    # Imported lazily so this module stays importable without firebase_admin.
    try:
        import firebase_admin
        from firebase_admin import credentials, storage
    except ImportError:
        logger.warning("firebase_admin not installed")
        return None, None

    # storage.bucket() is called without an app argument below, so what
    # matters is whether the *default* app exists. get_app() raises ValueError
    # when it does not; a named-only app therefore no longer blocks setup.
    try:
        firebase_admin.get_app()
    except ValueError:
        sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
        sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
        bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")

        try:
            if sa_json:
                import json as _json
                creds = credentials.Certificate(_json.loads(sa_json))
            elif sa_file and Path(sa_file).exists():
                creds = credentials.Certificate(sa_file)
            else:
                if sa_file:
                    # Make the silent fallback visible: a misconfigured path
                    # is otherwise very hard to diagnose.
                    logger.warning(
                        "FIREBASE_SERVICE_ACCOUNT_FILE not found: %s; "
                        "falling back to application default credentials",
                        sa_file,
                    )
                creds = credentials.ApplicationDefault()

            firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
        except Exception as e:
            logger.warning("Firebase init failed: %s", e)
            _FIREBASE_INITIALIZED = False
            return None, None

    _FIREBASE_INITIALIZED = True

    # Single bucket lookup shared by the "already initialized" and the
    # "just initialized" paths.
    try:
        return storage, storage.bucket()
    except Exception as e:
        logger.warning("Firebase storage bucket unavailable: %s", e)
        return None, None


def download_pdf_from_storage(storage_path: str, dest_path: Optional[str] = None) -> Optional[bytes]:
    """Fetch a PDF blob from Firebase Storage.

    Args:
        storage_path: Object path of the blob inside the bucket.
        dest_path: Optional local file path. When given, the downloaded bytes
            are also written there; missing parent directories are created.

    Returns:
        The blob contents, or ``None`` when storage is unavailable, the blob
        does not exist, or any step of the download/save raises.
    """
    _storage_module, bucket = _init_firebase_storage()
    if bucket is None:
        logger.warning("Firebase Storage not available, skipping download")
        return None

    try:
        pdf_blob = bucket.blob(storage_path)
        if not pdf_blob.exists():
            logger.warning("Blob does not exist: %s", storage_path)
            return None

        payload = pdf_blob.download_as_bytes()
        logger.info("Downloaded %s (%d bytes)", storage_path, len(payload))

        if dest_path:
            target = Path(dest_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(payload)
            logger.info("Saved to %s", dest_path)
    except Exception as exc:
        # Best-effort: any failure (including the local save) yields None.
        logger.error("Failed to download %s: %s", storage_path, exc)
        return None

    return payload


def list_curriculum_blobs(prefix: str = "curriculum/") -> List[Dict[str, Any]]:
    """List the PDF blobs stored under ``prefix`` in Firebase Storage.

    Args:
        prefix: Object-name prefix to list under.

    Returns:
        One dict per PDF blob with the keys ``name``, ``size`` (as reported
        by the blob), ``updated`` (stringified timestamp, or ``None`` when
        the blob has none) and ``download_url``. An empty list is returned
        when storage is unavailable or the listing fails; failures are
        logged, matching ``download_pdf_from_storage``.
    """
    _, bucket = _init_firebase_storage()
    if bucket is None:
        return []

    result: List[Dict[str, Any]] = []
    try:
        # The listing is consumed inside the try so that errors raised while
        # iterating are handled here as well.
        for blob in bucket.list_blobs(prefix=prefix):
            # Case-insensitive so files uploaded as ".PDF" are not skipped.
            if not blob.name.lower().endswith(".pdf"):
                continue
            # Percent-encode the object name so names containing spaces or
            # reserved characters still yield a valid URL; "/" is kept as the
            # path separator. Names made of unreserved characters are unchanged.
            encoded_name = quote(blob.name, safe="/")
            result.append({
                "name": blob.name,
                "size": blob.size,
                "updated": str(blob.updated) if blob.updated else None,
                "download_url": f"https://storage.googleapis.com/{bucket.name}/{encoded_name}",
            })
    except Exception as e:
        logger.error("Failed to list blobs under %s: %s", prefix, e)
        return []
    return result


# NOTE: Curriculum guide PDFs (shaping papers) are stored in Firebase Storage
# for system reference but are NOT included in RAG ingestion because they
# contain only learning objectives and course descriptions — insufficient
# content for lesson generation (typically <10 chunks each).
#
# Only SDO teaching modules (full lesson content with examples and problems)
# are included in the RAG pipeline.

# One row per ingested PDF:
# (storage path, subject, subjectId, content_domain, quarter).
_SDO_MODULE_ROWS = (
    # General Mathematics Q1 — SDO Navotas teaching module (100 pages, ~117k chars)
    (
        "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
        "General Mathematics", "gen-math", "general", 1,
    ),
    # General Mathematics Q2 — Interest & Annuities modules (~27-35 pages each)
    (
        "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf",
        "General Mathematics", "gen-math", "general", 2,
    ),
    (
        "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf",
        "General Mathematics", "gen-math", "general", 2,
    ),
    (
        "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf",
        "General Mathematics", "gen-math", "general", 2,
    ),
    (
        "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf",
        "General Mathematics", "gen-math", "general", 2,
    ),
    # Statistics and Probability — Full textbook (331 pages, ~607k chars)
    (
        "curriculum/stat_prob/Full.pdf",
        "Statistics and Probability", "stats-prob", "statistics", 1,
    ),
)

# Metadata keyed by storage path. Every entry is its own dict, has type
# "sdo_module", and repeats its key under "storage_path".
PDF_METADATA: Dict[str, dict] = {
    path: {
        "subject": subject,
        "subjectId": subject_id,
        "type": "sdo_module",
        "content_domain": domain,
        "quarter": quarter,
        "storage_path": path,
    }
    for path, subject, subject_id, domain, quarter in _SDO_MODULE_ROWS
}