mathpulse-api-v3test / scripts /upload_vectorstore_to_firebase.py
github-actions[bot]
🚀 Auto-deploy backend from GitHub (ce99ac1)
9594951
"""
Upload vectorstore directory to Firebase Storage.
Run: python -m backend.scripts.upload_vectorstore_to_firebase
"""
from __future__ import annotations
import logging
import os
import sys
from pathlib import Path
# Module-level logger; the name is a fixed string rather than derived from __name__.
logger = logging.getLogger("mathpulse.upload_vectorstore")
# Prepend the directory two levels above this file's own directory to sys.path
# so that the absolute `backend.*` import below can be resolved when the file is
# executed directly. Must run BEFORE that import.
# NOTE(review): presumably parents[2] is the repository root — confirm against the layout.
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from backend.rag.firebase_storage_loader import _init_firebase_storage
# Default local directory to upload; used as the default of the --source CLI option.
# NOTE(review): this uses parents[3], i.e. one level ABOVE the directory added to
# sys.path above — confirm that "datasets/" really lives outside that directory.
VECTORSTORE_SOURCE_DIR = Path(__file__).resolve().parents[3] / "datasets" / "vectorstore"
# Default remote object-name prefix; used as the default of the --prefix CLI option.
REMOTE_PREFIX = "vectorstore/"
def upload_directory(local_dir: Path, bucket, prefix: str) -> tuple[int, int]:
    """Recursively upload a local directory to a Firebase Storage prefix.

    Every file found under ``local_dir`` is uploaded to the object name
    ``<prefix>/<path relative to local_dir>`` (always with forward slashes).
    A failure on one file is logged and counted; it does not stop the
    remaining uploads.

    Args:
        local_dir: Root of the local directory tree to upload.
        bucket: Storage bucket object; only ``bucket.blob(name)`` and the
            returned blob's ``upload_from_filename(path)`` are used.
        prefix: Remote folder prefix. A missing trailing ``/`` is added so
            that ``"vectorstore"`` and ``"vectorstore/"`` behave the same;
            an empty string uploads to the bucket root.

    Returns:
        A ``(uploaded, skipped)`` tuple, where ``skipped`` is the number of
        files whose upload failed.
    """
    # Without this, a prefix such as "vectorstore" would be glued directly
    # onto the file name ("vectorstoreindex.faiss").
    if prefix and not prefix.endswith("/"):
        prefix = f"{prefix}/"

    uploaded = 0
    skipped = 0
    for root, _dirs, files in os.walk(local_dir):
        for filename in files:
            local_path = Path(root) / filename
            relative_path = local_path.relative_to(local_dir)
            remote_path = f"{prefix}{relative_path.as_posix()}"
            try:
                # Read the size BEFORE uploading so that a stat() failure can
                # never mark an already-uploaded file as skipped.
                size = local_path.stat().st_size
                bucket.blob(remote_path).upload_from_filename(str(local_path))
            except Exception as e:
                # Deliberately broad: this is the per-file boundary of a
                # best-effort batch, so any failure is logged and counted.
                logger.error("Failed to upload %s: %s", remote_path, e)
                skipped += 1
            else:
                logger.info("Uploaded: %s (%d bytes)", remote_path, size)
                uploaded += 1
    return uploaded, skipped
if __name__ == "__main__":
    # Command-line entry point: parse options, validate the source directory,
    # obtain the storage bucket, upload, and report the result via exit code.
    import argparse

    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(description="Upload vectorstore to Firebase Storage")
    parser.add_argument(
        "--source",
        type=str,
        default=str(VECTORSTORE_SOURCE_DIR),
        help="Local vectorstore directory",
    )
    parser.add_argument(
        "--prefix",
        type=str,
        default=REMOTE_PREFIX,
        help="Remote path prefix in Firebase Storage",
    )
    args = parser.parse_args()

    source_dir = Path(args.source)
    if not source_dir.exists():
        logger.error("Source directory does not exist: %s", source_dir)
        sys.exit(1)
    # os.walk() silently yields nothing for a regular file, which would be
    # reported as a successful upload of zero files; reject it explicitly.
    if not source_dir.is_dir():
        logger.error("Source path is not a directory: %s", source_dir)
        sys.exit(1)

    # Only the second element of the returned pair (the bucket) is needed here;
    # None signals that storage could not be initialised.
    _, bucket = _init_firebase_storage()
    if bucket is None:
        logger.error("Firebase Storage not available")
        sys.exit(1)

    logger.info(
        "Uploading vectorstore from %s to gs://%s/%s",
        source_dir,
        bucket.name,
        args.prefix,
    )
    uploaded, skipped = upload_directory(source_dir, bucket, args.prefix)
    logger.info("Upload complete: %d uploaded, %d skipped", uploaded, skipped)

    # All uploads have been attempted at this point; surface any failures to
    # the caller (shell, CI job) instead of exiting 0 on a partial upload.
    if skipped:
        sys.exit(1)