Agentic-Service-Data-Eyond-Catalog

Running

File size: 3,919 Bytes

"""Document management API endpoints."""
 
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File
from sqlalchemy.ext.asyncio import AsyncSession
from src.db.postgres.connection import get_db
from src.document.document_service import document_service
from src.middlewares.logging import get_logger, log_execution
from src.middlewares.rate_limit import limiter
from src.pipeline.document_pipeline import document_pipeline
from pydantic import BaseModel
from typing import List
 
logger = get_logger("document_api")
 
router = APIRouter(prefix="/api/v1", tags=["Documents"])
 
 
class DocumentResponse(BaseModel):
    # Response schema for one document, used as the item type of the
    # /documents/{user_id} listing endpoint in this module.
    # Documented with `#` comments on purpose: pydantic copies a model's
    # docstring into the generated JSON schema description.

    # Identifier of the document record.
    id: str
    # File name stored on the document record.
    filename: str
    # Status value stored on the document record (set of values not visible here).
    status: str
    # Stored file size; list_documents substitutes 0 when the stored value is
    # falsy. Unit is presumably bytes — TODO confirm.
    file_size: int
    # File type stored on the record; presumably one of the extensions listed in
    # _DOC_TYPES — TODO confirm.
    file_type: str
    # Creation timestamp as text; list_documents fills it with
    # `created_at.isoformat()`.
    created_at: str
 
 
# NOTE: Keep in sync with SUPPORTED_FILE_TYPES in src/pipeline/document_pipeline.py
# Every supported extension currently shares the same size limit, status and
# (absent) message, so the table is generated from the extension list. Each row
# is its own dict. The unit of max_size is presumably megabytes — TODO confirm.
_DOC_TYPES = [
    {"doc_type": extension, "max_size": 10, "status": "active", "message": None}
    for extension in ("pdf", "docx", "txt", "csv", "xlsx")
]


@router.get(
    "/documents/doctypes",
    summary="List supported document types",
    response_description="All document types supported by DataEyond with their size limits and status.",
)
@log_execution(logger)
async def get_document_types():
    """Return every document type DataEyond can process, with max file size and active/inactive status."""
    # Static data: the module-level table is returned as-is inside the same
    # {"status", "data"} envelope the other endpoints in this module use.
    # This route is registered before "/documents/{user_id}", so the literal
    # path "doctypes" is matched here rather than captured as a user id.
    envelope = {"status": "success", "data": _DOC_TYPES}
    return envelope


def _to_document_response(record):
    """Map one stored document record onto the public DocumentResponse schema."""
    return DocumentResponse(
        id=record.id,
        filename=record.filename,
        status=record.status,
        # A falsy stored size (e.g. None) is reported as 0.
        file_size=record.file_size or 0,
        file_type=record.file_type,
        # The response schema carries the timestamp as an ISO-formatted string.
        created_at=record.created_at.isoformat(),
    )


@router.get("/documents/{user_id}", response_model=List[DocumentResponse])
@log_execution(logger)
async def list_documents(
    user_id: str,
    db: AsyncSession = Depends(get_db)
):
    """List all documents for a user."""
    records = await document_service.get_user_documents(db, user_id)
    return [_to_document_response(record) for record in records]
 
 
@router.post("/document/upload")
@limiter.limit("10/minute")
@log_execution(logger)
async def upload_document(
    request: Request,
    file: UploadFile = File(...),
    user_id: str = None,
    db: AsyncSession = Depends(get_db)
):
    """Upload a document."""
    # user_id defaults to None, so a missing (or empty) value reaches this
    # explicit check and is answered with a 400 and a specific message.
    if not user_id:
        raise HTTPException(status_code=400, detail="user_id is required")

    # Storage and any validation of the file are delegated to the pipeline.
    upload_result = await document_pipeline.upload(file, user_id, db)

    return {
        "status": "success",
        "message": "Document uploaded successfully",
        "data": upload_result,
    }
 
 
@router.delete("/document/delete")
@log_execution(logger)
async def delete_document(
    document_id: str,
    user_id: str,
    db: AsyncSession = Depends(get_db)
):
    """Delete a document."""
    # Errors from the delete itself still propagate to the client unchanged.
    await document_pipeline.delete(document_id, user_id, db)

    # Imported at call time rather than at module load, as elsewhere in this module.
    from src.pipeline.triggers import on_tabular_deleted

    # The document is already deleted at this point. If the follow-up trigger
    # fails, log it instead of turning a completed deletion into a 500; this is
    # the same policy process_document applies to its own follow-up trigger.
    try:
        await on_tabular_deleted(document_id, user_id)
    except Exception as e:
        logger.error("on_tabular_deleted failed after delete", document_id=document_id, error=str(e))

    return {"status": "success", "message": "Document deleted successfully"}
 
 
@router.post("/document/process")
@log_execution(logger)
async def process_document(
    document_id: str,
    user_id: str,
    db: AsyncSession = Depends(get_db)
):
    """Process document and ingest to vector index."""
    result = await document_pipeline.process(document_id, user_id, db)

    # Only spreadsheet-like files get the extra tabular follow-up step.
    is_tabular = result["file_type"] in ("csv", "xlsx")
    if is_tabular:
        # Imported at call time, and only when it is actually needed.
        from src.pipeline.triggers import on_tabular_uploaded
        try:
            await on_tabular_uploaded(document_id, user_id)
        except Exception as exc:
            # Best effort: processing has already succeeded, so a failure of
            # the follow-up step is logged and the request still succeeds.
            logger.error("catalog ingestion failed after process", document_id=document_id, error=str(exc))

    return {
        "status": "success",
        "message": "Document processed successfully",
        "data": result,
    }