File size: 3,133 Bytes
76db545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""
FastAPI application factory.
Uses lifespan context manager to load the Whisper model at startup
and register language adapters — keeping a single backbone in GPU memory.
"""
from __future__ import annotations

import logging
import os
from contextlib import asynccontextmanager

import yaml
from fastapi import FastAPI

from src.api.middleware import register_middleware
from src.api.routes import health, iot, transcribe
from src.engine.adapter_manager import AdapterManager
from src.engine.transcriber import Transcriber
from src.engine.whisper_base import WhisperBackbone
from src.iot.sensor_bridge import SensorBridge

logger = logging.getLogger(__name__)

# Configure root logging once at import time. The level comes from the
# LOG_LEVEL environment variable; logging level names are case-sensitive
# ("debug" is rejected with ValueError), so normalise the value and fall back
# to INFO when the variable is unset or empty instead of crashing on import.
logging.basicConfig(
    level=(os.getenv("LOG_LEVEL") or "INFO").strip().upper(),
    format="%(asctime)s %(levelname)s %(name)s — %(message)s",
)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model at startup and release it at shutdown.

    Startup reads ``configs/base_config.yaml``, loads the Whisper backbone,
    registers the ``bam`` and ``ful`` adapters, tries to pre-load ``bam``,
    and attaches the shared objects to ``app.state``.

    Environment variables read:
        HF_TOKEN: passed to ``backbone.load`` (may be unset).
        DEVICE: passed to ``backbone.load``; defaults to ``"cuda"``.
        BAMBARA_ADAPTER_PATH: defaults to ``"./adapters/bambara"``.
        FULA_ADAPTER_PATH: defaults to ``"./adapters/fula"``.
        SENSOR_API_URL: empty or unset is passed on as ``None``.

    Args:
        app: The FastAPI application whose ``state`` receives ``backbone``,
            ``adapter_manager``, ``transcriber`` and ``sensor_bridge``.

    Yields:
        None. Control returns to the server while the application runs.
    """
    config_path = "configs/base_config.yaml"
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    hf_token = os.getenv("HF_TOKEN")
    device = os.getenv("DEVICE", "cuda")
    bambara_path = os.getenv("BAMBARA_ADAPTER_PATH", "./adapters/bambara")
    fula_path = os.getenv("FULA_ADAPTER_PATH", "./adapters/fula")
    sensor_api_url = os.getenv("SENSOR_API_URL") or None

    # 1. Load backbone
    logger.info("Loading Whisper backbone...")
    backbone = WhisperBackbone(config_path)
    backbone.load(device=device, hf_token=hf_token)

    # From here on the backbone is loaded, so it must be released even if a
    # later startup step fails or the application exits with an exception.
    try:
        # 2. Register adapters (they are loaded on first use via activate())
        adapter_manager = AdapterManager(backbone.model, config)
        adapter_manager.register("bam", bambara_path)
        adapter_manager.register("ful", fula_path)

        # 3. Pre-load the default adapter to warm up VRAM.
        # Best-effort: a failure here is logged and startup continues.
        try:
            adapter_manager.load_adapter("bam")
            logger.info("Default adapter 'bam' pre-loaded.")
        except Exception as e:
            logger.warning("Could not pre-load 'bam' adapter: %s", e)

        # 4. Create transcriber and sensor bridge
        transcriber = Transcriber(backbone, adapter_manager)
        sensor_bridge = SensorBridge(sensor_api_url=sensor_api_url)

        # 5. Attach to app.state for dependency injection
        app.state.backbone = backbone
        app.state.adapter_manager = adapter_manager
        app.state.transcriber = transcriber
        app.state.sensor_bridge = sensor_bridge

        logger.info("Sahel-Agri Voice AI server ready.")
        yield
    finally:
        # Shutdown (also reached on startup failure after the backbone loaded)
        logger.info("Shutting down — freeing GPU memory...")
        backbone.free()


def create_app() -> FastAPI:
    """Build the FastAPI application.

    Creates the app with its metadata and the ``lifespan`` hook, registers
    middleware, then mounts the health, transcribe and iot routers under
    ``/api/v1``.

    Returns:
        The configured FastAPI instance.
    """
    summary = (
        "Modular STT engine for Bambara and Fula — serving Mali and Guinea farmers "
        "via voice-first agricultural intelligence."
    )
    application = FastAPI(
        title="Sahel-Agri Voice AI",
        description=summary,
        version="0.1.0",
        lifespan=lifespan,
    )

    register_middleware(application)

    # Mount routers in a fixed order, each tagged with its own name.
    for route_module, tag in (
        (health, "health"),
        (transcribe, "transcribe"),
        (iot, "iot"),
    ):
        application.include_router(
            route_module.router, prefix="/api/v1", tags=[tag]
        )

    return application


# Module-level application instance, built once when this module is imported.
# NOTE(review): presumably the object an ASGI server is pointed at
# (e.g. "<module>:app") — confirm against the deployment command.
app = create_app()