"""Phase 4b: launch the FastAPI inference server under uvicorn.

Usage:
    python scripts/run_server.py
    python scripts/run_server.py --host 0.0.0.0 --port 8000
"""

import argparse
import sys
from pathlib import Path

# Put the directory two levels above this file at the front of sys.path so
# the "src.api.app:app" import string handed to uvicorn can be resolved.
sys.path.insert(0, str(Path(__file__).parent.parent))

from dotenv import load_dotenv

# Populate the environment from a .env file before uvicorn is imported.
load_dotenv()

import uvicorn


def _parse_cli():
    """Build the command-line parser and return the parsed arguments."""
    cli = argparse.ArgumentParser(description="Start Sahel-Agri Voice AI server")
    cli.add_argument("--host", default="0.0.0.0")
    cli.add_argument("--port", default=8000, type=int)
    cli.add_argument(
        "--reload",
        action="store_true",
        help="Enable hot-reload (dev only)",
    )
    return cli.parse_args()


def _print_banner(args):
    """Print the bind address and the endpoint URLs, then a blank line."""
    banner = (
        f"Starting server on http://{args.host}:{args.port}",
        "Endpoints:",
        f" GET http://localhost:{args.port}/api/v1/health",
        f" POST http://localhost:{args.port}/api/v1/transcribe",
        f" POST http://localhost:{args.port}/api/v1/query",
        f" GET http://localhost:{args.port}/docs (Swagger UI)",
    )
    # One line per entry; the trailing bare print() emits the blank line
    # that separates the banner from uvicorn's own log output.
    print(*banner, sep="\n")
    print()


def _serve(args):
    """Hand control to uvicorn with the options chosen on the command line."""
    uvicorn.run(
        "src.api.app:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
        log_level="info",
        # Single worker: GPU model shared in memory
        workers=1,
    )


if __name__ == "__main__":
    args = _parse_cli()
    _print_banner(args)
    _serve(args)