"""
Configuration settings for the DocGenie API.
"""
| import os |
| from typing import Optional, List |
|
|
|
|
# Spellings (compared case-insensitively, after trimming) that switch a flag on.
_TRUTHY_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_bool(name: str, default: bool) -> bool:
    """Read a boolean flag from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is not set at all.

    Returns:
        True when the value is one of ``1``, ``true``, ``yes`` or ``on``
        (case-insensitive, surrounding whitespace ignored). Any other value
        that is set, including the empty string, yields False.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUTHY_VALUES


def _env_int(name: str, default: int) -> int:
    """Read an integer from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or blank.

    Raises:
        ValueError: If the variable is set to something that is not an
            integer. The message names the variable.
    """
    raw = os.getenv(name)
    # A variable exported as empty (e.g. `API_PORT=`) is treated as unset.
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from exc


def _env_float(name: str, default: float) -> float:
    """Read a float from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or blank.

    Raises:
        ValueError: If the variable is set to something that is not a
            number. The message names the variable.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return float(raw)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from exc


def _env_csv(name: str, default: List[str]) -> List[str]:
    """Read a comma-separated list from the environment.

    Items are stripped and empty items are dropped. When the variable is
    unset, or nothing is left after cleaning, a copy of ``default`` is
    returned.
    """
    raw = os.getenv(name)
    if raw is None:
        return list(default)
    items = [piece.strip() for piece in raw.split(",")]
    items = [piece for piece in items if piece]
    return items or list(default)


class Settings:
    """API configuration settings.

    Every option is a class attribute populated from an environment variable
    of the same name. The environment is read once, when this class body is
    executed (i.e. at import time); later changes to ``os.environ`` are not
    picked up.

    Boolean options accept ``1``/``true``/``yes``/``on`` in any letter case.
    Numeric options fall back to their default when the variable is unset or
    blank and raise ``ValueError`` naming the variable when it is malformed.
    """

    # --- Anthropic / LLM ---
    ANTHROPIC_API_KEY: str = os.getenv("ANTHROPIC_API_KEY", "")
    CLAUDE_MODEL: str = os.getenv("CLAUDE_MODEL", "claude-sonnet-4-5-20250929")
    # LLM_MODEL defaults to whatever CLAUDE_MODEL resolved to above.
    LLM_MODEL: str = os.getenv("LLM_MODEL", CLAUDE_MODEL)

    # --- Handwriting service ---
    HANDWRITING_SERVICE_URL: str = os.getenv(
        "HANDWRITING_SERVICE_URL",
        "http://localhost:8080",
    )
    RUNPOD_API_KEY: str = os.getenv("RUNPOD_API_KEY", "")
    HANDWRITING_SERVICE_TIMEOUT: int = _env_int("HANDWRITING_SERVICE_TIMEOUT", 300)
    HANDWRITING_SERVICE_MAX_RETRIES: int = _env_int("HANDWRITING_SERVICE_MAX_RETRIES", 3)
    HANDWRITING_SERVICE_ENABLED: bool = _env_bool("HANDWRITING_SERVICE_ENABLED", False)
    HANDWRITING_SERVICE_SUPPORTS_BATCH: bool = _env_bool(
        "HANDWRITING_SERVICE_SUPPORTS_BATCH", True
    )

    # --- OCR service ---
    OCR_SERVICE_URL: str = os.getenv("OCR_SERVICE_URL", "http://localhost:8000")
    OCR_SERVICE_TIMEOUT: int = _env_int("OCR_SERVICE_TIMEOUT", 30)
    OCR_SERVICE_ENABLED: bool = _env_bool("OCR_SERVICE_ENABLED", False)
    OCR_ENGINE: str = os.getenv("OCR_ENGINE", "microsoft_di")
    OCR_DPI: int = _env_int("OCR_DPI", 300)

    # --- Local OCR (Tesseract) ---
    OCR_USE_LOCAL: bool = _env_bool("OCR_USE_LOCAL", False)
    OCR_TESSERACT_LANG: str = os.getenv("OCR_TESSERACT_LANG", "eng")
    OCR_TESSERACT_CONFIG: str = os.getenv("OCR_TESSERACT_CONFIG", "--psm 3")

    # --- Bounding-box normalization ---
    BBOX_NORMALIZATION_ENABLED: bool = _env_bool("BBOX_NORMALIZATION_ENABLED", False)
    BBOX_NORMALIZATION_SCALE: str = os.getenv("BBOX_NORMALIZATION_SCALE", "0-1")

    # --- GT verification ---
    GT_VERIFICATION_ENABLED: bool = _env_bool("GT_VERIFICATION_ENABLED", False)
    GT_VERIFICATION_SIMILARITY_CUTOFF: float = _env_float(
        "GT_VERIFICATION_SIMILARITY_CUTOFF", 0.8
    )
    GT_VERIFICATION_OVERLAP_THRESHOLD: float = _env_float(
        "GT_VERIFICATION_OVERLAP_THRESHOLD", 0.5
    )

    # --- Analysis ---
    ANALYSIS_ENABLED: bool = _env_bool("ANALYSIS_ENABLED", False)
    ANALYSIS_MIN_ANNOTATION_COUNT: int = _env_int("ANALYSIS_MIN_ANNOTATION_COUNT", 1)

    # --- Debug visualization ---
    DEBUG_VISUALIZATION_ENABLED: bool = _env_bool("DEBUG_VISUALIZATION_ENABLED", False)
    DEBUG_SHOW_TEXT_IN_BBOX: bool = _env_bool("DEBUG_SHOW_TEXT_IN_BBOX", True)
    # Kept as the raw "R,G,B" string; parsing is left to the consumer.
    DEBUG_BBOX_COLOR_RGB: str = os.getenv("DEBUG_BBOX_COLOR_RGB", "255,0,0")

    # --- Dataset export ---
    DATASET_EXPORT_ENABLED: bool = _env_bool("DATASET_EXPORT_ENABLED", False)
    DATASET_EXPORT_FORMAT: str = os.getenv("DATASET_EXPORT_FORMAT", "msgpack")
    DATASET_EXPORT_DIR: str = os.getenv("DATASET_EXPORT_DIR", "/tmp/docgenie_datasets")
    DATASET_RESIZE_IMAGES: bool = _env_bool("DATASET_RESIZE_IMAGES", False)
    DATASET_CLIP_BBOXES_TO_FOREGROUND: bool = _env_bool(
        "DATASET_CLIP_BBOXES_TO_FOREGROUND", False
    )

    # --- API server ---
    API_HOST: str = os.getenv("API_HOST", "0.0.0.0")
    API_PORT: int = _env_int("API_PORT", 8000)
    DEBUG_MODE: bool = _env_bool("DEBUG_MODE", False)

    # --- CORS ---
    # Comma-separated origins; falls back to the wildcard when unset or empty.
    CORS_ORIGINS: List[str] = _env_csv("CORS_ORIGINS", ["*"])

    # --- Temporary storage ---
    TEMP_DIR: str = os.getenv("TEMP_DIR", "/tmp/docgenie_api")

    # --- Logging ---
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")

    # --- Database / Redis ---
    DATABASE_URL: Optional[str] = os.getenv("DATABASE_URL", None)
    REDIS_URL: Optional[str] = os.getenv("REDIS_URL", "redis://localhost:6379/0")

    # --- Supabase ---
    SUPABASE_URL: str = os.getenv("SUPABASE_URL", "")
    SUPABASE_KEY: str = os.getenv("SUPABASE_KEY", "")

    # --- Queue / batch processing ---
    RQ_QUEUE_NAME: str = os.getenv("RQ_QUEUE_NAME", "docgenie")
    BATCH_POLL_INTERVAL: int = _env_int("BATCH_POLL_INTERVAL", 30)
    BATCH_PROMPT_CHUNK_SIZE: int = _env_int("BATCH_PROMPT_CHUNK_SIZE", 4)
    BATCH_DATA_DIR: str = os.getenv("BATCH_DATA_DIR", "/tmp/docgenie_batches")
    MESSAGE_DATA_DIR: str = os.getenv("MESSAGE_DATA_DIR", "/tmp/docgenie_messages")

    # --- Google Drive ---
    GOOGLE_DRIVE_FOLDER_NAME: str = os.getenv(
        "GOOGLE_DRIVE_FOLDER_NAME", "DocGenie Documents"
    )
    GOOGLE_CLIENT_ID: Optional[str] = os.getenv("GOOGLE_CLIENT_ID", None)
    GOOGLE_CLIENT_SECRET: Optional[str] = os.getenv("GOOGLE_CLIENT_SECRET", None)

    # --- Monitoring ---
    SENTRY_DSN: Optional[str] = os.getenv("SENTRY_DSN", None)
    ENABLE_METRICS: bool = _env_bool("ENABLE_METRICS", False)
    METRICS_PORT: int = _env_int("METRICS_PORT", 9090)

    # --- AWS / S3 ---
    AWS_ACCESS_KEY_ID: Optional[str] = os.getenv("AWS_ACCESS_KEY_ID", None)
    AWS_SECRET_ACCESS_KEY: Optional[str] = os.getenv("AWS_SECRET_ACCESS_KEY", None)
    AWS_REGION: str = os.getenv("AWS_REGION", "us-east-1")
    S3_BUCKET: Optional[str] = os.getenv("S3_BUCKET", None)

    @classmethod
    def validate(cls) -> bool:
        """Validate required settings.

        Returns:
            True when every required setting is present.

        Raises:
            ValueError: If ``ANTHROPIC_API_KEY`` is empty or only whitespace.
        """
        if not cls.ANTHROPIC_API_KEY.strip():
            raise ValueError("ANTHROPIC_API_KEY environment variable is required")
        return True

    @classmethod
    def get_cors_origins(cls) -> List[str]:
        """Return the configured CORS origins.

        A new list is returned on every call so that callers may modify the
        result without altering the class-level ``CORS_ORIGINS`` value.
        """
        return list(cls.CORS_ORIGINS)
|
|
|
|
# Module-level Settings instance, created when this module is imported.
# Every option is a class attribute, so the instance exposes the same values
# as the Settings class itself.
settings = Settings()
|
|