from functools import lru_cache
from typing import Literal

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

# Closed set of provider identifiers accepted by the ``*_provider`` settings.
ProviderName = Literal[
    "mistral",
    "github_models",
    "groq",
    "ollama",
    "openrouter",
]


class Settings(BaseSettings):
    """Application configuration loaded from the environment and ``.env``.

    Plain fields are declared with ``env_prefix="NL_SQL_"`` and
    case-insensitive matching; unknown keys are ignored. The credential
    fields at the bottom declare explicit validation aliases
    (``MISTRAL_API_KEY``, ``GITHUB_TOKEN``, ``GROQ_API_KEY``,
    ``OPENROUTER_API_KEY``); pydantic-settings does not apply ``env_prefix``
    to aliased fields, so those are looked up under the alias as written.
    ``populate_by_name=True`` additionally allows them to be passed by
    field name.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        env_prefix="NL_SQL_",
        extra="ignore",
        case_sensitive=False,
        populate_by_name=True,
    )

    # --- Logging -----------------------------------------------------------
    log_level: str = "INFO"

    # --- Provider routing --------------------------------------------------
    default_provider: ProviderName = "mistral"
    # Groq rather than GitHub Models: the latter needs a fine-grained PAT.
    frontier_provider: ProviderName = "groq"
    local_provider: ProviderName = "ollama"

    # --- Mistral -----------------------------------------------------------
    mistral_gen_model: str = "codestral-latest"
    mistral_nl_model: str = "mistral-large-latest"
    mistral_embed_model: str = "mistral-embed"
    mistral_base_url: str = "https://api.mistral.ai/v1"

    # --- GitHub Models -----------------------------------------------------
    github_models_model: str = "openai/gpt-4o-mini"
    github_models_base_url: str = "https://models.github.ai/inference"

    # --- Groq --------------------------------------------------------------
    groq_model: str = "llama-3.3-70b-versatile"
    groq_base_url: str = "https://api.groq.com/openai/v1"

    # --- Ollama (local) ----------------------------------------------------
    ollama_gen_model: str = "qwen2.5-coder:7b-instruct"
    ollama_base_url: str = "http://localhost:11434/v1"
    ollama_timeout_seconds: float = 180.0

    # --- OpenRouter: heterogeneous-CSC slot --------------------------------
    # The default is deepseek-v4-flash:free. It belongs to the DeepSeek
    # family (i.e. not Mistral), which is required so that self-consistency
    # votes do not collapse into a single model's blind spots -- as happened
    # in config F + CSC merge-revision saturation on homogeneous codestral.
    #
    # Candidates rejected during the 2026-05-20 probe:
    #   - z-ai/glm-4.5-air:free -> reasoning model; 2186 reasoning_tokens
    #     consumed the whole budget and content came back empty
    #     (smoke5 -> 0% EA).
    #   - qwen/qwen3-coder:free -> Venice provider stuck in a 429 loop
    #     (free quota).
    #
    # deepseek-v4-flash:free returned valid JSON+SQL on the probe
    # (LIMIT/OFFSET correct for the 7th-row case). Other live free models
    # cycle; check `D:\TXT\Free API Keys.txt` / run a smoke test before
    # switching.
    openrouter_model: str = "deepseek/deepseek-v4-flash:free"
    openrouter_base_url: str = "https://openrouter.ai/api/v1"

    # --- Perplexity (browser path) -----------------------------------------
    # Goes through a local GraceKelly instance (D:\GraceKells). It is free
    # because it rides the user's Perplexity Pro subscription via Playwright.
    # `claude-sonnet-4-6` is the Perplexity menu label, not the Anthropic API
    # model id -- GraceKelly resolves it to the browser path.
    perplexity_browser_model: str = "claude-sonnet-4-6"
    perplexity_base_url: str = "http://127.0.0.1:8011"

    # --- Credentials -------------------------------------------------------
    # Each defaults to an empty string when the variable is not set.
    mistral_api_key: str = Field(default="", validation_alias="MISTRAL_API_KEY")
    github_token: str = Field(default="", validation_alias="GITHUB_TOKEN")
    groq_api_key: str = Field(default="", validation_alias="GROQ_API_KEY")
    openrouter_api_key: str = Field(default="", validation_alias="OPENROUTER_API_KEY")

    # --- LLM response cache ------------------------------------------------
    # diskcache root for LLM generate/embed responses
    # (per docs/02_architecture_v2.md section 6.5). `nl_sql.llm.cache`
    # creates two subdirectories, "gen" and "embed", beneath it.
    llm_cache_dir: str = ".cache/llm"
    llm_cache_size_limit_gb: int = 4


@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance, building it on first use.

    The result is memoized by ``lru_cache``, so the environment and ``.env``
    file are read only once per process. Call ``get_settings.cache_clear()``
    to force the next call to construct a fresh instance.
    """
    return Settings()