File size: 3,232 Bytes
942050b
 
 
 
 
 
d48602c
942050b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d48602c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
942050b
 
 
 
 
 
 
 
 
 
 
d48602c
942050b
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from functools import lru_cache
from typing import Literal

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

# Closed set of provider identifiers; used as the type of the
# `*_provider` selection fields on `Settings`, so any other value is
# rejected at validation time.
ProviderName = Literal["mistral", "github_models", "groq", "ollama", "openrouter"]


class Settings(BaseSettings):
    """Application configuration loaded through pydantic-settings.

    Values are taken from environment variables and from a ``.env`` file,
    falling back to the defaults declared below. Plain fields are looked up
    under the ``NL_SQL_`` prefix (case-insensitively); the credential fields
    additionally declare a ``validation_alias`` naming the conventional
    un-prefixed variable (e.g. ``MISTRAL_API_KEY``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        env_prefix="NL_SQL_",
        # Unrelated variables in the environment / .env are silently dropped
        # instead of failing validation.
        extra="ignore",
        case_sensitive=False,
        # Fields that declare an alias may still be populated by field name.
        populate_by_name=True,
    )

    # Logging verbosity; a free-form string, not validated against level names here.
    log_level: str = "INFO"

    # Provider chosen for each slot; values are restricted to ProviderName.
    default_provider: ProviderName = "mistral"
    frontier_provider: ProviderName = "groq"  # GitHub Models needs fine-grained PAT
    local_provider: ProviderName = "ollama"

    # Mistral: model ids and API base URL.
    # NOTE(review): "gen" / "nl" / "embed" presumably mean code generation,
    # natural-language and embedding models — confirm against the callers.
    mistral_gen_model: str = "codestral-latest"
    mistral_nl_model: str = "mistral-large-latest"
    mistral_embed_model: str = "mistral-embed"
    mistral_base_url: str = "https://api.mistral.ai/v1"

    # GitHub Models: model id and inference endpoint.
    github_models_model: str = "openai/gpt-4o-mini"
    github_models_base_url: str = "https://models.github.ai/inference"

    # Groq: model id and API base URL.
    groq_model: str = "llama-3.3-70b-versatile"
    groq_base_url: str = "https://api.groq.com/openai/v1"

    # Ollama: model id, base URL (localhost by default) and a timeout in
    # seconds; where the timeout is applied is not visible in this module.
    ollama_gen_model: str = "qwen2.5-coder:7b-instruct"
    ollama_base_url: str = "http://localhost:11434/v1"
    ollama_timeout_seconds: float = 180.0

    # OpenRouter — heterogeneous-CSC slot. Default = deepseek-v4-flash:free
    # (DeepSeek family, ≠ Mistral — needed so self-consistency votes don't
    # collapse into one model's blind spots, as happened in config F + CSC
    # merge-revision saturation on homogeneous codestral). Earlier picks
    # rejected during 2026-05-20 probe:
    #   - z-ai/glm-4.5-air:free → reasoning model, 2186 reasoning_tokens
    #     consumed the whole budget, content=empty (smoke5 → 0% EA).
    #   - qwen/qwen3-coder:free → Venice provider 429-loop (free quota).
    # deepseek-v4-flash:free returned valid JSON+SQL on probe (LIMIT/OFFSET
    # correct for 7th-row case). Other live free models cycle; check
    # `D:\TXT\Free API Keys.txt` / smoke before switching.
    openrouter_model: str = "deepseek/deepseek-v4-flash:free"
    openrouter_base_url: str = "https://openrouter.ai/api/v1"

    # Perplexity browser path via local GraceKelly (D:\GraceKells). Free
    # because it rides the user's Perplexity Pro subscription via Playwright.
    # `claude-sonnet-4-6` here is the Perplexity menu label, not the
    # Anthropic API model id — GraceKelly resolves it to the browser path.
    perplexity_browser_model: str = "claude-sonnet-4-6"
    perplexity_base_url: str = "http://127.0.0.1:8011"

    # Credentials. Each defaults to an empty string when the variable is
    # absent, so a missing key does not fail settings validation.
    mistral_api_key: str = Field(default="", validation_alias="MISTRAL_API_KEY")
    github_token: str = Field(default="", validation_alias="GITHUB_TOKEN")
    groq_api_key: str = Field(default="", validation_alias="GROQ_API_KEY")
    openrouter_api_key: str = Field(default="", validation_alias="OPENROUTER_API_KEY")

    # diskcache for LLM generate/embed responses (per docs/02_architecture_v2.md §6.5).
    # Two subdirs ("gen", "embed") are created under this root by `nl_sql.llm.cache`.
    llm_cache_dir: str = ".cache/llm"
    # Cache size limit, expressed in gigabytes per the field name.
    llm_cache_size_limit_gb: int = 4


@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the shared `Settings` instance, constructing it on first use.

    The result is memoized by `lru_cache`, so the environment and `.env`
    file are only read on the first call; later calls return the same
    object. Call `get_settings.cache_clear()` to force a reload.
    """
    settings = Settings()
    return settings