Spaces:
Running
Running
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """Endpoint shorthand catalog. | |
| Lets the MCP tool ``run_rollout`` and the HTMX UI accept a short endpoint | |
| label (``vllm`` / ``openai`` / ``hf_router``) and resolve the actual | |
| ``base_url`` / ``api_key`` / ``model`` from environment variables (with | |
| sane defaults). Explicit overrides on the call always win. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| from dataclasses import dataclass | |
| ENDPOINT_KINDS = ("vllm", "openai", "hf_router") | |
| class _EndpointSpec: | |
| base_url_env: str | |
| api_key_env: str | |
| model_env: str | |
| default_base_url: str | None | |
| default_api_key: str | None | |
| default_model: str | None | |
| disable_thinking_default: bool | |
| _CATALOG: dict[str, _EndpointSpec] = { | |
| "vllm": _EndpointSpec( | |
| base_url_env="VLLM_URL", | |
| api_key_env="VLLM_API_KEY", | |
| model_env="VLLM_MODEL", | |
| default_base_url=None, # cluster URL must be set | |
| default_api_key="intercepted", # vLLM rarely enforces auth | |
| default_model="Qwen/Qwen3.5-4B", | |
| disable_thinking_default=True, # Qwen3.5 thinks by default | |
| ), | |
| "openai": _EndpointSpec( | |
| base_url_env="OPENAI_BASE_URL", | |
| api_key_env="OPENAI_API_KEY", | |
| model_env="OPENAI_MODEL", | |
| default_base_url="https://api.openai.com/v1", | |
| default_api_key=None, | |
| default_model="gpt-4o-mini", | |
| disable_thinking_default=False, # OpenAI rejects unknown kwargs | |
| ), | |
| "hf_router": _EndpointSpec( | |
| base_url_env="HF_ROUTER_BASE_URL", | |
| api_key_env="HF_ROUTER_API_KEY", | |
| model_env="HF_ROUTER_MODEL", | |
| default_base_url="https://router.huggingface.co/v1", | |
| default_api_key=None, | |
| default_model="Qwen/Qwen3-4B-Instruct-2507:nscale", | |
| disable_thinking_default=False, # Instruct variant doesn't think | |
| ), | |
| } | |
| class ResolvedEndpoint: | |
| kind: str | |
| base_url: str | |
| api_key: str | |
| model: str | |
| disable_thinking_default: bool | |
| def resolve_endpoint( | |
| kind: str, | |
| *, | |
| base_url: str = "", | |
| api_key: str = "", | |
| model: str = "", | |
| ) -> ResolvedEndpoint: | |
| """Resolve an endpoint shorthand into concrete (base_url, api_key, model). | |
| Precedence per field: **explicit arg > env var > catalog default**. | |
| Always normalizes to a ``/v1`` base URL. | |
| Raises ``ValueError`` for unknown kinds, missing creds, or missing model. | |
| """ | |
| spec = _CATALOG.get(kind) | |
| if spec is None: | |
| raise ValueError( | |
| f"unknown endpoint kind: {kind!r}; expected one of {ENDPOINT_KINDS}" | |
| ) | |
| base = ( | |
| base_url or os.environ.get(spec.base_url_env) or spec.default_base_url or "" | |
| ).rstrip("/") | |
| if not base: | |
| raise ValueError( | |
| f"{kind}: no base_url (set {spec.base_url_env} env var or pass " | |
| "base_url=...)" | |
| ) | |
| if not base.endswith("/v1"): | |
| base = f"{base}/v1" | |
| key = api_key or os.environ.get(spec.api_key_env) or spec.default_api_key or "" | |
| if not key: | |
| raise ValueError( | |
| f"{kind}: no api_key (set {spec.api_key_env} env var or pass api_key=...)" | |
| ) | |
| mdl = model or os.environ.get(spec.model_env) or spec.default_model or "" | |
| if not mdl: | |
| raise ValueError( | |
| f"{kind}: no model (set {spec.model_env} env var or pass model=...)" | |
| ) | |
| return ResolvedEndpoint( | |
| kind=kind, | |
| base_url=base, | |
| api_key=key, | |
| model=mdl, | |
| disable_thinking_default=spec.disable_thinking_default, | |
| ) | |
| def catalog_summary() -> list[dict[str, object]]: | |
| """Return a JSON-friendly view of the catalog (for the UI dropdown).""" | |
| out: list[dict[str, object]] = [] | |
| for kind, spec in _CATALOG.items(): | |
| out.append( | |
| { | |
| "kind": kind, | |
| "base_url_env": spec.base_url_env, | |
| "api_key_env": spec.api_key_env, | |
| "model_env": spec.model_env, | |
| "default_base_url": spec.default_base_url, | |
| "default_model": spec.default_model, | |
| "disable_thinking_default": spec.disable_thinking_default, | |
| "configured": _is_configured(spec), | |
| } | |
| ) | |
| return out | |
| def _is_configured(spec: _EndpointSpec) -> bool: | |
| base = os.environ.get(spec.base_url_env) or spec.default_base_url or "" | |
| key = os.environ.get(spec.api_key_env) or spec.default_api_key or "" | |
| model = os.environ.get(spec.model_env) or spec.default_model or "" | |
| return bool(base and key and model) | |