"""
Stack 2.9 Model Client
Unified API client for Ollama, OpenAI, Anthropic, and other LLM backends.
"""

import os
import time
import logging
from typing import Dict, List, Optional
from dataclasses import dataclass
from abc import ABC, abstractmethod

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@dataclass
class GenerationResult:
    """Result from model generation."""
    text: str
    model: str
    tokens: int
    duration: float
    finish_reason: str
    raw_response: Optional[Dict] = None


@dataclass
class ChatMessage:
    """Chat message structure."""
    role: str
    content: str
    tool_calls: Optional[List[Dict]] = None
    tool_call_id: Optional[str] = None
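

# Example: building a conversation for any client's chat() method (a minimal
# sketch; the role strings follow the usual system/user/assistant convention
# and are not validated here):
#
#   messages = [
#       ChatMessage(role="system", content="You are a concise assistant."),
#       ChatMessage(role="user", content="Summarize PEP 8 in one sentence."),
#   ]
#   result = client.chat(messages, temperature=0.2)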


class BaseModelClient(ABC):
    """Abstract base class for model clients."""

    @abstractmethod
    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text from a prompt."""
        pass

    @abstractmethod
    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate response from chat messages."""
        pass

    @abstractmethod
    def get_model_name(self) -> str:
        """Get the model name."""
        pass


class OllamaClient(BaseModelClient):
    """Client for Ollama local API."""

    def __init__(
        self,
        model: str = "qwen2.5-coder:32b",
        base_url: str = "http://localhost:11434",
        timeout: int = 300
    ):
        self.model = model
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text using Ollama."""
        import requests

        url = f"{self.base_url}/api/generate"
        # Ollama expects sampling parameters inside an "options" object;
        # the token limit is called "num_predict", not "max_tokens".
        options = {"temperature": temperature, "num_predict": max_tokens}
        if stop:
            options["stop"] = stop
        payload = {
            "model": self.model,
            "prompt": prompt,
            "options": options,
            "stream": False
        }

        start_time = time.time()

        try:
            response = requests.post(url, json=payload, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            duration = time.time() - start_time

            return GenerationResult(
                text=data.get("response", ""),
                model=self.model,
                tokens=data.get("eval_count", 0),
                duration=duration,
                finish_reason=data.get("done_reason", "stop"),
                raw_response=data
            )
        except requests.exceptions.RequestException as e:
            logger.error(f"Ollama request failed: {e}")
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate chat response using Ollama."""
        import requests

        url = f"{self.base_url}/api/chat"
        payload = {
            "model": self.model,
            "messages": [
                {"role": m.role, "content": m.content}
                for m in messages
            ],
            # As with generate(), sampling parameters go under "options".
            "options": {"temperature": temperature, "num_predict": max_tokens},
            "stream": False
        }

        if tools:
            payload["tools"] = tools

        start_time = time.time()

        try:
            response = requests.post(url, json=payload, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            duration = time.time() - start_time

            # The chat endpoint nests the reply under "message".
            msg = data.get("message", {})
            text = msg.get("content", "")

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=data.get("eval_count", 0),
                duration=duration,
                finish_reason=data.get("done_reason", "stop"),
                raw_response=data
            )
        except requests.exceptions.RequestException as e:
            logger.error(f"Ollama chat request failed: {e}")
            raise

    def get_model_name(self) -> str:
        return self.model
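

# Example: talking to a local Ollama server directly (a minimal sketch;
# assumes `ollama serve` is running on localhost:11434 and the model has
# already been pulled, e.g. `ollama pull qwen2.5-coder:32b`):
#
#   client = OllamaClient(model="qwen2.5-coder:32b")
#   result = client.generate("Write a one-line docstring for a sort function.")
#   print(result.text, result.tokens, f"{result.duration:.2f}s")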


class OpenAIClient(BaseModelClient):
    """Client for OpenAI API."""

    def __init__(
        self,
        model: str = "gpt-4o",
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: int = 120
    ):
        self.model = model
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        self.base_url = base_url or os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
        self.timeout = timeout

        if not self.api_key:
            raise ValueError("OpenAI API key required. Set OPENAI_API_KEY environment variable.")

    def _get_client(self):
        """Get OpenAI client."""
        try:
            from openai import OpenAI
            return OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=self.timeout)
        except ImportError:
            raise ImportError("openai package required. Install with: pip install openai")

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text using OpenAI.

        Routed through the chat completions endpoint, since chat-native
        models such as gpt-4o are not served by the legacy completions API.
        """
        if stop:
            kwargs["stop"] = stop
        return self.chat(
            [ChatMessage(role="user", content=prompt)],
            temperature=temperature,
            max_tokens=max_tokens,
            **kwargs
        )

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate chat response using OpenAI."""
        client = self._get_client()

        # Convert ChatMessage objects to the OpenAI wire format, carrying
        # through tool-call fields when present.
        chat_messages = []
        for msg in messages:
            msg_dict = {"role": msg.role, "content": msg.content}
            if msg.tool_calls:
                msg_dict["tool_calls"] = msg.tool_calls
            if msg.tool_call_id:
                msg_dict["tool_call_id"] = msg.tool_call_id
            chat_messages.append(msg_dict)

        # Build the request, adding tools only when supplied.
        request_params = {
            "model": self.model,
            "messages": chat_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        if tools:
            request_params["tools"] = tools

        request_params.update(kwargs)

        start_time = time.time()

        try:
            response = client.chat.completions.create(**request_params)

            duration = time.time() - start_time

            msg = response.choices[0].message
            text = msg.content or ""

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=response.usage.completion_tokens,
                duration=duration,
                finish_reason=response.choices[0].finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"OpenAI chat request failed: {e}")
            raise

    def get_model_name(self) -> str:
        return self.model
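

# Example: passing an OpenAI-style tool schema to chat() (a minimal sketch;
# the "get_weather" tool and its parameters are hypothetical, and the caller
# is responsible for reading any tool_calls back out of raw_response):
#
#   client = OpenAIClient()
#   tools = [{
#       "type": "function",
#       "function": {
#           "name": "get_weather",
#           "description": "Get the weather for a city.",
#           "parameters": {
#               "type": "object",
#               "properties": {"city": {"type": "string"}},
#               "required": ["city"],
#           },
#       },
#   }]
#   result = client.chat(
#       [ChatMessage(role="user", content="Weather in Oslo?")], tools=tools)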


class AnthropicClient(BaseModelClient):
    """Client for Anthropic API."""

    def __init__(
        self,
        model: str = "claude-sonnet-4-20250514",
        api_key: Optional[str] = None,
        timeout: int = 120
    ):
        self.model = model
        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")
        self.timeout = timeout

        if not self.api_key:
            raise ValueError("Anthropic API key required. Set ANTHROPIC_API_KEY environment variable.")

    def _get_client(self):
        """Get Anthropic client."""
        try:
            from anthropic import Anthropic
            return Anthropic(api_key=self.api_key, timeout=self.timeout)
        except ImportError:
            raise ImportError("anthropic package required. Install with: pip install anthropic")

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text using Anthropic."""
        client = self._get_client()

        # Anthropic takes the system prompt as a top-level parameter, not as
        # a "system"-role message, so pull it out of kwargs if supplied.
        system = kwargs.pop("system", None)
        messages = [{"role": "user", "content": prompt}]

        request_params = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if system:
            request_params["system"] = system
        if stop:
            # Anthropic's name for stop sequences.
            request_params["stop_sequences"] = stop
        request_params.update(kwargs)

        start_time = time.time()

        try:
            response = client.messages.create(**request_params)

            duration = time.time() - start_time

            text = response.content[0].text if response.content else ""

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=response.usage.output_tokens,
                duration=duration,
                finish_reason=response.stop_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"Anthropic request failed: {e}")
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate chat response using Anthropic."""
        client = self._get_client()

        # Split any system message out of the conversation: Anthropic wants
        # it as a top-level parameter rather than in the messages list.
        system = None
        anthropic_messages = []

        for msg in messages:
            if msg.role == "system":
                system = msg.content
            else:
                anthropic_messages.append({"role": msg.role, "content": msg.content})

        request_params = {
            "model": self.model,
            "messages": anthropic_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        if system:
            request_params["system"] = system

        if tools:
            request_params["tools"] = tools

        request_params.update(kwargs)

        start_time = time.time()

        try:
            response = client.messages.create(**request_params)

            duration = time.time() - start_time

            text = response.content[0].text if response.content else ""

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=response.usage.output_tokens,
                duration=duration,
                finish_reason=response.stop_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"Anthropic chat request failed: {e}")
            raise

    def get_model_name(self) -> str:
        return self.model
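

# Example: supplying a system prompt (a minimal sketch; generate() accepts it
# via the "system" keyword, while chat() lifts it out of any role="system"
# message in the list):
#
#   client = AnthropicClient()
#   result = client.generate(
#       "List three uses for a heap.",
#       system="Answer in bullet points only.",
#   )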


class TogetherClient(BaseModelClient):
    """Client for Together AI API (OpenAI-compatible)."""

    def __init__(
        self,
        model: str = "Qwen/Qwen2.5-Coder-32B-Instruct",
        api_key: Optional[str] = None,
        base_url: str = "https://api.together.xyz/v1",
        timeout: int = 120
    ):
        self.model = model
        self.api_key = api_key or os.environ.get("TOGETHER_API_KEY", "")
        self.base_url = base_url
        self.timeout = timeout

        if not self.api_key:
            raise ValueError("Together API key required. Set TOGETHER_API_KEY environment variable.")

    def _get_client(self):
        """Get OpenAI-compatible client."""
        try:
            from openai import OpenAI
            return OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=self.timeout)
        except ImportError:
            raise ImportError("openai package required. Install with: pip install openai")

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text using Together."""
        client = self._get_client()

        start_time = time.time()

        try:
            response = client.completions.create(
                model=self.model,
                prompt=prompt,
                temperature=temperature,
                max_tokens=max_tokens,
                stop=stop,
                **kwargs
            )

            duration = time.time() - start_time

            return GenerationResult(
                text=response.choices[0].text,
                model=self.model,
                tokens=response.usage.completion_tokens,
                duration=duration,
                finish_reason=response.choices[0].finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"Together request failed: {e}")
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate chat response using Together."""
        client = self._get_client()

        # Convert ChatMessage objects to the OpenAI-compatible wire format.
        chat_messages = [{"role": m.role, "content": m.content} for m in messages]

        request_params = {
            "model": self.model,
            "messages": chat_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        if tools:
            request_params["tools"] = tools

        request_params.update(kwargs)

        start_time = time.time()

        try:
            response = client.chat.completions.create(**request_params)

            duration = time.time() - start_time

            msg = response.choices[0].message
            text = msg.content or ""

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=response.usage.completion_tokens,
                duration=duration,
                finish_reason=response.choices[0].finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"Together chat request failed: {e}")
            raise

    def get_model_name(self) -> str:
        return self.model


class OpenRouterClient(BaseModelClient):
    """Client for OpenRouter API (unified interface for multiple models)."""

    def __init__(
        self,
        model: str = "qwen/qwen2.5-coder-32b",
        api_key: Optional[str] = None,
        base_url: str = "https://openrouter.ai/api/v1",
        timeout: int = 120,
        http_referer: Optional[str] = None,
        x_title: Optional[str] = None
    ):
        self.model = model
        self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY", "")
        self.base_url = base_url
        self.timeout = timeout
        self.http_referer = http_referer or os.environ.get("HTTP_REFERER", "")
        self.x_title = x_title or os.environ.get("X_TITLE", "Stack 2.9")

        if not self.api_key:
            raise ValueError("OpenRouter API key required. Set OPENROUTER_API_KEY environment variable.")

    def _get_client(self):
        """Get OpenAI-compatible client."""
        try:
            from openai import OpenAI
            return OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=self.timeout)
        except ImportError:
            raise ImportError("openai package required. Install with: pip install openai")

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text using OpenRouter."""
        client = self._get_client()

        start_time = time.time()

        try:
            response = client.completions.create(
                model=self.model,
                prompt=prompt,
                temperature=temperature,
                max_tokens=max_tokens,
                stop=stop,
                **kwargs
            )

            duration = time.time() - start_time

            return GenerationResult(
                text=response.choices[0].text,
                model=self.model,
                tokens=response.usage.completion_tokens,
                duration=duration,
                finish_reason=response.choices[0].finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"OpenRouter request failed: {e}")
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate chat response using OpenRouter."""
        client = self._get_client()

        # Convert ChatMessage objects to the OpenAI-compatible wire format.
        chat_messages = [{"role": m.role, "content": m.content} for m in messages]

        request_params = {
            "model": self.model,
            "messages": chat_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        if tools:
            request_params["tools"] = tools

        request_params.update(kwargs)

        # Optional OpenRouter attribution headers identifying the calling app.
        extra_headers = {}
        if self.http_referer:
            extra_headers["HTTP-Referer"] = self.http_referer
        if self.x_title:
            extra_headers["X-Title"] = self.x_title

        start_time = time.time()

        try:
            response = client.chat.completions.create(
                extra_headers=extra_headers if extra_headers else None,
                **request_params
            )

            duration = time.time() - start_time

            msg = response.choices[0].message
            text = msg.content or ""

            return GenerationResult(
                text=text,
                model=self.model,
                tokens=response.usage.completion_tokens,
                duration=duration,
                finish_reason=response.choices[0].finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error(f"OpenRouter chat request failed: {e}")
            raise

    def get_model_name(self) -> str:
        return self.model


def create_model_client(
    provider: str = "ollama",
    model: Optional[str] = None,
    **kwargs
) -> BaseModelClient:
    """
    Factory function to create a model client.

    Args:
        provider: One of "ollama", "openai", "anthropic", "openrouter", "together"
        model: Model name (defaults to the provider's default)
        **kwargs: Additional client configuration

    Returns:
        BaseModelClient instance
    """
    if provider == "ollama":
        default_model = model or os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:32b")
        return OllamaClient(model=default_model, **kwargs)
    elif provider == "openai":
        default_model = model or os.environ.get("OPENAI_MODEL", "gpt-4o")
        return OpenAIClient(model=default_model, **kwargs)
    elif provider == "anthropic":
        default_model = model or os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")
        return AnthropicClient(model=default_model, **kwargs)
    elif provider == "openrouter":
        default_model = model or os.environ.get("OPENROUTER_MODEL", "qwen/qwen2.5-coder-32b")
        return OpenRouterClient(model=default_model, **kwargs)
    elif provider == "together":
        default_model = model or os.environ.get("TOGETHER_MODEL", "Qwen/Qwen2.5-Coder-32B-Instruct")
        return TogetherClient(model=default_model, **kwargs)
    else:
        raise ValueError(f"Unknown provider: {provider}. Use: ollama, openai, anthropic, openrouter, together")
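

# Example: the factory is the intended entry point (a minimal sketch; the
# remote backends require their API keys, e.g. OPENAI_API_KEY, to be set in
# the environment, and "gpt-4o-mini" stands in for whatever model you use):
#
#   client = create_model_client("openai", model="gpt-4o-mini")
#   result = client.generate("Explain the GIL in one paragraph.")
#   print(result.text)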


class ModelClientPool:
    """Pool of model clients for different purposes."""

    def __init__(self):
        self.clients: Dict[str, BaseModelClient] = {}

    def add_client(self, name: str, client: BaseModelClient):
        """Add a client to the pool."""
        self.clients[name] = client

    def get_client(self, name: str = "default") -> BaseModelClient:
        """Get client by name."""
        if name not in self.clients:
            # Lazily create a client from the environment when the name has
            # not been registered explicitly.
            provider = os.environ.get("MODEL_PROVIDER", "ollama")
            self.clients[name] = create_model_client(provider)
        return self.clients[name]

    def generate(
        self,
        prompt: str,
        client_name: str = "default",
        **kwargs
    ) -> GenerationResult:
        """Generate using named client."""
        return self.get_client(client_name).generate(prompt, **kwargs)

    def chat(
        self,
        messages: List[ChatMessage],
        client_name: str = "default",
        **kwargs
    ) -> GenerationResult:
        """Chat using named client."""
        return self.get_client(client_name).chat(messages, **kwargs)


# Module-level singleton pool, created on first use.
_default_pool = None


def get_default_pool() -> ModelClientPool:
    """Get default model client pool."""
    global _default_pool
    if _default_pool is None:
        _default_pool = ModelClientPool()
    return _default_pool
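

# Example: using the shared pool to route different roles to different
# backends (a minimal sketch; the "fast" and "strong" names are arbitrary):
#
#   pool = get_default_pool()
#   pool.add_client("fast", create_model_client("ollama"))
#   pool.add_client("strong", create_model_client("anthropic"))
#   draft = pool.generate("Draft a commit message.", client_name="fast")
#   review = pool.generate("Review this draft: ...", client_name="strong")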


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Stack 2.9 Model Client")
    parser.add_argument("--provider", choices=["ollama", "openai", "anthropic", "openrouter", "together"],
                        default="ollama", help="Model provider")
    parser.add_argument("--model", type=str, help="Model name")
    parser.add_argument("--prompt", type=str, required=True, help="Prompt to generate")
    parser.add_argument("--temperature", type=float, default=0.2, help="Temperature")

    args = parser.parse_args()

    # Create the requested client.
    client = create_model_client(args.provider, args.model)

    print(f"Using model: {client.get_model_name()}")
    print(f"Provider: {args.provider}")
    print("-" * 40)

    # Run a single generation and report basic stats.
    result = client.generate(args.prompt, temperature=args.temperature)

    print(f"Response:\n{result.text}")
    print("-" * 40)
    print(f"Tokens: {result.tokens}, Duration: {result.duration:.2f}s")