Stack-2-9-finetuned / stack /eval /model_client.py
walidsobhie-code
refactor: Squeeze folders further - cleaner structure
65888d5
#!/usr/bin/env python3
"""
Stack 2.9 Model Client
Unified API client for Ollama, OpenAI, Anthropic, and other LLM backends.
"""
import os
import json
import time
import logging
from pathlib import Path
from typing import Dict, List, Any, Optional, Callable
from dataclasses import dataclass
from abc import ABC, abstractmethod
# Configure the root logger at import time so INFO-and-above records
# (including the request-failure errors logged by the clients below) are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every client in this file.
logger = logging.getLogger(__name__)
@dataclass
class GenerationResult:
    """Outcome of one generate/chat call, normalised across backends.

    Attributes:
        text: The generated text (empty string when the backend returned none).
        model: Name of the model the client was configured with.
        tokens: Number of generated tokens as reported by the backend.
        duration: Wall-clock seconds spent on the request.
        finish_reason: Backend-reported reason the generation stopped.
        raw_response: The full backend response as a dict, when available.
    """

    # Required fields, in positional order.
    text: str
    model: str
    tokens: int
    duration: float
    finish_reason: str

    # Optional payload kept for debugging / provider-specific details.
    raw_response: Optional[Dict] = None
@dataclass
class ChatMessage:
    """A single turn in a chat conversation.

    Attributes:
        role: Speaker of the message, e.g. "system", "user" or "assistant".
        content: The message text.
        tool_calls: Tool invocations attached to this message, if any.
        tool_call_id: Identifier of the tool call this message relates to, if any.
    """

    role: str  # "system", "user", "assistant"
    content: str

    # Tool-calling metadata; both stay None for plain text messages.
    tool_calls: Optional[List[Dict]] = None
    tool_call_id: Optional[str] = None
class BaseModelClient(ABC):
    """Common interface that every model backend client implements.

    Subclasses must provide ``generate``, ``chat`` and ``get_model_name``;
    the class cannot be instantiated until all three are overridden.
    """

    @abstractmethod
    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Produce a completion for a plain text prompt.

        Args:
            prompt: Text to complete.
            temperature: Sampling temperature.
            max_tokens: Upper bound on generated tokens.
            stop: Optional stop sequences.
            **kwargs: Backend-specific extras.

        Returns:
            The generation outcome.
        """

    @abstractmethod
    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Produce a reply to a list of chat messages.

        Args:
            messages: Conversation so far.
            temperature: Sampling temperature.
            max_tokens: Upper bound on generated tokens.
            tools: Optional tool definitions offered to the model.
            **kwargs: Backend-specific extras.

        Returns:
            The generation outcome.
        """

    @abstractmethod
    def get_model_name(self) -> str:
        """Return the name of the model this client talks to."""
class OllamaClient(BaseModelClient):
    """Client for the Ollama local HTTP API.

    Sampling settings are sent inside the request's ``options`` object, which
    is where Ollama reads model parameters from; the token limit is sent under
    Ollama's name for it, ``num_predict``. Requests are always non-streaming.
    """

    def __init__(
        self,
        model: str = "qwen2.5-coder:32b",
        base_url: str = "http://localhost:11434",
        timeout: int = 300
    ):
        """Store connection settings.

        Args:
            model: Ollama model tag to query.
            base_url: Root URL of the Ollama server; a trailing slash is removed.
            timeout: Per-request timeout in seconds, passed to ``requests``.
        """
        self.model = model
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    @staticmethod
    def _build_options(
        temperature: float,
        max_tokens: int,
        stop: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """Translate the generic sampling arguments into an Ollama ``options`` dict."""
        options: Dict[str, Any] = {
            "temperature": temperature,
            "num_predict": max_tokens,
        }
        if stop:
            options["stop"] = stop
        return options

    def _post(self, endpoint: str, payload: Dict[str, Any], failure_label: str):
        """POST ``payload`` to ``endpoint`` and return ``(json_body, seconds_elapsed)``.

        Request errors are logged with ``failure_label`` and re-raised unchanged.
        """
        # Imported lazily so the module can be loaded without requests installed.
        import requests

        url = f"{self.base_url}{endpoint}"
        started = time.time()
        try:
            response = requests.post(url, json=payload, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            logger.error("%s: %s", failure_label, e)
            raise
        return data, time.time() - started

    def _to_result(self, data: Dict[str, Any], text: str, duration: float) -> GenerationResult:
        """Wrap an Ollama response body in a ``GenerationResult``."""
        return GenerationResult(
            text=text,
            model=self.model,
            tokens=data.get("eval_count", 0),
            duration=duration,
            finish_reason=data.get("done_reason", "stop"),
            raw_response=data
        )

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text using Ollama's ``/api/generate`` endpoint.

        Args:
            prompt: Text to complete.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            stop: Optional stop sequences.
            **kwargs: Accepted for interface compatibility; not sent to Ollama.

        Returns:
            The generation outcome, with the raw response body attached.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails.
        """
        payload = {
            "model": self.model,
            "prompt": prompt,
            "options": self._build_options(temperature, max_tokens, stop),
            "stream": False
        }
        data, duration = self._post("/api/generate", payload, "Ollama request failed")
        return self._to_result(data, data.get("response", ""), duration)

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a chat response using Ollama's ``/api/chat`` endpoint.

        Only each message's role and content are forwarded.

        Args:
            messages: Conversation so far.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            tools: Optional tool definitions, forwarded as-is when given.
            **kwargs: Accepted for interface compatibility; not sent to Ollama.

        Returns:
            The generation outcome, with the raw response body attached.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails.
        """
        payload = {
            "model": self.model,
            "messages": [
                {"role": m.role, "content": m.content}
                for m in messages
            ],
            "options": self._build_options(temperature, max_tokens),
            "stream": False
        }
        if tools:
            payload["tools"] = tools
        data, duration = self._post("/api/chat", payload, "Ollama chat request failed")
        # "message" may be absent or null; treat both as an empty reply.
        reply = data.get("message") or {}
        return self._to_result(data, reply.get("content") or "", duration)

    def get_model_name(self) -> str:
        """Return the configured Ollama model tag."""
        return self.model
class OpenAIClient(BaseModelClient):
    """Client for the OpenAI API (or any server reachable via ``base_url``)."""

    def __init__(
        self,
        model: str = "gpt-4o",
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: int = 120
    ):
        """Store connection settings.

        Args:
            model: Model name to request.
            api_key: API key; falls back to the ``OPENAI_API_KEY`` environment variable.
            base_url: API root; falls back to ``OPENAI_BASE_URL`` and then to
                ``https://api.openai.com/v1``.
            timeout: Request timeout in seconds, passed to the SDK client.

        Raises:
            ValueError: If no API key is available.
        """
        self.model = model
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        self.base_url = base_url or os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
        self.timeout = timeout
        if not self.api_key:
            raise ValueError("OpenAI API key required. Set OPENAI_API_KEY environment variable.")

    def _get_client(self):
        """Build an ``openai.OpenAI`` client from the stored settings.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        # Only the import is guarded, so construction errors are not mislabelled.
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError("openai package required. Install with: pip install openai") from exc
        return OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=self.timeout)

    @staticmethod
    def _completion_tokens(response) -> int:
        """Return the completion token count, or 0 when the response has no usage data."""
        usage = getattr(response, "usage", None)
        if usage is None:
            return 0
        return getattr(usage, "completion_tokens", 0) or 0

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text through the completions endpoint.

        Args:
            prompt: Text to complete.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            stop: Optional stop sequences; omitted from the request when not given.
            **kwargs: Extra arguments forwarded to ``completions.create``.

        Returns:
            The generation outcome, with the dumped SDK response attached.

        Raises:
            Exception: Any error from the SDK call is logged and re-raised.
        """
        # NOTE(review): this uses the legacy completions endpoint, while the
        # default model ("gpt-4o") is a chat model -- confirm the intended
        # endpoint/model pairing for callers that rely on generate().
        client = self._get_client()
        request_params = {
            "model": self.model,
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if stop is not None:
            request_params["stop"] = stop
        request_params.update(kwargs)
        start_time = time.time()
        try:
            response = client.completions.create(**request_params)
            duration = time.time() - start_time
            choice = response.choices[0]
            return GenerationResult(
                text=choice.text,
                model=self.model,
                tokens=self._completion_tokens(response),
                duration=duration,
                finish_reason=choice.finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error("OpenAI request failed: %s", e)
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a chat response through the chat completions endpoint.

        Args:
            messages: Conversation so far; tool metadata is forwarded when set.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            tools: Optional tool definitions, forwarded as-is when given.
            **kwargs: Extra request parameters; these override the ones above.

        Returns:
            The generation outcome. ``text`` is empty when the reply has no
            content; the full reply is kept in ``raw_response``.

        Raises:
            Exception: Any error from the SDK call is logged and re-raised.
        """
        client = self._get_client()
        # Convert messages to OpenAI format
        chat_messages = []
        for msg in messages:
            msg_dict = {"role": msg.role, "content": msg.content}
            if msg.tool_calls:
                msg_dict["tool_calls"] = msg.tool_calls
            if msg.tool_call_id:
                msg_dict["tool_call_id"] = msg.tool_call_id
            chat_messages.append(msg_dict)
        # Build request
        request_params = {
            "model": self.model,
            "messages": chat_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if tools:
            request_params["tools"] = tools
        request_params.update(kwargs)
        start_time = time.time()
        try:
            response = client.chat.completions.create(**request_params)
            duration = time.time() - start_time
            choice = response.choices[0]
            return GenerationResult(
                text=choice.message.content or "",
                model=self.model,
                tokens=self._completion_tokens(response),
                duration=duration,
                finish_reason=choice.finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error("OpenAI chat request failed: %s", e)
            raise

    def get_model_name(self) -> str:
        """Return the configured model name."""
        return self.model
class AnthropicClient(BaseModelClient):
    """Client for the Anthropic Messages API.

    The system prompt is always sent through the top-level ``system`` request
    field and never as a message with role "system".
    """

    def __init__(
        self,
        model: str = "claude-sonnet-4-20250514",
        api_key: Optional[str] = None,
        timeout: int = 120
    ):
        """Store connection settings.

        Args:
            model: Model name to request.
            api_key: API key; falls back to the ``ANTHROPIC_API_KEY`` environment variable.
            timeout: Request timeout in seconds, passed to the SDK client.

        Raises:
            ValueError: If no API key is available.
        """
        self.model = model
        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")
        self.timeout = timeout
        if not self.api_key:
            raise ValueError("Anthropic API key required. Set ANTHROPIC_API_KEY environment variable.")

    def _get_client(self):
        """Build an ``anthropic.Anthropic`` client from the stored settings.

        Raises:
            ImportError: If the ``anthropic`` package is not installed.
        """
        try:
            from anthropic import Anthropic
        except ImportError as exc:
            raise ImportError("anthropic package required. Install with: pip install anthropic") from exc
        # getattr keeps instances created without __init__ (no timeout attribute) usable.
        return Anthropic(api_key=self.api_key, timeout=getattr(self, "timeout", 120))

    @staticmethod
    def _extract_text(response) -> str:
        """Concatenate the text of every content block that carries text.

        Blocks without a string ``text`` attribute are skipped rather than
        raising.
        """
        pieces = []
        for block in (response.content or []):
            block_text = getattr(block, "text", None)
            if isinstance(block_text, str):
                pieces.append(block_text)
        return "".join(pieces)

    def _create(self, request_params: Dict[str, Any], failure_label: str) -> GenerationResult:
        """Send one Messages API request and wrap the reply.

        Errors are logged with ``failure_label`` and re-raised unchanged.
        """
        client = self._get_client()
        start_time = time.time()
        try:
            response = client.messages.create(**request_params)
            duration = time.time() - start_time
            return GenerationResult(
                text=self._extract_text(response),
                model=self.model,
                tokens=response.usage.output_tokens,
                duration=duration,
                finish_reason=response.stop_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error("%s: %s", failure_label, e)
            raise

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text for a single user prompt.

        Args:
            prompt: Text sent as the only user message.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            stop: Optional stop sequences, sent as ``stop_sequences``.
            **kwargs: Extra request parameters. ``system`` is taken from here
                and sent as the top-level system prompt; the rest override
                the parameters above.

        Returns:
            The generation outcome, with the dumped SDK response attached.

        Raises:
            Exception: Any error from the SDK call is logged and re-raised.
        """
        system = kwargs.pop("system", None)
        request_params: Dict[str, Any] = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        # Only include optional fields when they carry a value.
        if system:
            request_params["system"] = system
        if stop:
            request_params["stop_sequences"] = stop
        request_params.update(kwargs)
        return self._create(request_params, "Anthropic request failed")

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a chat response.

        Args:
            messages: Conversation so far. Messages with role "system" are
                removed from the list and combined, in order and separated by
                a blank line, into the top-level system prompt.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            tools: Optional tool definitions, forwarded as-is when given.
            **kwargs: Extra request parameters. ``stop`` is translated to
                ``stop_sequences``; the rest override the parameters above.

        Returns:
            The generation outcome, with the dumped SDK response attached.

        Raises:
            Exception: Any error from the SDK call is logged and re-raised.
        """
        system_parts = []
        anthropic_messages = []
        for msg in messages:
            if msg.role == "system":
                if msg.content:
                    system_parts.append(msg.content)
            else:
                anthropic_messages.append({"role": msg.role, "content": msg.content})
        request_params: Dict[str, Any] = {
            "model": self.model,
            "messages": anthropic_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if system_parts:
            request_params["system"] = "\n\n".join(system_parts)
        if tools:
            request_params["tools"] = tools
        # Accept the generic "stop" name used by the other clients.
        stop = kwargs.pop("stop", None)
        if stop:
            request_params["stop_sequences"] = stop
        request_params.update(kwargs)
        return self._create(request_params, "Anthropic chat request failed")

    def get_model_name(self) -> str:
        """Return the configured model name."""
        return self.model
class TogetherClient(BaseModelClient):
    """Client for Together AI API (OpenAI-compatible)."""

    def __init__(
        self,
        model: str = "togethercomputer/Qwen2.5-Coder-32B-Instruct",
        api_key: Optional[str] = None,
        base_url: str = "https://api.together.xyz/v1",
        timeout: int = 120
    ):
        """Store connection settings.

        Args:
            model: Model name to request.
            api_key: API key; falls back to the ``TOGETHER_API_KEY`` environment variable.
            base_url: API root URL.
            timeout: Request timeout in seconds, passed to the SDK client.

        Raises:
            ValueError: If no API key is available.
        """
        self.model = model
        self.api_key = api_key or os.environ.get("TOGETHER_API_KEY", "")
        self.base_url = base_url
        self.timeout = timeout
        if not self.api_key:
            raise ValueError("Together API key required. Set TOGETHER_API_KEY environment variable.")

    def _get_client(self):
        """Build an OpenAI-compatible SDK client from the stored settings.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError("openai package required. Install with: pip install openai") from exc
        return OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=self.timeout)

    @staticmethod
    def _completion_tokens(response) -> int:
        """Return the completion token count, or 0 when the response has no usage data."""
        usage = getattr(response, "usage", None)
        if usage is None:
            return 0
        return getattr(usage, "completion_tokens", 0) or 0

    @staticmethod
    def _to_wire_message(msg: ChatMessage) -> Dict[str, Any]:
        """Convert a ``ChatMessage`` to a request dict, keeping tool metadata when set."""
        wire: Dict[str, Any] = {"role": msg.role, "content": msg.content}
        if msg.tool_calls:
            wire["tool_calls"] = msg.tool_calls
        if msg.tool_call_id:
            wire["tool_call_id"] = msg.tool_call_id
        return wire

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text through the completions endpoint.

        Args:
            prompt: Text to complete.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            stop: Optional stop sequences; omitted from the request when not given.
            **kwargs: Extra arguments forwarded to ``completions.create``.

        Returns:
            The generation outcome, with the dumped SDK response attached.

        Raises:
            Exception: Any error from the SDK call is logged and re-raised.
        """
        client = self._get_client()
        request_params = {
            "model": self.model,
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if stop is not None:
            request_params["stop"] = stop
        request_params.update(kwargs)
        start_time = time.time()
        try:
            response = client.completions.create(**request_params)
            duration = time.time() - start_time
            choice = response.choices[0]
            return GenerationResult(
                text=choice.text,
                model=self.model,
                tokens=self._completion_tokens(response),
                duration=duration,
                finish_reason=choice.finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error("Together request failed: %s", e)
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a chat response through the chat completions endpoint.

        Args:
            messages: Conversation so far; tool metadata is forwarded when set.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            tools: Optional tool definitions, forwarded as-is when given.
            **kwargs: Extra request parameters; these override the ones above.

        Returns:
            The generation outcome. ``text`` is empty when the reply has no content.

        Raises:
            Exception: Any error from the SDK call is logged and re-raised.
        """
        client = self._get_client()
        request_params = {
            "model": self.model,
            "messages": [self._to_wire_message(m) for m in messages],
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if tools:
            request_params["tools"] = tools
        request_params.update(kwargs)
        start_time = time.time()
        try:
            response = client.chat.completions.create(**request_params)
            duration = time.time() - start_time
            choice = response.choices[0]
            return GenerationResult(
                text=choice.message.content or "",
                model=self.model,
                tokens=self._completion_tokens(response),
                duration=duration,
                finish_reason=choice.finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error("Together chat request failed: %s", e)
            raise

    def get_model_name(self) -> str:
        """Return the configured model name."""
        return self.model
class OpenRouterClient(BaseModelClient):
    """Client for OpenRouter API (unified interface for multiple models).

    The optional ``HTTP-Referer`` and ``X-Title`` headers are attached to
    both completion and chat requests.
    """

    def __init__(
        self,
        model: str = "qwen/qwen2.5-coder-32b",
        api_key: Optional[str] = None,
        base_url: str = "https://openrouter.ai/api/v1",
        timeout: int = 120,
        http_referer: Optional[str] = None,
        x_title: Optional[str] = None
    ):
        """Store connection settings.

        Args:
            model: Model name to request.
            api_key: API key; falls back to the ``OPENROUTER_API_KEY`` environment variable.
            base_url: API root URL.
            timeout: Request timeout in seconds, passed to the SDK client.
            http_referer: Value for the ``HTTP-Referer`` header; falls back to
                the ``HTTP_REFERER`` environment variable.
            x_title: Value for the ``X-Title`` header; falls back to the
                ``X_TITLE`` environment variable and then to "Stack 2.9".

        Raises:
            ValueError: If no API key is available.
        """
        self.model = model
        self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY", "")
        self.base_url = base_url
        self.timeout = timeout
        self.http_referer = http_referer or os.environ.get("HTTP_REFERER", "")
        self.x_title = x_title or os.environ.get("X_TITLE", "Stack 2.9")
        if not self.api_key:
            raise ValueError("OpenRouter API key required. Set OPENROUTER_API_KEY environment variable.")

    def _get_client(self):
        """Build an OpenAI-compatible SDK client from the stored settings.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise ImportError("openai package required. Install with: pip install openai") from exc
        return OpenAI(api_key=self.api_key, base_url=self.base_url, timeout=self.timeout)

    def _extra_headers(self) -> Dict[str, str]:
        """Return the OpenRouter-specific headers that have a non-empty value."""
        headers: Dict[str, str] = {}
        if self.http_referer:
            headers["HTTP-Referer"] = self.http_referer
        if self.x_title:
            headers["X-Title"] = self.x_title
        return headers

    def _with_headers(self, request_params: Dict[str, Any]) -> Dict[str, Any]:
        """Add the OpenRouter headers to ``request_params`` in place and return it.

        Headers the caller passed through ``extra_headers`` take precedence.
        """
        headers = self._extra_headers()
        if headers:
            caller_headers = request_params.get("extra_headers") or {}
            request_params["extra_headers"] = {**headers, **caller_headers}
        return request_params

    @staticmethod
    def _completion_tokens(response) -> int:
        """Return the completion token count, or 0 when the response has no usage data."""
        usage = getattr(response, "usage", None)
        if usage is None:
            return 0
        return getattr(usage, "completion_tokens", 0) or 0

    @staticmethod
    def _to_wire_message(msg: ChatMessage) -> Dict[str, Any]:
        """Convert a ``ChatMessage`` to a request dict, keeping tool metadata when set."""
        wire: Dict[str, Any] = {"role": msg.role, "content": msg.content}
        if msg.tool_calls:
            wire["tool_calls"] = msg.tool_calls
        if msg.tool_call_id:
            wire["tool_call_id"] = msg.tool_call_id
        return wire

    def generate(
        self,
        prompt: str,
        temperature: float = 0.2,
        max_tokens: int = 4096,
        stop: Optional[List[str]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate text through the completions endpoint.

        Args:
            prompt: Text to complete.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            stop: Optional stop sequences; omitted from the request when not given.
            **kwargs: Extra arguments forwarded to ``completions.create``.

        Returns:
            The generation outcome, with the dumped SDK response attached.

        Raises:
            Exception: Any error from the SDK call is logged and re-raised.
        """
        client = self._get_client()
        request_params = {
            "model": self.model,
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if stop is not None:
            request_params["stop"] = stop
        request_params.update(kwargs)
        self._with_headers(request_params)
        start_time = time.time()
        try:
            response = client.completions.create(**request_params)
            duration = time.time() - start_time
            choice = response.choices[0]
            return GenerationResult(
                text=choice.text,
                model=self.model,
                tokens=self._completion_tokens(response),
                duration=duration,
                finish_reason=choice.finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error("OpenRouter request failed: %s", e)
            raise

    def chat(
        self,
        messages: List[ChatMessage],
        temperature: float = 0.2,
        max_tokens: int = 4096,
        tools: Optional[List[Dict]] = None,
        **kwargs
    ) -> GenerationResult:
        """Generate a chat response through the chat completions endpoint.

        Args:
            messages: Conversation so far; tool metadata is forwarded when set.
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            tools: Optional tool definitions, forwarded as-is when given.
            **kwargs: Extra request parameters; these override the ones above.

        Returns:
            The generation outcome. ``text`` is empty when the reply has no content.

        Raises:
            Exception: Any error from the SDK call is logged and re-raised.
        """
        client = self._get_client()
        request_params = {
            "model": self.model,
            "messages": [self._to_wire_message(m) for m in messages],
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if tools:
            request_params["tools"] = tools
        request_params.update(kwargs)
        self._with_headers(request_params)
        start_time = time.time()
        try:
            response = client.chat.completions.create(**request_params)
            duration = time.time() - start_time
            choice = response.choices[0]
            return GenerationResult(
                text=choice.message.content or "",
                model=self.model,
                tokens=self._completion_tokens(response),
                duration=duration,
                finish_reason=choice.finish_reason,
                raw_response=response.model_dump()
            )
        except Exception as e:
            logger.error("OpenRouter chat request failed: %s", e)
            raise

    def get_model_name(self) -> str:
        """Return the configured model name."""
        return self.model
def create_model_client(
    provider: str = "ollama",
    model: Optional[str] = None,
    **kwargs
) -> BaseModelClient:
    """
    Build the client that matches a provider name.

    Args:
        provider: One of "ollama", "openai", "anthropic", "openrouter", "together"
        model: Model name; when empty, the provider's ``*_MODEL`` environment
            variable is used, then the provider's built-in default
        **kwargs: Additional client configuration, passed to the client constructor

    Returns:
        BaseModelClient instance

    Raises:
        ValueError: If ``provider`` is not one of the supported names.
    """
    # provider -> (environment variable holding the model, fallback model name)
    model_defaults = {
        "ollama": ("OLLAMA_MODEL", "qwen2.5-coder:32b"),
        "openai": ("OPENAI_MODEL", "gpt-4o"),
        "anthropic": ("ANTHROPIC_MODEL", "claude-sonnet-4-20250514"),
        "openrouter": ("OPENROUTER_MODEL", "qwen/qwen2.5-coder-32b"),
        "together": ("TOGETHER_MODEL", "togethercomputer/Qwen2.5-Coder-32B-Instruct"),
    }
    if provider not in model_defaults:
        raise ValueError(f"Unknown provider: {provider}. Use: ollama, openai, anthropic, openrouter, together")

    env_var, fallback_model = model_defaults[provider]
    # The environment is consulted only when no explicit model was given.
    resolved_model = model or os.environ.get(env_var, fallback_model)

    client_classes = {
        "ollama": OllamaClient,
        "openai": OpenAIClient,
        "anthropic": AnthropicClient,
        "openrouter": OpenRouterClient,
        "together": TogetherClient,
    }
    return client_classes[provider](model=resolved_model, **kwargs)
class ModelClientPool:
    """Registry of named model clients.

    Clients are looked up by name; a name that has not been registered is
    filled on first use with a client for the provider named by the
    ``MODEL_PROVIDER`` environment variable ("ollama" when unset).
    """

    def __init__(self):
        """Start with an empty registry."""
        self.clients: Dict[str, BaseModelClient] = {}

    def add_client(self, name: str, client: BaseModelClient):
        """Register ``client`` under ``name``, replacing any previous entry."""
        self.clients[name] = client

    def get_client(self, name: str = "default") -> BaseModelClient:
        """Return the client registered as ``name``, creating it if missing."""
        if name in self.clients:
            return self.clients[name]
        # Unknown name: build a client for the configured provider and remember it.
        provider_name = os.environ.get("MODEL_PROVIDER", "ollama")
        created = create_model_client(provider_name)
        self.clients[name] = created
        return created

    def generate(
        self,
        prompt: str,
        client_name: str = "default",
        **kwargs
    ) -> GenerationResult:
        """Run ``generate`` on the client registered as ``client_name``."""
        target = self.get_client(client_name)
        return target.generate(prompt, **kwargs)

    def chat(
        self,
        messages: List[ChatMessage],
        client_name: str = "default",
        **kwargs
    ) -> GenerationResult:
        """Run ``chat`` on the client registered as ``client_name``."""
        target = self.get_client(client_name)
        return target.chat(messages, **kwargs)
# Module-wide pool, created on first request.
_default_pool = None


def get_default_pool() -> ModelClientPool:
    """Return the shared ModelClientPool, creating it on the first call."""
    global _default_pool
    pool = _default_pool
    if pool is None:
        pool = _default_pool = ModelClientPool()
    return pool
if __name__ == "__main__":
    # Command-line smoke test: send one prompt to the chosen provider.
    import argparse

    cli = argparse.ArgumentParser(description="Stack 2.9 Model Client")
    cli.add_argument(
        "--provider",
        choices=["ollama", "openai", "anthropic", "openrouter", "together"],
        default="ollama",
        help="Model provider",
    )
    cli.add_argument("--model", type=str, help="Model name")
    cli.add_argument("--prompt", type=str, required=True, help="Prompt to generate")
    cli.add_argument("--temperature", type=float, default=0.2, help="Temperature")
    options = cli.parse_args()

    divider = "-" * 40

    # Build the client and report what will be used.
    active_client = create_model_client(options.provider, options.model)
    print(f"Using model: {active_client.get_model_name()}")
    print(f"Provider: {options.provider}")
    print(divider)

    # Run a single generation and show the outcome.
    outcome = active_client.generate(options.prompt, temperature=options.temperature)
    print(f"Response:\n{outcome.text}")
    print(divider)
    print(f"Tokens: {outcome.tokens}, Duration: {outcome.duration:.2f}s")