| """ |
| Ollama client wrapper for MiniMax-M2. |
| Compatible with /api/generate streaming endpoint. |
| """ |
import json
import os
from typing import Dict, Optional

import requests
|
|
# Connection and generation defaults, all overridable via environment.
# os.getenv defaults are given as strings so the expression is str-typed
# either way before the explicit float()/int() conversion.
OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://127.0.0.1:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "minimax-m2:cloud")
DEFAULT_TEMPERATURE = float(os.getenv("DEFAULT_TEMPERATURE", "0.2"))
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "1024"))
|
|
def ask_ollama(prompt: str, extra: Optional[Dict] = None) -> str:
    """Send *prompt* to the local Ollama /api/generate endpoint and return the text.

    Streams the NDJSON response and concatenates each chunk's ``response``
    field into a single string.

    Args:
        prompt: Prompt text forwarded verbatim to the model.
        extra: Optional top-level payload fields merged over the defaults
            (e.g. a ``system`` prompt or an ``options`` override). Note this
            replaces same-named top-level keys, including ``options``.

    Returns:
        The generated text with surrounding whitespace stripped; empty string
        if the stream contained no ``response`` chunks.

    Raises:
        requests.HTTPError: If the server responds with a non-2xx status.
        requests.RequestException: On connection failure or timeout.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "options": {
            "temperature": DEFAULT_TEMPERATURE,
            "num_predict": MAX_TOKENS,
        },
    }
    if extra:
        payload.update(extra)

    url = f"{OLLAMA_API_URL}/api/generate"
    # Stream so chunks can be assembled as they arrive; the context manager
    # guarantees the underlying connection is released even on error
    # (the original never closed the streamed response).
    with requests.post(url, json=payload, stream=True, timeout=180) as resp:
        resp.raise_for_status()

        chunks = []
        for line in resp.iter_lines():
            if not line:  # keep-alive blank lines between NDJSON records
                continue
            try:
                # json.loads accepts bytes directly; no manual decode needed.
                data = json.loads(line)
            except json.JSONDecodeError:
                # Skip malformed lines without aborting the stream, but do
                # not swallow unrelated errors the way the old bare
                # ``except Exception`` did.
                continue
            if "response" in data:
                chunks.append(data["response"])
            if data.get("done"):
                # Terminal chunk: stop reading instead of waiting for EOF.
                break

    # ''.join avoids the quadratic ``+=`` concatenation of the original.
    return "".join(chunks).strip()
|
|