import json
import random
import time

import httpx


class OFFDeepInfraAPI:
    """Async client for DeepInfra's web-embed chat-completions endpoint.

    Re-emits the upstream SSE stream as OpenAI-style chat.completion.chunk
    events, followed by a usage summary chunk and a terminating [DONE].
    """

    headers = {
        'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Mobile Safari/537.36',
        'X-Deepinfra-Source': 'web-embed',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"',
    }

    def __init__(self):
        self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"

    def get_model_list(self):
        return [
            'meta-llama/Llama-3.3-70B-Instruct-Turbo',
            'deepseek-ai/DeepSeek-R1-Turbo',
            'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
        ]

    async def generate(self, json_data: dict):
        # Force streaming and ask upstream to include token usage stats.
        json_data['stream'] = True
        json_data['stream_options'] = {
            'include_usage': True,
            'continuous_usage_stats': True,
        }

        chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
        # Unix timestamp for the OpenAI-style "created" field; the event
        # loop clock is monotonic, not epoch-based, so use time.time().
        created = int(time.time())
        total_completion_tokens = 0
        model_name = json_data.get("model", "unknown")

        try:
            async with httpx.AsyncClient(timeout=None) as client:
                async with client.stream(
                    "POST",
                    self.base_url,
                    headers=OFFDeepInfraAPI.headers,
                    json=json_data,
                ) as response:
                    if response.status_code != 200:
                        yield f"data: [Unexpected status code: {response.status_code}]\n\n"
                        return

                    async for line in response.aiter_lines():
                        if not line or not line.startswith("data:"):
                            continue

                        data_str = line.removeprefix("data:").strip()
                        if data_str == "[DONE]":
                            # Stop reading; the usage summary chunk and the
                            # closing [DONE] are emitted after the loop.
                            break

                        try:
                            data = json.loads(data_str)
                        except json.JSONDecodeError:
                            continue

                        # Usage-only chunks (sent because include_usage is
                        # set) carry an empty "choices" list, so record the
                        # usage before indexing into choices.
                        usage = data.get("usage")
                        if usage:
                            total_completion_tokens = usage.get("completion_tokens", total_completion_tokens)

                        choices = data.get("choices") or []
                        if not choices:
                            continue

                        delta = choices[0].get("delta", {})
                        content = delta.get("content", "")
                        finish_reason = choices[0].get("finish_reason")

                        if content or finish_reason:
                            transformed = {
                                "id": chunk_id,
                                "object": "chat.completion.chunk",
                                "created": created,
                                "choices": [{
                                    "index": 0,
                                    "text": content,
                                    "logprobs": None,
                                    "finish_reason": finish_reason,
                                    "delta": {
                                        "token_id": None,
                                        "role": delta.get("role", "assistant"),
                                        "content": content,
                                        "tool_calls": delta.get("tool_calls"),
                                    },
                                }],
                                "model": model_name,
                                "usage": None,
                            }
                            yield f"data: {json.dumps(transformed)}\n\n"

            # Emit a final chunk carrying the accumulated usage; prompt
            # tokens are not tracked here, so 0 is reported.
            final = {
                "id": chunk_id,
                "object": "chat.completion.chunk",
                "created": created,
                "choices": [],
                "model": model_name,
                "usage": {
                    "prompt_tokens": 0,
                    "completion_tokens": total_completion_tokens,
                    "total_tokens": total_completion_tokens,
                },
            }
            yield f"data: {json.dumps(final)}\n\n"
            yield "data: [DONE]\n\n"

        except Exception as e:
            yield f"data: [Connection error: {str(e)}]\n\n"