import json
import random
import time

import httpx


class OFFDeepInfraAPI:
    """Async client for DeepInfra's web-embed chat-completions endpoint.

    Re-emits the upstream SSE stream as OpenAI-style chat.completion.chunk
    events, followed by a usage summary chunk and a terminating [DONE].
    """

    headers = {
        'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Mobile Safari/537.36',
        'X-Deepinfra-Source': 'web-embed',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"',
    }

    def __init__(self):
        self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"

    def get_model_list(self):
        return [
            'meta-llama/Llama-3.3-70B-Instruct-Turbo',
            'deepseek-ai/DeepSeek-R1-Turbo',
            'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
        ]

    async def generate(self, json_data: dict):
        # Force streaming and ask upstream to include token usage stats.
        json_data['stream'] = True
        json_data['stream_options'] = {
            'include_usage': True,
            'continuous_usage_stats': True,
        }

        chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
        # Unix timestamp for the OpenAI-style "created" field; the event
        # loop clock is monotonic, not epoch-based, so use time.time().
        created = int(time.time())
        total_completion_tokens = 0
        model_name = json_data.get("model", "unknown")

        try:
            async with httpx.AsyncClient(timeout=None) as client:
                async with client.stream(
                    "POST",
                    self.base_url,
                    headers=OFFDeepInfraAPI.headers,
                    json=json_data,
                ) as response:
                    if response.status_code != 200:
                        yield f"data: [Unexpected status code: {response.status_code}]\n\n"
                        return

                    async for line in response.aiter_lines():
                        if not line or not line.startswith("data:"):
                            continue

                        data_str = line.removeprefix("data:").strip()
                        if data_str == "[DONE]":
                            # Stop reading; the usage summary chunk and the
                            # closing [DONE] are emitted after the loop.
                            break

                        try:
                            data = json.loads(data_str)
                        except json.JSONDecodeError:
                            continue

                        # Usage-only chunks (sent because include_usage is
                        # set) carry an empty "choices" list, so record the
                        # usage before indexing into choices.
                        usage = data.get("usage")
                        if usage:
                            total_completion_tokens = usage.get("completion_tokens", total_completion_tokens)

                        choices = data.get("choices") or []
                        if not choices:
                            continue

                        delta = choices[0].get("delta", {})
                        content = delta.get("content", "")
                        finish_reason = choices[0].get("finish_reason")

                        if content or finish_reason:
                            transformed = {
                                "id": chunk_id,
                                "object": "chat.completion.chunk",
                                "created": created,
                                "choices": [{
                                    "index": 0,
                                    "text": content,
                                    "logprobs": None,
                                    "finish_reason": finish_reason,
                                    "delta": {
                                        "token_id": None,
                                        "role": delta.get("role", "assistant"),
                                        "content": content,
                                        "tool_calls": delta.get("tool_calls"),
                                    },
                                }],
                                "model": model_name,
                                "usage": None,
                            }
                            yield f"data: {json.dumps(transformed)}\n\n"

            # Emit a final chunk carrying the accumulated usage; prompt
            # tokens are not tracked here, so 0 is reported.
            final = {
                "id": chunk_id,
                "object": "chat.completion.chunk",
                "created": created,
                "choices": [],
                "model": model_name,
                "usage": {
                    "prompt_tokens": 0,
                    "completion_tokens": total_completion_tokens,
                    "total_tokens": total_completion_tokens,
                },
            }
            yield f"data: {json.dumps(final)}\n\n"
            yield "data: [DONE]\n\n"

        except Exception as e:
            yield f"data: [Connection error: {str(e)}]\n\n"