| import { streamText, convertToModelMessages } from "ai"; |
| import type { UIMessage } from "ai"; |
| import { createOpenAICompatible } from "@ai-sdk/openai-compatible"; |
| import type { Request, Response } from "express"; |
| import { extractToken } from "../auth.js"; |
|
|
| |
| |
| |
| |
| |
| |
/**
 * Model id used when the request body does not name a model and the
 * `HF_INFERENCE_MODEL` environment variable is not set.
 */
export const DEFAULT_MODEL = "openai/gpt-oss-120b";
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Base URL handed to the OpenAI-compatible provider; every inference call
 * made through `createProvider` is sent to this Hugging Face router endpoint.
 */
const HF_INFERENCE_BASE_URL = "https://router.huggingface.co/v1";
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Picks the Hugging Face token to use for this request.
 *
 * Precedence: the token `extractToken` finds in the request's cookie header,
 * then the `HF_TOKEN` environment variable. Empty values are treated as
 * absent.
 *
 * @param req Incoming Express request; only its cookie header is read.
 * @returns The first non-empty token, or `undefined` when neither source has one.
 */
function resolveHfToken(req: Request): string | undefined {
  // `||` short-circuits, so the environment is only consulted when the
  // cookie yields nothing, and an empty string never wins.
  return extractToken(req.headers.cookie) || process.env.HF_TOKEN || undefined;
}
|
|
/**
 * Builds an OpenAI-compatible AI SDK provider that talks to
 * `HF_INFERENCE_BASE_URL` and authenticates with the given key.
 *
 * @param apiKey Hugging Face token used as the provider's API key.
 * @returns The provider created by `createOpenAICompatible`.
 */
function createProvider(apiKey: string) {
  const settings = {
    name: "huggingface",
    baseURL: HF_INFERENCE_BASE_URL,
    apiKey,
  };
  return createOpenAICompatible(settings);
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Returns the messages with every `"reasoning"` part removed from assistant
 * messages.
 *
 * Messages that are not from the assistant, have no `parts` array, or contain
 * no reasoning parts are passed through as the same object; only messages
 * that actually lose a part are shallow-copied. The input array and its
 * messages are never mutated.
 *
 * @param messages UI messages as received from the client.
 * @returns A new array of the same length and order.
 */
function stripReasoningParts(messages: UIMessage[]): UIMessage[] {
  const withoutReasoning = (message: UIMessage): UIMessage => {
    const { role, parts } = message;
    if (role === "assistant" && Array.isArray(parts)) {
      const kept = parts.filter((part) => part.type !== "reasoning");
      // Copy only when something was dropped so untouched messages keep
      // their identity.
      if (kept.length !== parts.length) {
        return { ...message, parts: kept };
      }
    }
    return message;
  };

  return messages.map((message) => withoutReasoning(message));
}
|
|
/** Tool set type accepted by `streamText`, derived from its first argument. */
type StreamTextTools = Parameters<typeof streamText>[0]["tools"];

/** Per-endpoint settings consumed by `streamChatResponse`. */
interface StreamChatOptions {
  /** Passed to `streamText` as the `system` prompt. */
  systemPrompt: string;
  /** Passed to `streamText` unchanged as its `tools`. */
  tools: StreamTextTools;
  /** Bracketed tag prepended to this endpoint's `console.error` output. */
  logPrefix: string;
}
|
|
/**
 * Streams a chat completion to the Express client as an AI SDK UI message
 * stream.
 *
 * Flow: validate `req.body.messages`, resolve a Hugging Face token, convert
 * the UI messages (minus assistant reasoning parts) to model messages, start
 * `streamText`, then copy the resulting web `Response` onto `res` chunk by
 * chunk.
 *
 * Responses produced here:
 * - 400 `{ error }` when `messages` is missing or not an array.
 * - 500 `{ error }` when no token is available, or when something fails
 *   before any header has been sent.
 * - Otherwise the status and headers of the AI SDK response, plus the
 *   streaming header overrides set below.
 *
 * If the client disconnects, the upstream generation is aborted. If a failure
 * happens after headers were sent, the response is ended rather than left
 * open. This function never rejects; failures are logged with `logPrefix`.
 *
 * @param req Express request; reads `body.messages`, the optional
 *   `body.model`, and the cookie header (through `resolveHfToken`).
 * @param res Express response the stream is written to.
 * @param options System prompt, tool set and log prefix for this endpoint.
 */
export async function streamChatResponse(
  req: Request,
  res: Response,
  { systemPrompt, tools, logPrefix }: StreamChatOptions,
): Promise<void> {
  // Aborted when the client connection closes, so the upstream generation
  // stops instead of running on for a reader that is gone.
  const upstreamAbort = new AbortController();
  const abortUpstream = (): void => upstreamAbort.abort();
  res.once("close", abortUpstream);

  try {
    // `req.body` is undefined when no body parser ran; report that as a bad
    // request instead of failing on the destructuring.
    const { messages, model } = req.body ?? {};

    if (!Array.isArray(messages)) {
      res.status(400).json({ error: "messages array is required" });
      return;
    }

    const apiKey = resolveHfToken(req);
    if (!apiKey) {
      res.status(500).json({
        error:
          "No Hugging Face token available. Sign in with your HF account " +
          "(the OAuth token is used to call Inference Providers) or set " +
          "HF_TOKEN in the backend environment.",
      });
      return;
    }

    const provider = createProvider(apiKey);
    // Request body wins, then the environment, then the built-in default.
    // `||` is deliberate: an empty string falls through to the next source.
    const modelId = model || process.env.HF_INFERENCE_MODEL || DEFAULT_MODEL;
    const modelMessages = await convertToModelMessages(
      stripReasoningParts(messages),
    );

    const result = streamText({
      model: provider.chatModel(modelId),
      system: systemPrompt,
      messages: modelMessages,
      tools,
      abortSignal: upstreamAbort.signal,
    });

    const webResponse = result.toUIMessageStreamResponse({
      onError: (error) => {
        console.error(`[${logPrefix}] stream error:`, error);
        return error instanceof Error ? error.message : "Stream error";
      },
    });

    // Start from the AI SDK's own headers, then override the ones that
    // affect streaming: `X-Accel-Buffering: no` is the nginx convention for
    // disabling proxy buffering, and the other two ask intermediaries not to
    // cache, transform or compress the stream.
    const headers: Record<string, string> = Object.fromEntries(
      webResponse.headers.entries(),
    );
    headers["X-Accel-Buffering"] = "no";
    headers["Cache-Control"] = "no-cache, no-transform";
    headers["Content-Encoding"] = "identity";
    res.writeHead(webResponse.status, headers);
    res.flushHeaders?.();

    // Disable Nagle's algorithm so small chunks are sent without delay.
    res.socket?.setNoDelay?.(true);

    const reader = webResponse.body?.getReader();
    if (reader) {
      try {
        for (;;) {
          const { done, value } = await reader.read();
          // Stop on end of stream, or as soon as the client has gone away.
          if (done || upstreamAbort.signal.aborted) break;
          res.write(value);
        }
      } finally {
        // Releases the upstream stream when we stop early; harmless once it
        // has already finished or errored.
        void reader.cancel().catch(() => undefined);
      }
    }

    if (!res.writableEnded && !upstreamAbort.signal.aborted) {
      res.end();
    }
  } catch (error: unknown) {
    // A failure caused by the client disconnecting is not a server error,
    // and there is nobody left to answer.
    if (upstreamAbort.signal.aborted) return;

    const message =
      error instanceof Error ? error.message : "Internal server error";
    console.error(`[${logPrefix}] error:`, message);

    if (!res.headersSent) {
      res.status(500).json({ error: message });
    } else if (!res.writableEnded) {
      // Headers are already out, so the status cannot change; end the
      // response so the client is not left waiting on an open connection.
      res.end();
    }
  } finally {
    // Only a close that happens while this handler is running should abort.
    res.off("close", abortUpstream);
  }
}
|
|