import { editorTools } from "./tools.js";
import { SYSTEM_PROMPT, buildMessages } from "./system-prompt.js";
import { streamChatResponse } from "./stream-handler.js";
import type { Request, Response } from "express";

/**
 * Models exposed in the UI picker. All ids must be served by Hugging
 * Face Inference Providers (`https://router.huggingface.co/v1`) and
 * support function/tool calling - the agent loop won't work without it.
 *
 * Note about provider suffixes (`:provider`):
 *   HF Router defaults to the `:fastest` provider for a given model.
 *   That's usually fine, but a few providers don't fit the editor's
 *   workload:
 *     - Groq enforces strict tool-call validation and tends to reject
 *       our 18-tool registry with `Failed to call a function`.
 *     - Nscale + a few others reject the `tools` parameter outright.
 *     - Fireworks has deprecated several Llama 3.x checkpoints.
 *   We pin `Llama-3.3-70B` to Together, which serves the model with
 *   full tool-calling support. Unsuffixed ids use the default `:fastest`
 *   policy.
 *
 * Discover more conversational models here:
 *   https://huggingface.co/models?inference_provider=all&other=conversational
 *
 * `context` is the advertised context window; `cost` is a rough
 * relative price tag ($, $$, $$$) - inference providers charge their
 * own rates, see the docs for the source of truth.
 */
export const AVAILABLE_MODELS = [
  {
    id: "openai/gpt-oss-120b",
    label: "GPT-OSS 120B",
    context: "131K",
    cost: "$$",
  },
  {
    id: "openai/gpt-oss-20b",
    label: "GPT-OSS 20B",
    context: "131K",
    cost: "$",
  },
  {
    // Pinned to the Together provider; see the provider-suffix note above.
    id: "meta-llama/Llama-3.3-70B-Instruct:together",
    label: "Llama 3.3 70B",
    context: "128K",
    cost: "$",
  },
  {
    id: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    label: "Qwen3 Coder 480B",
    context: "262K",
    cost: "$$",
  },
  {
    id: "deepseek-ai/DeepSeek-V3.1",
    label: "DeepSeek V3.1",
    context: "128K",
    cost: "$$",
  },
];

/**
 * Express handler for the chat endpoint.
 *
 * Reads the optional `context` object from the request body, passes its
 * `document`, `selection` and `frontmatter` fields to `buildMessages`,
 * and - when that returns a truthy block - appends it to `SYSTEM_PROMPT`
 * under a `## Current context` heading. The request is then delegated to
 * `streamChatResponse` with the editor tool registry and the `"chat"`
 * log prefix.
 *
 * @param req - Incoming Express request; `req.body.context` is optional.
 * @param res - Express response, handed through to `streamChatResponse`.
 * @returns Whatever `streamChatResponse` returns.
 */
export async function handleChat(req: Request, res: Response) {
  // `req.body` is undefined when no body parser populated it for this
  // request; default to an empty object so a missing body is treated the
  // same as a missing `context` instead of throwing on destructuring.
  const { context } = req.body ?? {};

  const contextBlock = buildMessages(
    context?.document,
    context?.selection,
    context?.frontmatter,
  );

  // Only add the context section when there is something to show.
  const systemPrompt = contextBlock
    ? `${SYSTEM_PROMPT}\n\n## Current context\n\n${contextBlock}`
    : SYSTEM_PROMPT;

  return streamChatResponse(req, res, {
    systemPrompt,
    tools: editorTools,
    logPrefix: "chat",
  });
}