// Last change: 3afbbdf by tfrere (HF Staff)
// fix(agent): default back to gpt-oss-120b, pin Llama 3.3 to Together
import { editorTools } from "./tools.js";
import { SYSTEM_PROMPT, buildMessages } from "./system-prompt.js";
import { streamChatResponse } from "./stream-handler.js";
import type { Request, Response } from "express";
/**
 * Catalogue of models offered by the UI model picker.
 *
 * Requirements for every `id`:
 *   - it must be served through Hugging Face Inference Providers
 *     (`https://router.huggingface.co/v1`);
 *   - it must support function/tool calling, because the agent loop
 *     cannot work without it.
 *
 * Provider suffixes (`:provider`):
 *   An id without a suffix follows the HF Router default, which is the
 *   `:fastest` provider for that model. That is usually fine, but some
 *   providers are a poor match for the editor's workload:
 *     - Groq enforces strict tool-call validation and tends to reject
 *       our 18-tool registry with `Failed to call a function`.
 *     - Nscale and a few others reject the `tools` parameter outright.
 *     - Fireworks has deprecated several Llama 3.x checkpoints.
 *   For that reason `Llama-3.3-70B` is pinned to Together, which serves
 *   the model with full tool-calling support.
 *
 * More conversational models can be discovered here:
 * https://huggingface.co/models?inference_provider=all&other=conversational
 *
 * Fields:
 *   - `id`      model identifier sent to the router (optionally suffixed).
 *   - `label`   human-readable name shown in the picker.
 *   - `context` advertised context window.
 *   - `cost`    rough relative price tag ($, $$, $$$). Inference providers
 *               charge their own rates; their docs are the source of truth.
 */
export const AVAILABLE_MODELS = [
  {
    id: "openai/gpt-oss-120b",
    label: "GPT-OSS 120B",
    context: "131K",
    cost: "$$",
  },
  {
    id: "openai/gpt-oss-20b",
    label: "GPT-OSS 20B",
    context: "131K",
    cost: "$",
  },
  {
    // Pinned to Together rather than the default `:fastest` provider.
    id: "meta-llama/Llama-3.3-70B-Instruct:together",
    label: "Llama 3.3 70B",
    context: "128K",
    cost: "$",
  },
  {
    id: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    label: "Qwen3 Coder 480B",
    context: "262K",
    cost: "$$",
  },
  {
    id: "deepseek-ai/DeepSeek-V3.1",
    label: "DeepSeek V3.1",
    context: "128K",
    cost: "$$",
  },
];
/**
 * Express handler for the chat endpoint.
 *
 * Reads the optional `context` object from the request body and passes its
 * `document`, `selection` and `frontmatter` fields to `buildMessages`. When
 * that call returns a truthy context block, the block is appended to
 * `SYSTEM_PROMPT` under a "## Current context" heading; otherwise the bare
 * `SYSTEM_PROMPT` is used. The request is then delegated to
 * `streamChatResponse` together with the editor tool registry.
 *
 * @param req - Incoming request. `req.body` and `req.body.context` may both
 *   be absent; every context field is optional.
 * @param res - Response object handed through to `streamChatResponse`.
 * @returns The value returned by `streamChatResponse`.
 */
export async function handleChat(req: Request, res: Response) {
  // `req.body` can be undefined/null when no body parser populated it.
  // Destructuring it directly would throw inside this async handler, so
  // read `context` defensively and fall through to the "no context" path.
  const context = req.body?.context;

  const contextBlock = buildMessages(
    context?.document,
    context?.selection,
    context?.frontmatter,
  );

  let systemPrompt = SYSTEM_PROMPT;
  if (contextBlock) {
    systemPrompt = `${SYSTEM_PROMPT}\n\n## Current context\n\n${contextBlock}`;
  }

  return streamChatResponse(req, res, {
    systemPrompt,
    tools: editorTools,
    logPrefix: "chat",
  });
}