carbon-tokenization

Running

App Files Files Community

carbon-tokenization / backend /src /agent /chat.ts

tfrere HF Staff

fix(agent): default back to gpt-oss-120b, pin Llama 3.3 to Together

3afbbdf 22 days ago

raw

history blame contribute delete

2.3 kB

	import { editorTools } from "./tools.js";
	import { SYSTEM_PROMPT, buildMessages } from "./system-prompt.js";
	import { streamChatResponse } from "./stream-handler.js";
	import type { Request, Response } from "express";

	/**
	 * Catalogue of models offered by the UI model picker.
	 *
	 * Requirements for every entry:
	 * - the id must be served through Hugging Face Inference Providers
	 *   (`https://router.huggingface.co/v1`);
	 * - the model must support function/tool calling, because the agent
	 *   loop cannot work without it.
	 *
	 * Provider suffixes (`:provider`):
	 * When an id carries no suffix, HF Router applies its default
	 * `:fastest` provider policy. That is usually fine, but some providers
	 * are a poor match for the editor's workload:
	 * - Groq validates tool calls strictly and tends to reject our 18-tool
	 *   registry with `Failed to call a function`.
	 * - Nscale and a few others reject the `tools` parameter outright.
	 * - Fireworks has deprecated several Llama 3.x checkpoints.
	 * For that reason `Llama-3.3-70B` is pinned to Together, which serves
	 * the model with full tool-calling support.
	 *
	 * More conversational models can be found at:
	 * https://huggingface.co/models?inference_provider=all&other=conversational
	 *
	 * Fields:
	 * - `id`: model id sent to the router, optionally with a `:provider` suffix.
	 * - `label`: human-readable name.
	 * - `context`: advertised context window.
	 * - `cost`: rough relative price tag ($, $$, $$$). Inference providers
	 *   charge their own rates; their docs are the source of truth.
	 */
	export const AVAILABLE_MODELS = [
	  {
	    id: "openai/gpt-oss-120b",
	    label: "GPT-OSS 120B",
	    context: "131K",
	    cost: "$$",
	  },
	  {
	    id: "openai/gpt-oss-20b",
	    label: "GPT-OSS 20B",
	    context: "131K",
	    cost: "$",
	  },
	  {
	    // Pinned to Together instead of the default `:fastest` provider.
	    id: "meta-llama/Llama-3.3-70B-Instruct:together",
	    label: "Llama 3.3 70B",
	    context: "128K",
	    cost: "$",
	  },
	  {
	    id: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
	    label: "Qwen3 Coder 480B",
	    context: "262K",
	    cost: "$$",
	  },
	  {
	    id: "deepseek-ai/DeepSeek-V3.1",
	    label: "DeepSeek V3.1",
	    context: "128K",
	    cost: "$$",
	  },
	];

	/**
	 * Request handler for the editor chat agent.
	 *
	 * Builds the system prompt from the base `SYSTEM_PROMPT`, appending a
	 * "Current context" section when `buildMessages` returns a truthy block
	 * for the request's optional `context` (`document`, `selection`,
	 * `frontmatter`). It then delegates to `streamChatResponse` with the
	 * editor tool registry.
	 *
	 * @param req - Incoming request; `req.body.context` is optional.
	 * @param res - Response object, passed through to `streamChatResponse`.
	 * @returns The value returned by `streamChatResponse`.
	 */
	export async function handleChat(req: Request, res: Response) {
	  // `req.body` is undefined when no body-parsing middleware populated it
	  // (e.g. missing or unexpected Content-Type). Destructuring it directly
	  // would throw inside this async handler, so read `context` defensively.
	  const context = req.body?.context;
	  const contextBlock = buildMessages(
	    context?.document,
	    context?.selection,
	    context?.frontmatter,
	  );

	  // Only add the context section when there is something to show.
	  const systemPrompt = contextBlock
	    ? `${SYSTEM_PROMPT}\n\n## Current context\n\n${contextBlock}`
	    : SYSTEM_PROMPT;

	  return streamChatResponse(req, res, {
	    systemPrompt,
	    tools: editorTools,
	    logPrefix: "chat",
	  });
	}