Spaces:
Running
Running
| import OpenAI from "openai"; | |
| import { Worker } from "worker_threads"; | |
| import { fileURLToPath } from "url"; | |
| import path from "path"; | |
| import { LIGHTNING_BASE } from "./config.js"; | |
| import WebSocket from "ws"; | |
| import crypto from "crypto"; | |
| import { encoding_for_model } from "tiktoken"; | |
| import { mediaStore } from "./mediaStore.js"; | |
| import { memoryStore } from "./memoryStore.js"; | |
| import { systemPromptStore } from "./systemPromptStore.js"; | |
| import { consumeWebSearchUsage } from "./webSearchUsageStore.js"; | |
| import { listAppDocSections, readAppDocSection } from "./appDocs.js"; | |
// Resolve this module's own directory so sibling worker scripts can be located.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Script spawned via worker_threads for background search work.
const WORKER_PATH = path.join(__dirname, "searchWorker.js");
// Persistent WebSocket pool
let persistentWs = null; // The single shared upstream socket, or null when disconnected.
let wsAuthPromise = null; // In-flight connect+auth promise so concurrent callers share one handshake.
let requestIdCounter = 0; // Upstream request IDs are scoped to the current websocket connection.
let activeStreamHandlers = new Map(); // Track active stream handlers by request ID
let errorHandlers = new Map(); // Track error handlers by request ID
/**
 * Join a human-readable failure reason with an optional error message.
 * @param {string} reason - Why the upstream socket failed.
 * @param {Error|null} [err] - Optional error whose message is appended.
 * @returns {string} "reason: message", or whichever piece is non-empty, or "".
 */
function buildUpstreamSocketMessage(reason, err = null) {
  const parts = [];
  if (reason) parts.push(reason);
  const detail = err?.message;
  if (detail) parts.push(detail);
  return parts.join(": ");
}
// Tear down the shared upstream websocket after a fatal error or close event.
// Resets module-level connection state, notifies every pending request's error
// handler with a retryable failure, strips the socket's listeners, and
// terminates it if still open/connecting. No-op unless `ws` is the CURRENT
// shared socket, so a stale callback cannot clobber a newer connection.
function invalidatePersistentWebSocket(ws, reason, err = null) {
  if (!ws || persistentWs !== ws) return;
  persistentWs = null;
  wsAuthPromise = null;
  requestIdCounter = 0;
  // Drop the bookkeeping entries for the connection-scoped global listeners.
  activeStreamHandlers.delete("__messageListener__");
  activeStreamHandlers.delete("__errorHandler__");
  activeStreamHandlers.delete("__closeHandler__");
  // Reuse an already-retryable error; otherwise wrap the reason in one.
  // NOTE(review): RetryableUpstreamConnectionError is declared elsewhere in this file — confirm it is in scope.
  const failure =
    err?.name === "RetryableUpstreamConnectionError"
      ? err
      : new RetryableUpstreamConnectionError(
          "The model connection was interrupted. Reconnecting automatically.",
          buildUpstreamSocketMessage(reason, err)
        );
  // Snapshot handlers first: a handler may mutate `errorHandlers` while we iterate.
  const pendingErrorHandlers = [...errorHandlers.values()];
  for (const handler of pendingErrorHandlers) {
    try {
      handler(failure);
    } catch {
      // Ignore request-specific cleanup errors while invalidating the shared socket.
    }
  }
  try {
    ws.removeAllListeners("message");
    ws.removeAllListeners("error");
    ws.removeAllListeners("close");
  } catch {
    // Ignore listener cleanup failures.
  }
  if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {
    try {
      ws.terminate();
    } catch {
      // Ignore termination failures while resetting the shared socket.
    }
  }
}
// Return the shared, authenticated upstream websocket, creating it on demand.
// Fast path: reuse the current socket while it is OPEN. If a connect/auth
// handshake is already in flight, every caller awaits the same promise so at
// most one connection is created. Handshake phases:
//   1. open the ws/wss connection (5s timeout),
//   2. send WEBSOCKET_KEY and wait for an auth ack (5s timeout),
//   3. install connection-scoped listeners that fan incoming frames out to the
//      per-request handlers registered in `activeStreamHandlers`.
async function getSafeWebSocket() {
  if (persistentWs && persistentWs.readyState === WebSocket.OPEN) return persistentWs;
  if (wsAuthPromise) return wsAuthPromise;
  wsAuthPromise = (async () => {
    // Allow an environment override of the configured Lightning base URL.
    const lightningBase = process.env.LIGHTNING_BASE || LIGHTNING_BASE;
    // Map the http(s) scheme to ws(s) and target the chat endpoint.
    const wsURL =
      (lightningBase.startsWith("https")
        ? lightningBase.replace("https", "wss")
        : lightningBase.replace("http", "ws")) + "/ws/chat";
    const ws = new WebSocket(wsURL);
    persistentWs = ws;
    // Tolerant parser: frames may be raw JSON or SSE-style "data: {...}".
    const safeParse = (str) => {
      try {
        const cleaned = str.startsWith("data: ") ? str.slice(6) : str;
        return JSON.parse(cleaned);
      } catch (err) {
        console.warn("[WS] JSON parse error:", err, "on string:", str);
        return null;
      }
    };
    // Phase 1: wait for "open", guarded by a timeout and error/close handlers.
    await new Promise((resolve, reject) => {
      const cleanup = () => {
        clearTimeout(timer);
        ws.removeListener("open", onOpen);
        ws.removeListener("error", onError);
        ws.removeListener("close", onClose);
      };
      const timer = setTimeout(() => {
        cleanup();
        // Only clear module state if we are still the active connection attempt.
        if (persistentWs === ws) {
          persistentWs = null;
          wsAuthPromise = null;
        }
        try {
          ws.terminate();
        } catch {
          // Ignore termination failures while timing out the handshake.
        }
        reject(new Error("WS connection timeout"));
      }, 5000);
      const onOpen = () => {
        cleanup();
        resolve();
      };
      const onError = (err) => {
        cleanup();
        console.error("[WS] Connection error", err);
        if (persistentWs === ws) {
          persistentWs = null;
          wsAuthPromise = null;
        }
        reject(err);
      };
      const onClose = (code, reasonBuffer) => {
        cleanup();
        const reason = reasonBuffer?.toString?.() || "";
        if (persistentWs === ws) {
          persistentWs = null;
          wsAuthPromise = null;
        }
        reject(new Error(`WS connection closed (${code})${reason ? `: ${reason}` : ""}`));
      };
      ws.on("open", onOpen);
      ws.on("error", onError);
      ws.on("close", onClose);
    });
    // Phase 2: authenticate with the shared key and await the server's ack.
    ws.send(JSON.stringify({ key: process.env.WEBSOCKET_KEY }));
    await new Promise((resolve, reject) => {
      const cleanup = () => {
        clearTimeout(timer);
        ws.removeListener("message", authHandler);
        ws.removeListener("error", onError);
        ws.removeListener("close", onClose);
      };
      const timer = setTimeout(() => {
        cleanup();
        if (persistentWs === ws) {
          persistentWs = null;
          wsAuthPromise = null;
        }
        try {
          ws.terminate();
        } catch {
          // Ignore termination failures while timing out auth.
        }
        reject(new Error("WS auth timeout"));
      }, 5000);
      const authHandler = (data) => {
        const msg = safeParse(data.toString());
        // Unparseable frames are ignored; keep waiting until the timeout.
        if (!msg) return;
        if (msg.type === "auth" && msg.status === "ok") {
          cleanup();
          resolve();
        }
        if (msg.error) {
          cleanup();
          console.error("[WS] Auth error", msg.error);
          if (persistentWs === ws) {
            persistentWs = null;
            wsAuthPromise = null;
          }
          reject(new Error(`WS auth error: ${msg.error}`));
        }
      };
      const onError = (err) => {
        cleanup();
        console.error("[WS] Auth error event", err);
        if (persistentWs === ws) {
          persistentWs = null;
          wsAuthPromise = null;
        }
        reject(err);
      };
      const onClose = (code, reasonBuffer) => {
        cleanup();
        const reason = reasonBuffer?.toString?.() || "";
        if (persistentWs === ws) {
          persistentWs = null;
          wsAuthPromise = null;
        }
        reject(new Error(`WS auth closed (${code})${reason ? `: ${reason}` : ""}`));
      };
      ws.on("message", authHandler);
      ws.on("error", onError);
      ws.on("close", onClose);
    });
    // Phase 3: fan every incoming frame out to all live request handlers
    // (keys starting with "__" are internal bookkeeping, not requests).
    const globalMessageHandler = (data) => {
      const line = data.toString();
      for (const [id, handler] of activeStreamHandlers.entries()) {
        if (!String(id).startsWith("__")) handler(line);
      }
    };
    const globalErrorHandler = (err) => {
      console.error("[WS ERROR]", err);
      invalidatePersistentWebSocket(ws, "Upstream websocket error", err);
    };
    const globalCloseHandler = (code, reasonBuffer) => {
      const reason = reasonBuffer?.toString?.() || "";
      console.warn(`[WS CLOSE] ${code}${reason ? `: ${reason}` : ""}`);
      invalidatePersistentWebSocket(
        ws,
        `Upstream websocket closed (${code})${reason ? `: ${reason}` : ""}`
      );
    };
    ws.on("message", globalMessageHandler);
    ws.on("error", globalErrorHandler);
    ws.on("close", globalCloseHandler);
    // Request IDs restart with each new connection.
    requestIdCounter = 0;
    // Stash the listeners so invalidatePersistentWebSocket can clean them up.
    activeStreamHandlers.set("__messageListener__", globalMessageHandler);
    activeStreamHandlers.set("__errorHandler__", globalErrorHandler);
    activeStreamHandlers.set("__closeHandler__", globalCloseHandler);
    // Handshake complete: future callers take the OPEN fast path.
    wsAuthPromise = null;
    return ws;
  })();
  return wsAuthPromise;
}
/**
 * Query the hosted web-search endpoint and return the first result.
 * @param {string} query - Search terms to submit.
 * @returns {Promise<*>} The first entry of the API's `results` array.
 * @throws {Error} When the HTTP status is not OK or the payload lacks results.
 */
async function gradioSearch(query) {
  const response = await fetch("https://incognitolm-Web-Search.hf.space/api/search", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ query }),
  });
  // Parse defensively: error responses (e.g. a 502 HTML page) are not JSON,
  // and the previous unconditional .json() threw an opaque SyntaxError that
  // masked the real HTTP failure.
  let payload = null;
  try {
    payload = await response.json();
  } catch {
    payload = null;
  }
  if (!response.ok || !payload?.results) {
    throw new Error(`Search API error: ${payload?.error || response.statusText}`);
  }
  return payload.results[0];
}
// Base system prompt sent with every chat: color-tagging rules, tool usage,
// staged-attachment handling, memory discipline, and session naming.
// NOTE: the fragments below are concatenated directly, so each one must end
// with a trailing space — several joins were previously missing one, fusing
// sentences together (e.g. "user.The tag"); also fixed the "relevat" typo.
const SYSTEM_PROMPT =
  "CRITICAL RULE: Every response MUST use HTML <span data-color=\"{COLOR NAME}\"> tags to color main points and headings unless you are told otherwise. " +
  "COLORS MUST HAVE MEANING AND CONSISTENCY ACROSS THE ENTIRE CONVERSATION. " +
  "You may ONLY use the following semantic color names: green, pink, blue, red, orange, yellow, purple, teal, gold, coral. " +
  "Never output text formatted with explicit black or white colors. Always put color <span> tags as close to the text as possible and do not include markdown within the tags. " +
  "Use a variety of colors throughout every response to distinguish headings, sections, and key terms. " +
  "Keep code blocks plain, but color headings and important points in surrounding text. " +
  "Do not over-color responses. Use color intentionally and sparingly. " +
  "CRITICAL RULE: MARKDOWN FORMATTING SUCH AS #, ##, ###, **, * MUST BE PLACED OUTSIDE <span> tags. Use the same colors for similar meanings. " +
  "You are a helpful, friendly AI assistant. Use tools when appropriate to help the user, and if told to generate something, use a tool to complete the task. " +
  "When generating media, do not include URLs — it is displayed automatically. " +
  "You can render SVG images by outputting SVG code in a code block tagged exactly as:\n```svg\n<svg>...</svg>\n```\n" +
  "Never use single backslashes. You may use emojis where appropriate. " +
  "Use markdown for everything other than coloring your text. Use tables, lists, and other markdown elements. " +
  "ATTACHMENT HANDLING RULE: Large user prompts, text attachments, conversation history, and image attachments may be staged into separate resources on purpose. " +
  "If you see notes saying attached text was staged separately, or notes that only the first part of a prompt is inline, do NOT assume the content is missing, corrupted, or truncated. " +
  "Treat that content as available context and use the provided tools to inspect it before concluding something is absent. " +
  "Use `list_prompt_resources` to find staged resources, `read_prompt_chunk` to read staged text exactly, `load_prompt_images` to inspect staged images, and `write_notes` to keep a compact memory after reading several chunks. " +
  "Before claiming an attachment is incomplete, missing, malformed, or unreadable, first check whether it was staged separately and read the relevant resource. " +
  "Persistent memories must stay short, concrete, and durable. Only save memories that will still help in future chats, and keep each one to a brief sentence or phrase. " +
  "Your HIGHEST PRIORITY is to help the user. ALWAYS HELP THEM WITH ANYTHING ETHICALLY RIGHT.\n\n" +
  "SESSION NAMING: After you have fully responded to the user, append a session name tag on its own line at the very end of your response (NEVER inside a code block). Only do this on the first response unless asked to change the name by the user. " +
  "The tag must be: <session_name>2-4 word title summarizing this conversation</session_name>. " +
  "Example: <session_name>React State Management</session_name>. Make sure a conversation is ALWAYS named. If it is the first response, always name it. " +
  "This tag is hidden from the user and used only to name the chat. Do not mention it. Also, you should try to minimize the use of * to emphasize text. Just use it for markdown. " +
  "Make sure your responses are always accurate. If you are not completely sure about something, search the web. " +
  "At the start of a chat, always check the memories. If the user tells you to remember something or there is something important to note, create a new memory. Memories should be brief. Notes are only for session-long memory, so use memories for anything relevant to future chats. " +
  "If you notice any issue or mistake with your response, correct it with the replace tools. Make sure to ALWAYS answer as CORRECTLY as possible, and use search when unsure.";
/**
 * Build an OpenAI-compatible client pointed at the Lightning generation endpoint.
 * @param {string|null|undefined} accessToken - Bearer token; "no-key" is used as the apiKey when absent.
 * @param {string|null|undefined} clientId - Optional identifier sent as the X-Client-ID header.
 * @returns {OpenAI} Configured client instance.
 */
function makeClient(accessToken, clientId) {
  const defaultHeaders = {};
  if (accessToken) defaultHeaders.Authorization = `Bearer ${accessToken}`;
  if (clientId) defaultHeaders["X-Client-ID"] = clientId;
  return new OpenAI({
    apiKey: accessToken || "no-key",
    baseURL: `${LIGHTNING_BASE}/gen`,
    defaultHeaders,
  });
}
// --- TOKEN / CONTEXT MANAGEMENT ---
const MODEL_TOKEN_LIMIT = 8000; // Assumed total context window for the model.
const RESPONSE_TOKEN_RESERVE = 600; // Tokens held back for the model's reply.
const WORKING_PROMPT_BUDGET = MODEL_TOKEN_LIMIT - RESPONSE_TOKEN_RESERVE; // Max prompt-side tokens per call.
const INLINE_USER_PROMPT_TOKENS = 5000; // Head of the user prompt kept inline; overflow is staged.
const MAX_AGENT_STEPS = 12; // Cap on agent iterations (usage is below this view).
const CHUNK_SIZE = 700; // Token size of each staged-resource chunk.
const MAX_INLINE_IMAGES = 3; // Images embedded directly; the full set is staged when exceeded.
const RECENT_HISTORY_MAX_MESSAGES = 6; // Verbatim history tail: message cap...
const RECENT_HISTORY_TOKEN_BUDGET = 900; // ...and token cap (see prepareHistoryContext).
const HISTORY_SUMMARY_TOKEN_BUDGET = 600; // Budget for the condensed history summary lines.
const NOTES_TOKEN_BUDGET = 900; // Budget for accumulated working notes (see appendAssistantNote).
const MAX_DYNAMIC_MESSAGES = 10; // Cap on tool-loop messages kept per model call (see buildModelMessages).
const MAX_UPSTREAM_RATE_LIMIT_RETRIES = 4; // Retry cap for upstream rate limiting (usage below this view).
const MAX_UPSTREAM_CONNECTION_RETRIES = 2; // Retry cap for upstream connection failures (usage below this view).
const DEFAULT_UPSTREAM_RETRY_MS = 4000; // Presumably the fallback retry delay — confirm at usage site.
const MAX_UPSTREAM_RETRY_MS = 15000; // Ceiling on retry delays.
const UPSTREAM_RETRY_BUFFER_MS = 350; // Extra pause layered onto retry delays.
const UPSTREAM_FIRST_RESPONSE_TIMEOUT_MS = 15000; // Max wait for the first upstream response.
const UPSTREAM_IDLE_TIMEOUT_MS = 45000; // Max mid-stream silence before giving up.
const UPSTREAM_STREAM_TIMEOUT_MS = 120000; // Absolute cap on a single streamed response.
// In-memory stores for staged prompt resources and assistant notes
const promptContextStore = new Map(); // sessionId -> { resources, resourcesById }
const assistantNotesStore = new Map(); // sessionId -> [{ step, note }]
// Initialize tiktoken encoder
const enc = encoding_for_model("gpt-4"); // maps well to gpt-oss:120b
const tokenDecoder = new TextDecoder("utf-8"); // Used when enc.decode returns bytes instead of a string.
/** Count tiktoken tokens in `text`; falsy input counts as the empty string. */
function countTokens(text) {
  const normalized = String(text || "");
  return enc.encode(normalized).length;
}
/**
 * Decode a token array back to text. Some tiktoken builds return a string,
 * others return UTF-8 bytes; handle both.
 */
function decodeTokenSlice(tokens) {
  const decoded = enc.decode(tokens);
  if (typeof decoded === "string") return decoded;
  return tokenDecoder.decode(decoded);
}
/**
 * Estimate the token cost of a list of chat messages.
 * String content is tokenized directly; array content sums its text blocks
 * and charges a flat 85 tokens per image_url block.
 */
function countMessageTokens(messages) {
  let total = 0;
  for (const msg of messages) {
    const { content } = msg;
    if (typeof content === "string") {
      total += countTokens(content);
    } else if (Array.isArray(content)) {
      for (const block of content) {
        if (block.type === "text") {
          total += countTokens(block.text);
        } else if (block.type === "image_url") {
          total += 85; // flat per-image estimate
        }
      }
    }
  }
  return total;
}
/** Collapse every whitespace run to a single space and trim the ends. */
function compactWhitespace(text) {
  const source = String(text || "");
  return source.trim().replace(/\s+/g, " ");
}
/**
 * Produce a short single-line preview of `text`.
 * Returns "Empty." for blank input; truncates with "..." past `maxChars`.
 */
function makePreview(text, maxChars = 160) {
  const compact = compactWhitespace(text);
  if (!compact) return "Empty.";
  if (compact.length <= maxChars) return compact;
  return `${compact.slice(0, maxChars - 3)}...`;
}
/** Replace every HTML/XML tag with a space (crude, non-parsing strip). */
function stripHtml(text) {
  const source = String(text || "");
  return source.replace(/<[^>]+>/g, " ");
}
/**
 * Split `text` at a token boundary.
 * @returns {{head: string, tail: string}} Whole text in `head` (empty `tail`)
 * when it fits within `tokenLimit`; otherwise the first `tokenLimit` tokens
 * decoded into `head` and the remainder into `tail`.
 */
function splitTextByTokenLimit(text, tokenLimit) {
  const source = String(text || "");
  const tokens = enc.encode(source);
  if (tokens.length <= tokenLimit) {
    return { head: source, tail: "" };
  }
  const head = decodeTokenSlice(tokens.slice(0, tokenLimit));
  const tail = decodeTokenSlice(tokens.slice(tokenLimit));
  return { head, tail };
}
/**
 * Split `text` into fixed-size token chunks, each annotated with its index,
 * decoded text, token count, and a short preview.
 * @returns {Array<{index: number, text: string, tokenCount: number, preview: string}>}
 */
function chunkTextWithMetadata(text, chunkSize = CHUNK_SIZE) {
  const tokens = enc.encode(String(text || ""));
  const chunks = [];
  let offset = 0;
  while (offset < tokens.length) {
    const chunkText = decodeTokenSlice(tokens.slice(offset, offset + chunkSize));
    chunks.push({
      index: chunks.length,
      text: chunkText,
      tokenCount: countTokens(chunkText),
      preview: makePreview(chunkText, 120),
    });
    offset += chunkSize;
  }
  return chunks;
}
/**
 * Turn an arbitrary name into a lowercase snake_case slug (max 40 chars).
 * Falls back to "resource" when nothing usable remains.
 */
function makeResourceSlug(value) {
  const slug = compactWhitespace(value)
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "_")
    .replace(/^_+|_+$/g, "")
    .slice(0, 40);
  return slug || "resource";
}
/**
 * Create and register a fresh staged-resource state for a session,
 * clearing any previous notes for it as a side effect.
 * @returns {{sessionId: *, createdAt: number, resources: Array, resourcesById: Map}}
 */
function createPromptState(sessionId) {
  const freshState = {
    sessionId,
    createdAt: Date.now(),
    resources: [],
    resourcesById: new Map(),
  };
  promptContextStore.set(sessionId, freshState);
  assistantNotesStore.set(sessionId, []);
  return freshState;
}
/** Look up the staged-resource state for a session (undefined when absent). */
function getPromptState(sessionId) {
  const existing = promptContextStore.get(sessionId);
  return existing;
}
/** Drop both the staged resources and the working notes for a session. */
function clearPromptState(sessionId) {
  assistantNotesStore.delete(sessionId);
  promptContextStore.delete(sessionId);
}
/**
 * Build a unique resource ID of the form "<kind>_<slug>", appending
 * "_2", "_3", ... when the base ID is already taken in this state.
 */
function createResourceId(state, kind, name) {
  const base = `${kind}_${makeResourceSlug(name)}`;
  if (!state.resourcesById.has(base)) return base;
  let suffix = 2;
  let candidate = `${base}_${suffix}`;
  while (state.resourcesById.has(candidate)) {
    suffix += 1;
    candidate = `${base}_${suffix}`;
  }
  return candidate;
}
/**
 * Stage a text blob as a chunked resource on `state`.
 * @returns {object|null} The registered resource, or null for blank text.
 */
function registerTextResource(state, { kind, name, text }) {
  const normalizedText = String(text ?? "").trim();
  if (!normalizedText) return null;
  const chunks = chunkTextWithMetadata(normalizedText);
  const resource = {
    id: createResourceId(state, kind, name),
    kind,
    name,
    totalTokens: countTokens(normalizedText),
    chunkCount: chunks.length,
    preview: makePreview(normalizedText),
    chunks,
  };
  state.resources.push(resource);
  state.resourcesById.set(resource.id, resource);
  return resource;
}
/**
 * Stage a batch of image blocks as a resource on `state`.
 * Falsy entries are skipped; each kept block is shallow-copied and indexed.
 * @returns {object|null} The registered resource, or null when no images remain.
 */
function registerImageResource(state, { name, images, inlineImageCount = 0 }) {
  const normalizedImages = [];
  for (const block of images || []) {
    if (!block) continue;
    normalizedImages.push({ index: normalizedImages.length, block: { ...block } });
  }
  if (normalizedImages.length === 0) return null;
  const resource = {
    id: createResourceId(state, "images", name),
    kind: "images",
    name,
    imageCount: normalizedImages.length,
    inlineImageCount,
    preview: `${normalizedImages.length} image(s)`,
    images: normalizedImages,
  };
  state.resources.push(resource);
  state.resourcesById.set(resource.id, resource);
  return resource;
}
// Extract <details><summary>…</summary>…</details> attachment blocks from a
// user message. Each attachment's content (fenced code blocks when present,
// otherwise the HTML-stripped inner text) is collected separately, and the
// inline text is rewritten with a short staging note in place of each block.
// @returns {{ text: string, attachments: Array<{name: string, content: string}> }}
function parseTextAttachmentsFromDetails(text) {
  const source = String(text || "");
  if (!source.trim()) return { text: "", attachments: [] };
  // Fresh regexes per call: both carry /g and are therefore stateful.
  const detailsRegex = /<details>\s*<summary>(.*?)<\/summary>\s*([\s\S]*?)<\/details>/gi;
  const codeBlockRegex = /```(?:[\w+-]+)?\n?([\s\S]*?)```/g;
  const attachments = [];
  const parts = [];
  let lastIndex = 0;
  let attachmentCounter = 1;
  let match;
  while ((match = detailsRegex.exec(source))) {
    // Keep the prose between the previous attachment and this one.
    parts.push(source.slice(lastIndex, match.index));
    const summary = compactWhitespace(match[1]) || `attachment_${attachmentCounter}`;
    const innerContent = match[2] || "";
    const codeBlocks = [...innerContent.matchAll(codeBlockRegex)];
    if (codeBlocks.length) {
      // Prefer fenced code contents; number parts when there are several.
      codeBlocks.forEach((codeMatch, index) => {
        attachments.push({
          name: codeBlocks.length > 1 ? `${summary} part ${index + 1}` : summary,
          content: codeMatch[1],
        });
      });
    } else {
      // No code fences: fall back to the tag-stripped inner text.
      const plainText = stripHtml(innerContent).trim();
      if (plainText) {
        attachments.push({ name: summary, content: plainText });
      }
    }
    // Replace the attachment body with the staging marker the system prompt describes.
    parts.push(`\n[Attached text "${summary}" was staged separately from the inline prompt.]\n`);
    lastIndex = match.index + match[0].length;
    attachmentCounter++;
  }
  parts.push(source.slice(lastIndex));
  return {
    text: parts.join("").trim(),
    attachments,
  };
}
/**
 * Flatten chat-message content (string or block array) into plain text.
 * Image blocks are summarized as "[N image attachment(s)]". With
 * `preview: true`, text is shortened via makePreview and pieces are
 * joined with a space instead of a newline.
 */
function contentToText(content, { preview = false } = {}) {
  if (typeof content === "string") {
    if (preview) return makePreview(content, 220);
    return String(content || "");
  }
  if (!Array.isArray(content)) return "";
  const textParts = [];
  let imageCount = 0;
  for (const block of content) {
    if (block.type === "text" && block.text) {
      textParts.push(block.text);
    } else if (block.type === "image_url") {
      imageCount += 1;
    }
  }
  const pieces = [];
  const joinedText = textParts.join("\n\n").trim();
  if (joinedText) {
    pieces.push(preview ? makePreview(joinedText, 220) : joinedText);
  }
  if (imageCount > 0) {
    pieces.push(`[${imageCount} image attachment(s)]`);
  }
  return pieces.join(preview ? " " : "\n");
}
/** Collect the function names declared in a tool-definition array. */
function getAllowedToolNames(toolDefs = []) {
  const names = [];
  for (const tool of toolDefs) {
    const name = tool?.function?.name;
    if (name) names.push(name);
  }
  return names;
}
/**
 * Clean up a model-emitted tool name and, when an allow-list is given,
 * resolve it to one of the allowed names.
 * Cleaning: trim, drop any "<|...|>" special-token tail, strip wrapping
 * quotes/backticks, then keep only the leading identifier run. With an
 * allow-list, fall back to prefix and then substring matching against the
 * raw (lowercased) input.
 * @returns {string|null} Sanitized name, or null when nothing matches.
 */
function sanitizeToolName(rawName, allowedToolNames = []) {
  if (!rawName) return null;
  let name = String(rawName).trim();
  if (!name) return null;
  // Drop trailing special-token junk such as "<|channel|>…".
  name = name.replace(/<\|[\s\S]*$/, "").trim();
  // Strip wrapping quote characters.
  name = name.replace(/^["'`]+|["'`]+$/g, "");
  // Keep only the leading identifier-like run.
  const identifierMatch = name.match(/^[A-Za-z0-9_-]+/);
  if (identifierMatch) name = identifierMatch[0];
  if (allowedToolNames.length === 0) {
    return name || null;
  }
  if (allowedToolNames.includes(name)) {
    return name;
  }
  const normalizedRaw = String(rawName).toLowerCase();
  for (const allowedName of allowedToolNames) {
    if (normalizedRaw.startsWith(allowedName.toLowerCase())) return allowedName;
  }
  for (const allowedName of allowedToolNames) {
    if (normalizedRaw.includes(allowedName.toLowerCase())) return allowedName;
  }
  return null;
}
/**
 * Normalize persisted tool calls into the OpenAI tool_calls wire shape.
 * Missing IDs get a generated "call_<uuid>"; arguments are stringified
 * unless already a string; names pass through sanitizeToolName.
 */
function normalizeStoredToolCalls(toolCalls = []) {
  const normalized = [];
  for (const call of toolCalls) {
    const rawArgs = call.args ?? call.function?.arguments ?? {};
    const argumentsString = typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs);
    normalized.push({
      id: call.id || `call_${crypto.randomUUID()}`,
      type: "function",
      function: {
        name: sanitizeToolName(call.name || call.function?.name || "unknown_tool"),
        arguments: argumentsString,
      },
    });
  }
  return normalized;
}
/**
 * Pull tool-call names off a message, accepting either the snake_case
 * `tool_calls` or camelCase `toolCalls` field.
 */
function getMessageToolNames(msg) {
  let rawCalls = [];
  if (Array.isArray(msg.tool_calls)) {
    rawCalls = msg.tool_calls;
  } else if (Array.isArray(msg.toolCalls)) {
    rawCalls = msg.toolCalls;
  }
  const names = [];
  for (const call of rawCalls) {
    const name = call?.function?.name || call?.name;
    if (name) names.push(name);
  }
  return names;
}
/** One-line "ROLE: preview [tools: …]" description of a message for summaries. */
function describeMessageForSummary(msg) {
  const role = (msg.role || "message").toUpperCase();
  const contentPreview = contentToText(msg.content, { preview: true }) || "[no text]";
  const toolNames = getMessageToolNames(msg);
  let suffix = "";
  if (toolNames.length) {
    suffix = ` [tools: ${toolNames.join(", ")}]`;
  }
  return `${role}: ${contentPreview}${suffix}`;
}
/**
 * Render messages as a plain-text transcript: each message is
 * "ROLE:\n<full text>" (plus a tool-call line when relevant), with blank
 * lines between messages.
 */
function messagesToTranscript(messages) {
  const sections = [];
  for (const msg of messages) {
    const lines = [
      `${(msg.role || "message").toUpperCase()}:`,
      contentToText(msg.content) || "[no text]",
    ];
    const toolNames = getMessageToolNames(msg);
    if (toolNames.length) {
      lines.push(`[tool calls: ${toolNames.join(", ")}]`);
    }
    sections.push(lines.join("\n"));
  }
  return sections.join("\n\n");
}
/**
 * Build a bulleted, token-bounded summary of `messages`.
 * Stops before the line that would exceed `tokenBudget` and appends a
 * single bullet noting how many messages were omitted.
 */
function buildHistorySummary(messages, tokenBudget = HISTORY_SUMMARY_TOKEN_BUDGET) {
  const lines = [];
  let spentTokens = 0;
  let index = 0;
  for (const message of messages) {
    const line = `- ${describeMessageForSummary(message)}`;
    const cost = countTokens(line);
    if (spentTokens + cost > tokenBudget) {
      const omitted = messages.length - index;
      lines.push(`- ${omitted} earlier message(s) were omitted from this summary.`);
      break;
    }
    lines.push(line);
    spentTokens += cost;
    index += 1;
  }
  return lines.join("\n");
}
/**
 * Build a JSON-friendly manifest of all staged resources on `state`.
 * Image resources expose image counts; text resources expose chunk and
 * token counts. Tolerates a null/empty state.
 */
function buildPromptResourceManifest(state) {
  const resources = state?.resources || [];
  const entries = [];
  for (const resource of resources) {
    if (resource.kind === "images") {
      entries.push({
        id: resource.id,
        kind: resource.kind,
        name: resource.name,
        image_count: resource.imageCount,
        inline_image_count: resource.inlineImageCount,
        preview: resource.preview,
      });
    } else {
      entries.push({
        id: resource.id,
        kind: resource.kind,
        name: resource.name,
        chunk_count: resource.chunkCount,
        total_tokens: resource.totalTokens,
        preview: resource.preview,
      });
    }
  }
  return { resource_count: resources.length, resources: entries };
}
/** Render one staged resource as a markdown bullet for the manifest message. */
function formatResourceManifestLine(resource) {
  if (resource.kind === "images") {
    let inlineNote = "";
    if (resource.inlineImageCount) {
      inlineNote = `, first ${resource.inlineImageCount} already inline`;
    }
    return `- \`${resource.id}\` (${resource.kind}, ${resource.imageCount} image(s)${inlineNote})`;
  }
  return `- \`${resource.id}\` (${resource.kind}, ${resource.chunkCount} chunk(s), ${resource.totalTokens} tokens): ${resource.preview}`;
}
/**
 * Resolve a staged resource by ID, falling back to an exact name match.
 * Returns null when the state or identifier is missing, or nothing matches.
 */
function findPromptResource(state, resourceId) {
  if (!state || !resourceId) return null;
  const byId = state.resourcesById.get(resourceId);
  if (byId) return byId;
  const byName = state.resources.find((resource) => resource.name === resourceId);
  return byName || null;
}
/**
 * Append a working note for the session, skipping blanks and immediate
 * duplicates. Oldest notes are evicted (keeping at least one) until the
 * rendered note list fits within NOTES_TOKEN_BUDGET.
 */
function appendAssistantNote(sessionId, note) {
  const normalizedNote = compactWhitespace(note);
  if (!normalizedNote) return;
  const notes = assistantNotesStore.get(sessionId) || [];
  const lastEntry = notes[notes.length - 1];
  if (lastEntry?.note === normalizedNote) return;
  notes.push({ step: notes.length + 1, note: normalizedNote });
  const renderedTokenCount = () =>
    countTokens(notes.map((entry) => `- ${entry.note}`).join("\n"));
  while (notes.length > 1 && renderedTokenCount() > NOTES_TOKEN_BUDGET) {
    notes.shift();
  }
  assistantNotesStore.set(sessionId, notes);
}
/**
 * Render the session's working notes as a system message,
 * or null when there are none.
 */
function buildAssistantNotesMessage(sessionId) {
  const notes = assistantNotesStore.get(sessionId) || [];
  if (notes.length === 0) return null;
  const lines = ["Working notes captured during this response:"];
  for (const entry of notes) {
    lines.push(`- ${entry.note}`);
  }
  return { role: "system", content: lines.join("\n") };
}
/**
 * Summarize messages dropped during context compaction as one system
 * message noting the tools used, tool-result count, and user image
 * batches involved. Returns null when nothing was omitted.
 */
function buildCompactionSummaryMessage(messages) {
  if (!messages.length) return null;
  const toolNames = new Set();
  let toolResultCount = 0;
  let imageBatchCount = 0;
  for (const msg of messages) {
    getMessageToolNames(msg).forEach((name) => toolNames.add(name));
    if (msg.role === "tool") toolResultCount += 1;
    const isUserImageBatch =
      msg.role === "user" &&
      Array.isArray(msg.content) &&
      msg.content.some((block) => block.type === "image_url");
    if (isUserImageBatch) imageBatchCount += 1;
  }
  const details = [];
  if (toolNames.size) details.push(`tools: ${[...toolNames].join(", ")}`);
  if (toolResultCount) details.push(`${toolResultCount} prior tool result(s)`);
  if (imageBatchCount) details.push(`${imageBatchCount} prior image batch(es)`);
  const detailSuffix = details.length ? ` (${details.join("; ")})` : "";
  return {
    role: "system",
    content: `Earlier tool interactions in this response were compacted to stay within the context window${detailSuffix}. Reread any chunk or image batch if you need the exact content again.`,
  };
}
// Assemble the final message list for one model call, fitting the dynamic
// tool-loop messages into the remaining token budget.
// - `baseMessages`: fixed prefix (system prompt, history summary, user turn).
// - `workingMessages`: the evolving tool-call/result messages for this response.
// Keeps the newest working messages first, capped by MAX_DYNAMIC_MESSAGES and
// the leftover WORKING_PROMPT_BUDGET; older ones are replaced by a one-line
// compaction summary when that summary still fits.
function buildModelMessages(baseMessages, workingMessages, sessionId) {
  const notesMessage = buildAssistantNotesMessage(sessionId);
  const messages = [...baseMessages];
  if (notesMessage) messages.push(notesMessage);
  // Tokens still available for dynamic messages after the fixed prefix + notes.
  const budgetLeft = Math.max(0, WORKING_PROMPT_BUDGET - countMessageTokens(messages));
  const keptMessages = [];
  let keptTokens = 0;
  // Walk backwards so the most recent messages are preferred.
  for (let i = workingMessages.length - 1; i >= 0; i--) {
    const candidate = workingMessages[i];
    const candidateTokens = countMessageTokens([candidate]);
    if (keptMessages.length >= MAX_DYNAMIC_MESSAGES) {
      break;
    }
    // Stop once the budget would overflow — except that the very first kept
    // message is always admitted, even when it alone exceeds the budget.
    if (keptMessages.length > 0 && keptTokens + candidateTokens > budgetLeft) {
      break;
    }
    keptMessages.unshift(candidate);
    keptTokens += candidateTokens;
    // A single oversized message is kept alone; nothing else would fit.
    if (keptMessages.length === 1 && candidateTokens > budgetLeft) {
      break;
    }
  }
  const omittedMessages = workingMessages.slice(0, workingMessages.length - keptMessages.length);
  const summaryMessage = buildCompactionSummaryMessage(omittedMessages);
  if (summaryMessage) {
    // Only include the compaction note if the combined total still fits.
    const withSummary = [...messages, summaryMessage, ...keptMessages];
    if (countMessageTokens(withSummary) <= WORKING_PROMPT_BUDGET) {
      messages.push(summaryMessage);
    }
  }
  messages.push(...keptMessages);
  return messages;
}
// Split prior conversation history into (a) a small tail of recent messages
// kept verbatim and (b) older messages that are staged as a readable resource
// plus a token-bounded summary system message.
// Returns { summaryMessages, recentMessages }; either may be empty.
function prepareHistoryContext(history, state) {
  const normalizedHistory = history.filter(Boolean);
  if (!normalizedHistory.length) {
    return { summaryMessages: [], recentMessages: [] };
  }
  // Collect the newest messages until either the count or token cap is hit.
  const recentMessages = [];
  let recentTokens = 0;
  for (let i = normalizedHistory.length - 1; i >= 0; i--) {
    const candidate = normalizedHistory[i];
    const candidateTokens = countMessageTokens([candidate]);
    if (
      recentMessages.length >= RECENT_HISTORY_MAX_MESSAGES ||
      recentTokens + candidateTokens > RECENT_HISTORY_TOKEN_BUDGET
    ) {
      break;
    }
    recentMessages.unshift(candidate);
    recentTokens += candidateTokens;
  }
  const olderMessages = normalizedHistory.slice(0, normalizedHistory.length - recentMessages.length);
  if (!olderMessages.length) {
    return { summaryMessages: [], recentMessages };
  }
  // Stage the full older transcript so exact wording stays retrievable on demand.
  const historyResource = registerTextResource(state, {
    kind: "history",
    name: "Earlier conversation",
    text: messagesToTranscript(olderMessages),
  });
  // registerTextResource returns null for blank text, so guard before using its id.
  const summaryMessages = historyResource
    ? [
        {
          role: "system",
          content: [
            "Earlier conversation was condensed to keep the live prompt smaller.",
            `Summary:\n${buildHistorySummary(olderMessages)}`,
            `If exact earlier wording matters, use \`read_prompt_chunk\` with resource_id "${historyResource.id}".`,
          ].join("\n\n"),
        },
      ]
    : [];
  return { summaryMessages, recentMessages };
}
/**
 * Split the current user turn (plain string or multi-part content array)
 * into an inline portion sent directly to the model and staged resources
 * (overflow prompt text, text attachments, extra images) registered on
 * `state` for on-demand retrieval via the prompt tools.
 *
 * Returns { contextMessages, userMessages }:
 * - userMessages: the user-role message(s) carrying the inline text and up
 *   to MAX_INLINE_IMAGES inline image blocks.
 * - contextMessages: a system message advertising the staged resources, or
 *   [] when nothing was staged.
 */
function prepareCurrentUserContext(userMessage, state) {
  const attachments = []; // text attachments parsed out of the message body
  const imageBlocks = []; // image_url content parts, in original order
  const textParts = []; // plain-text fragments to join into the prompt
  if (typeof userMessage === "string") {
    const parsed = parseTextAttachmentsFromDetails(userMessage);
    if (parsed.text) textParts.push(parsed.text);
    attachments.push(...parsed.attachments);
  } else if (Array.isArray(userMessage)) {
    // Multi-part content: pull attachments out of each text part and keep
    // image parts separate for the inline/staging decisions below.
    for (const item of userMessage) {
      if (item?.type === "text" && item.text?.trim()) {
        const parsed = parseTextAttachmentsFromDetails(item.text);
        if (parsed.text) textParts.push(parsed.text);
        attachments.push(...parsed.attachments);
      } else if (item?.type === "image_url") {
        imageBlocks.push(item);
      }
    }
  }
  const rawPrimaryText = textParts.join("\n\n").trim();
  // Placeholder shown when the message contains only images/attachments.
  const fallbackText = imageBlocks.length
    ? "[Image(s) attached]"
    : attachments.length
    ? "[Text attachment(s) attached]"
    : "";
  const primaryText = rawPrimaryText || fallbackText;
  // Keep only the first INLINE_USER_PROMPT_TOKENS tokens inline; the tail
  // is staged as a readable "prompt continuation" resource.
  const { head: inlineTextHead, tail: overflowText } = splitTextByTokenLimit(
    primaryText,
    INLINE_USER_PROMPT_TOKENS
  );
  if (overflowText.trim()) {
    registerTextResource(state, {
      kind: "prompt",
      name: "Current user prompt continuation",
      text: overflowText,
    });
  }
  // Every text attachment is staged rather than inlined.
  for (const attachment of attachments) {
    registerTextResource(state, {
      kind: "attachment",
      name: attachment.name,
      text: attachment.content,
    });
  }
  // Cap inline images; the full set is staged so the model can pull the
  // remainder with load_prompt_images.
  let inlineImages = imageBlocks;
  if (imageBlocks.length > MAX_INLINE_IMAGES) {
    inlineImages = imageBlocks.slice(0, MAX_INLINE_IMAGES);
    registerImageResource(state, {
      name: "Current user images",
      images: imageBlocks,
      inlineImageCount: inlineImages.length,
    });
  }
  // Human-readable notes appended to the inline text so the model knows
  // some content was staged (not lost).
  const stagingNotes = [];
  if (overflowText.trim()) {
    stagingNotes.push(`Only the first ${INLINE_USER_PROMPT_TOKENS} tokens of the current prompt are inline.`);
  }
  if (attachments.length) {
    stagingNotes.push(`${attachments.length} text attachment(s) were staged separately.`);
  }
  if (imageBlocks.length > inlineImages.length) {
    stagingNotes.push(`${imageBlocks.length - inlineImages.length} additional image(s) were staged separately.`);
  }
  const inlineText = [
    inlineTextHead.trim() || fallbackText,
    stagingNotes.length
      ? `[Context note: ${stagingNotes.join(" ")} See the staged context guide for resource IDs.]`
      : null,
  ]
    .filter(Boolean)
    .join("\n\n");
  const userMessages = [];
  // Array-form input (or any inline images) keeps the multi-part content
  // shape; plain text collapses to a simple string message.
  if (Array.isArray(userMessage) || inlineImages.length) {
    userMessages.push({
      role: "user",
      content: [
        { type: "text", text: inlineText || "[Image(s) attached]" },
        ...inlineImages,
      ],
    });
  } else if (inlineText) {
    userMessages.push({ role: "user", content: inlineText });
  }
  // Advertise staged resources (and the tools to read them) when any exist.
  const contextMessages = state.resources.length
    ? [
        {
          role: "system",
          content: [
            "Large prompt context was staged into smaller pieces.",
            "Those staged resources are available context, not missing text.",
            "Start with the inline user message, then use tools to inspect omitted prompt text, attachments, history, or images.",
            "Use `list_prompt_resources` for the full manifest.",
            "Use `read_prompt_chunk` for exact text.",
            state.resources.some((resource) => resource.kind === "images")
              ? "Use `load_prompt_images` to inspect omitted images in smaller batches."
              : null,
            "After reading multiple chunks, use `write_notes` to keep a compact working memory.",
            "",
            "Staged resources:",
            ...state.resources.map(formatResourceManifestLine),
          ]
            .filter(Boolean)
            .join("\n"),
        },
      ]
    : [];
  return { contextMessages, userMessages };
}
// Build the hidden system messages that carry cross-chat context: an
// optional session-title note plus any persistent user memories.
function buildMemorySystemMessages(memories = [], sessionName = "") {
  const sessionNote = sessionName
    ? [
        {
          role: "system",
          content: `Current session name: "${sessionName}". This is hidden metadata and may help with continuity if the user references the chat title.`,
        },
      ]
    : [];
  const memoryNote = memories.length
    ? [
        {
          role: "system",
          content: [
            "Persistent memories from earlier chats:",
            ...memories.map((memory, index) => `${index + 1}. ${memory.content}`),
            "Treat these as concise background notes. If one is outdated or wrong, prefer the user's current message.",
          ].join("\n"),
        },
      ]
    : [];
  return [...sessionNote, ...memoryNote];
}
/**
 * Assemble the full ordered message list for a model request:
 * system prompt → memory/session notes → condensed-history summary →
 * recent turns → staged-context guide → current user turn.
 */
function buildBasePromptMessages({ sessionId, history, userMessage, memories = [], sessionName = "", systemPrompt = "" }) {
  const state = createPromptState(sessionId);
  const cleanHistory = [];
  for (const entry of history) {
    const normalized = normalizeMessage(entry);
    if (normalized) cleanHistory.push(normalized);
  }
  const historyParts = prepareHistoryContext(cleanHistory, state);
  const userParts = prepareCurrentUserContext(userMessage, state);
  return [
    { role: "system", content: systemPrompt || SYSTEM_PROMPT },
    ...buildMemorySystemMessages(memories, sessionName),
    ...historyParts.summaryMessages,
    ...historyParts.recentMessages,
    ...userParts.contextMessages,
    ...userParts.userMessages,
  ];
}
// Raised when the upstream provider rate-limits us and the request is safe
// to retry automatically after a short delay.
class RetryableRateLimitError extends Error {
  constructor(message, retryAfterMs, internalMessage = null) {
    super(message);
    this.name = "RetryableRateLimitError";
    // Suggested wait before retrying, in milliseconds (may be null).
    this.retryAfterMs = retryAfterMs;
    // Detailed message for server logs; falls back to the public message.
    this.internalMessage = internalMessage ? internalMessage : message;
    // Sanitized text that is safe to surface to end users.
    this.publicMessage = "The model provider is temporarily rate limited. Retrying automatically.";
  }
}
// Raised when the websocket to the provider drops before any output was
// produced and the request can be retried on a fresh connection.
class RetryableUpstreamConnectionError extends Error {
  constructor(publicMessage, internalMessage = null) {
    super(publicMessage);
    this.name = "RetryableUpstreamConnectionError";
    // User-safe description of the interruption.
    this.publicMessage = publicMessage;
    // Log-only detail; defaults to the public text when none is supplied.
    this.internalMessage = internalMessage ? internalMessage : publicMessage;
  }
}
// Terminal (non-retryable) provider failure — e.g. an error after partial
// output was already streamed, where a retry would duplicate text.
class UpstreamProviderError extends Error {
  constructor(publicMessage, internalMessage = null) {
    super(publicMessage);
    this.name = "UpstreamProviderError";
    // User-safe description of the failure.
    this.publicMessage = publicMessage;
    // Log-only detail; defaults to the public text when none is supplied.
    this.internalMessage = internalMessage ? internalMessage : publicMessage;
  }
}
/**
 * Normalize an upstream error payload (string or object) into plain text.
 *
 * String payloads may arrive wrapped as `[ERROR] "..."` or `[ERROR] '...'`;
 * the marker and one layer of quoting are stripped. Object payloads prefer
 * their `message` property, then fall back to JSON, then String().
 *
 * Fix: the original single-quote branch was dead code — JSON.parse can never
 * parse `'...'`, so single-quoted payloads were returned with their quotes
 * intact despite the explicit check for them. They are now unquoted directly.
 *
 * @param {unknown} errorPayload Error payload from the upstream stream.
 * @returns {string} Human-readable error text ("" when the payload is falsy).
 */
function getErrorText(errorPayload) {
  if (typeof errorPayload === "string") {
    let text = errorPayload.trim();
    if (text.startsWith("[ERROR]")) {
      text = text.slice("[ERROR]".length).trim();
    }
    if (text.length > 1 && text.startsWith("\"") && text.endsWith("\"")) {
      // Double-quoted payloads are usually JSON-encoded strings.
      try {
        const parsed = JSON.parse(text);
        if (typeof parsed === "string") {
          text = parsed;
        }
      } catch {
        // Keep the original text if it is not valid JSON.
      }
    } else if (text.length > 1 && text.startsWith("'") && text.endsWith("'")) {
      // Single-quoted strings are never valid JSON; strip the quotes directly.
      text = text.slice(1, -1);
    }
    return text;
  }
  if (!errorPayload) return "";
  if (typeof errorPayload.message === "string") return errorPayload.message;
  try {
    return JSON.stringify(errorPayload);
  } catch {
    // JSON.stringify throws on cycles/BigInt; fall back to String().
    return String(errorPayload);
  }
}
// Heuristically decide whether an upstream error payload represents a
// rate-limit (HTTP 429 style) failure that is worth retrying.
function isRateLimitError(errorPayload) {
  const text = getErrorText(errorPayload).toLowerCase();
  const code = String(errorPayload?.code || errorPayload?.error?.code || "").toLowerCase();
  const type = String(errorPayload?.type || errorPayload?.error?.type || "").toLowerCase();
  const status = Number(errorPayload?.status || errorPayload?.error?.status || 0);
  // Structured signals first.
  if (status === 429 || code === "rate_limit_exceeded" || type === "tokens") {
    return true;
  }
  // Otherwise scan the error text for known rate-limit markers.
  const rateLimitMarkers = [
    "rate limit",
    "rate limit reached",
    "rate_limit_exceeded",
    "upstream provider error (429)",
    "(429)",
    " 429",
  ];
  return rateLimitMarkers.some((marker) => text.includes(marker));
}
/**
 * Detect provider failures that arrive embedded in ordinary streamed content
 * text (rather than as a structured error payload).
 *
 * Fix: removed the redundant startsWith("upstream provider error") check —
 * it was fully subsumed by includes("upstream provider error") below, so
 * behavior is unchanged.
 *
 * @param {unknown} text Raw content text (or payload) from the stream.
 * @returns {boolean} True when the text looks like a provider error message.
 */
function isEmbeddedProviderErrorText(text) {
  const normalized = getErrorText(text).toLowerCase();
  return (
    normalized.startsWith("provider error") ||
    normalized.includes("upstream provider error") ||
    normalized.includes("rate_limit_exceeded") ||
    normalized.includes("rate limit reached")
  );
}
// Pull a retry delay (in ms) out of an upstream error payload, checking
// structured fields first and then parsing the error text.
function extractRetryAfterMs(errorPayload) {
  const text = getErrorText(errorPayload);
  // Candidate fields in priority order; the first non-nullish one wins.
  const structuredCandidates = [
    errorPayload?.retryAfterMs,
    errorPayload?.retry_after_ms,
    errorPayload?.retry_after,
    errorPayload?.error?.retryAfterMs,
    errorPayload?.error?.retry_after_ms,
    errorPayload?.error?.retry_after,
  ];
  const directValue = Number(
    structuredCandidates.find((value) => value !== null && value !== undefined)
  );
  if (Number.isFinite(directValue) && directValue > 0) {
    // Small values (100 or less) are treated as seconds and scaled to ms.
    return directValue > 100 ? directValue : directValue * 1000;
  }
  // Fall back to parsing delay hints embedded in the error text.
  const secondsMatch =
    text.match(/try again in\s*([\d.]+)\s*s/i) ||
    text.match(/retry after\s*([\d.]+)\s*s/i) ||
    text.match(/in\s*([\d.]+)\s*seconds?/i);
  if (secondsMatch) {
    return Math.ceil(Number(secondsMatch[1]) * 1000);
  }
  const msMatch = text.match(/retry after\s*([\d.]+)\s*ms/i);
  return msMatch ? Math.ceil(Number(msMatch[1])) : null;
}
// Build a DOM-style abort error so callers can detect cancellation by
// checking err.name === "AbortError".
function createAbortError() {
  const abortError = new Error("AbortError");
  abortError.name = "AbortError";
  return abortError;
}
// Sleep for `ms` milliseconds, rejecting early with an AbortError when the
// optional AbortSignal fires (or has already fired).
async function sleepWithAbort(ms, abortSignal) {
  if (!ms || ms <= 0) return;
  if (abortSignal?.aborted) throw createAbortError();
  await new Promise((resolve, reject) => {
    let timer = null;
    const onAbort = () => {
      clearTimeout(timer);
      abortSignal?.removeEventListener("abort", onAbort);
      reject(createAbortError());
    };
    timer = setTimeout(() => {
      abortSignal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    abortSignal?.addEventListener("abort", onAbort, { once: true });
  });
}
// Compute how long to wait before the next retry attempt. A hint from the
// error (retryAfterMs) wins, padded with a safety buffer; otherwise back
// off linearly with the attempt number. Both paths are capped at
// MAX_UPSTREAM_RETRY_MS.
function getRetryDelayMs(error, retryIndex) {
  const hintedDelay = Number(error?.retryAfterMs);
  const fallbackDelay = DEFAULT_UPSTREAM_RETRY_MS * Math.max(1, retryIndex + 1);
  const chosenDelay =
    Number.isFinite(hintedDelay) && hintedDelay > 0
      ? hintedDelay + UPSTREAM_RETRY_BUFFER_MS
      : fallbackDelay;
  return Math.min(MAX_UPSTREAM_RETRY_MS, chosenDelay);
}
// Map an internal error to a message that is safe to show the end user.
function getPublicErrorMessage(err) {
  if (!err) return "The request failed.";
  switch (err.name) {
    case "RetryableRateLimitError":
      return "The model provider is temporarily rate limited. Please try again in a few seconds.";
    case "RetryableUpstreamConnectionError":
      return "The model connection was interrupted. Please try again.";
    default:
      // Prefer an explicit public message; otherwise stringify the error.
      return err.publicMessage ? err.publicMessage : String(err);
  }
}
// Run websocketChatStream, transparently retrying rate-limit and connection
// errors (each class has its own retry budget). Aborts and non-retryable
// errors propagate immediately.
async function websocketChatStreamWithRetry(body, headers, onToken, abortSignal) {
  let retryIndex = 0;
  while (true) {
    try {
      return await websocketChatStream(body, headers, onToken, abortSignal);
    } catch (err) {
      if (err?.name === "AbortError") throw err;
      const isConnectionError = err?.name === "RetryableUpstreamConnectionError";
      const isRateLimit = err?.name === "RetryableRateLimitError";
      const maxRetries = isConnectionError
        ? MAX_UPSTREAM_CONNECTION_RETRIES
        : MAX_UPSTREAM_RATE_LIMIT_RETRIES;
      if (!(isConnectionError || isRateLimit) || retryIndex >= maxRetries) {
        throw err;
      }
      const waitMs = getRetryDelayMs(err, retryIndex);
      const attemptLabel = `${retryIndex + 1}/${maxRetries}`;
      console.warn(
        isConnectionError
          ? `[streamChat] Upstream websocket interrupted, retrying in ${waitMs}ms (${attemptLabel})`
          : `[streamChat] Upstream rate limited, retrying in ${waitMs}ms (${attemptLabel})`
      );
      // Waiting is abortable too; an abort surfaces as an AbortError.
      await sleepWithAbort(waitMs, abortSignal);
      retryIndex++;
    }
  }
}
// Convert the per-index tool-call accumulator into the OpenAI tool_calls
// array shape, dropping entries that never received a function name and
// minting an id for any entry the stream did not supply one for.
function serializeToolCalls(toolCallBuffer) {
  const serialized = [];
  for (const toolCall of toolCallBuffer.values()) {
    if (!toolCall?.name) continue;
    serialized.push({
      id: toolCall.id || `call_${crypto.randomUUID()}`,
      type: "function",
      function: { name: toolCall.name, arguments: toolCall.arguments },
    });
  }
  return serialized;
}
/**
 * Send one chat request over the shared persistent websocket and collect the
 * streamed response.
 *
 * Resolves with { assistantText, toolCalls } when the stream reports a
 * finish_reason (or when the overall timeout elapses, in which case whatever
 * partial output was accumulated is returned). Rejects with:
 * - RetryableRateLimitError / RetryableUpstreamConnectionError for failures
 *   the caller may retry (see websocketChatStreamWithRetry),
 * - UpstreamProviderError for terminal provider failures (including
 *   interruptions after partial output, where a retry would duplicate text),
 * - an AbortError when abortSignal fires.
 *
 * @param {object} body    Chat completion request body (model, messages, tools, ...).
 * @param {object} headers Extra headers forwarded upstream (auth, client id).
 * @param {(token: string) => void} onToken Called for each streamed text delta.
 * @param {AbortSignal} [abortSignal] Optional cancellation signal.
 */
export async function websocketChatStream(body, headers, onToken, abortSignal) {
  const ws = await getSafeWebSocket();
  // Request ids are scoped to the current socket; incoming lines are matched
  // back to this request via their "<id>:" prefix.
  const currentRequestId = ++requestIdCounter;
  const allowedToolNames = getAllowedToolNames(body?.tools);
  // Tolerant JSON parser: strips an optional SSE "data: " prefix and returns
  // null instead of throwing on malformed payloads.
  const safeParse = (str) => {
    try { return JSON.parse(str.startsWith("data: ") ? str.slice(6) : str); } catch { return null; }
  };
  let assistantText = "";
  const toolCallBuffer = new Map(); // partial tool calls keyed by delta index
  let finished = false; // guards against settling the promise twice
  let sawAnyPayload = false; // whether any payload has arrived for this request
  return new Promise((resolve, reject) => {
    let inactivityTimeoutId = null;
    // Hard cap on total stream time: resolve with whatever was accumulated
    // rather than failing the request outright.
    const overallTimeoutId = setTimeout(() => {
      if (!finished) {
        finished = true;
        cleanup();
        const toolCalls = serializeToolCalls(toolCallBuffer);
        resolve({ assistantText, toolCalls });
      }
    }, UPSTREAM_STREAM_TIMEOUT_MS);
    // Detach all handlers/timers for this request (safe to call repeatedly).
    const cleanup = () => {
      activeStreamHandlers.delete(currentRequestId);
      errorHandlers.delete(currentRequestId);
      clearTimeout(overallTimeoutId);
      clearTimeout(inactivityTimeoutId);
      if (abortSignal) abortSignal.removeEventListener("abort", abortHandler);
    };
    // Reject AND drop the shared socket so the next request reconnects.
    const rejectWithSocketReset = (err, reason) => {
      if (finished) return;
      finished = true;
      cleanup();
      invalidatePersistentWebSocket(ws, reason, err);
      reject(err);
    };
    // Socket died mid-request: retryable when nothing was produced yet,
    // otherwise a terminal provider error (partial output was already streamed).
    const rejectUnexpectedClose = (publicMessage, internalMessage) => {
      const hasPartialOutput = assistantText.trim().length > 0 || toolCallBuffer.size > 0;
      if (!hasPartialOutput) {
        rejectWithSocketReset(
          new RetryableUpstreamConnectionError(
            "The model connection was interrupted. Reconnecting automatically.",
            internalMessage
          ),
          internalMessage
        );
        return;
      }
      if (finished) return;
      finished = true;
      cleanup();
      invalidatePersistentWebSocket(ws, internalMessage);
      reject(new UpstreamProviderError(publicMessage, internalMessage));
    };
    // (Re)arm the idle watchdog. Before the first payload a longer
    // first-response timeout applies; afterwards a shorter idle timeout.
    const refreshInactivityTimeout = () => {
      clearTimeout(inactivityTimeoutId);
      inactivityTimeoutId = setTimeout(() => {
        if (sawAnyPayload) {
          rejectUnexpectedClose(
            "The model provider interrupted the response. Please try again.",
            "Upstream websocket became idle before the response finished."
          );
          return;
        }
        rejectWithSocketReset(
          new RetryableUpstreamConnectionError(
            "The model connection did not respond in time. Reconnecting automatically.",
            "Upstream websocket produced no response before timeout."
          ),
          "Upstream websocket produced no response before timeout."
        );
      }, sawAnyPayload ? UPSTREAM_IDLE_TIMEOUT_MS : UPSTREAM_FIRST_RESPONSE_TIMEOUT_MS);
    };
    // Handle one "<requestId>:<json>" line from the shared socket.
    const messageHandler = (line) => {
      const colonIdx = line.indexOf(':');
      if (colonIdx === -1) return;
      const msgRequestId = line.substring(0, colonIdx);
      const payload = safeParse(line.substring(colonIdx + 1));
      // Ignore lines addressed to other requests or that fail to parse.
      if (msgRequestId !== String(currentRequestId)) return;
      if (!payload) return;
      sawAnyPayload = true;
      refreshInactivityTimeout();
      // Explicit error payloads (no choices): classify as a retryable rate
      // limit vs. a terminal provider error.
      if (payload.error && !payload.choices) {
        if (!finished) {
          finished = true;
          cleanup();
          const errorText = getErrorText(payload.error);
          const hasPartialOutput = assistantText.length > 0 || toolCallBuffer.size > 0;
          if (!hasPartialOutput && isRateLimitError(payload.error)) {
            reject(
              new RetryableRateLimitError(
                "The model provider is temporarily rate limited.",
                extractRetryAfterMs(payload.error),
                errorText
              )
            );
          } else {
            reject(
              new UpstreamProviderError(
                hasPartialOutput
                  ? "The model provider interrupted the response. Please try again."
                  : "The model provider returned an error. Please try again.",
                errorText
              )
            );
          }
        }
        return;
      }
      const delta = payload.choices?.[0]?.delta;
      if (delta?.content) {
        const contentText = String(delta.content);
        // Some failures arrive as ordinary content text; detect and convert
        // those into proper errors instead of streaming them to the user.
        if (isEmbeddedProviderErrorText(contentText)) {
          if (!finished) {
            finished = true;
            cleanup();
            const errorText = getErrorText(contentText);
            const hasPartialOutput = assistantText.trim().length > 0 || toolCallBuffer.size > 0;
            if (!hasPartialOutput && isRateLimitError(errorText)) {
              reject(
                new RetryableRateLimitError(
                  "The model provider is temporarily rate limited.",
                  extractRetryAfterMs(errorText),
                  errorText
                )
              );
            } else {
              reject(
                new UpstreamProviderError(
                  hasPartialOutput
                    ? "The model provider interrupted the response. Please try again."
                    : "The model provider returned an error. Please try again.",
                  errorText
                )
              );
            }
          }
          return;
        }
        assistantText += contentText;
        if (onToken) onToken(contentText);
      }
      // Accumulate streamed tool-call fragments by index; names are
      // sanitized against the allowed tool list, arguments are concatenated.
      if (delta?.tool_calls) {
        for (const call of delta.tool_calls) {
          const entry = toolCallBuffer.get(call.index) ?? { arguments: "" };
          if (call.id) entry.id = call.id;
          if (call.function?.name) {
            entry.name = sanitizeToolName(call.function.name, allowedToolNames);
          }
          if (call.function?.arguments) entry.arguments += call.function.arguments;
          toolCallBuffer.set(call.index, entry);
        }
      }
      // finish_reason marks the end of the stream: resolve with the result.
      if (payload.choices?.[0]?.finish_reason && !finished) {
        finished = true;
        cleanup();
        const toolCalls = serializeToolCalls(toolCallBuffer);
        resolve({ assistantText, toolCalls });
      }
    };
    // Socket-level error for this request. A retryable connection error is
    // downgraded to a terminal provider error once partial output exists,
    // because a retry would duplicate already-streamed text.
    const errorHandler = (err) => {
      if (finished) return;
      finished = true;
      cleanup();
      const hasPartialOutput = assistantText.trim().length > 0 || toolCallBuffer.size > 0;
      if (hasPartialOutput && err?.name === "RetryableUpstreamConnectionError") {
        reject(
          new UpstreamProviderError(
            "The model provider interrupted the response. Please try again.",
            err?.internalMessage || err?.message || "Upstream websocket interrupted after partial output."
          )
        );
        return;
      }
      reject(err);
    };
    const abortHandler = () => { if (!finished) { finished = true; cleanup(); reject(createAbortError()); } };
    // Register handlers before sending so no response line can be missed.
    activeStreamHandlers.set(currentRequestId, messageHandler);
    errorHandlers.set(currentRequestId, errorHandler);
    if (abortSignal) abortSignal.addEventListener("abort", abortHandler);
    refreshInactivityTimeout();
    // Ship the request; any send failure invalidates the socket and is
    // surfaced as a retryable connection error.
    try {
      ws.send(JSON.stringify({ body, headers }), (err) => {
        if (err) {
          rejectWithSocketReset(
            new RetryableUpstreamConnectionError(
              "The model connection was interrupted. Reconnecting automatically.",
              buildUpstreamSocketMessage("Failed to send upstream websocket request", err)
            ),
            "Failed to send upstream websocket request"
          );
        }
      });
    } catch (err) {
      rejectWithSocketReset(
        new RetryableUpstreamConnectionError(
          "The model connection was interrupted. Reconnecting automatically.",
          buildUpstreamSocketMessage("Failed to send upstream websocket request", err)
        ),
        "Failed to send upstream websocket request"
      );
    }
  });
}
| /** | |
| * Extract session name from <session_name>...</session_name> tag. | |
| * Must NOT be inside any kind of code block. | |
| */ | |
| export function extractSessionName(text) { | |
| if (!text) return null; | |
| // Remove all code blocks first (``` ... ```) so we don't match tags inside them | |
| const withoutCode = text.replace(/```[\s\S]*?```/g, ''); | |
| const match = withoutCode.match(/<session_name>([\s\S]*?)<\/session_name>/i); | |
| if (!match) return null; | |
| const name = match[1].trim(); | |
| // Sanity check: 1-80 chars, no newlines | |
| if (!name || name.length > 80 || /\n/.test(name)) return null; | |
| return name; | |
| } | |
/**
 * Top-level agent loop for one chat turn.
 *
 * Builds the prompt (system prompt, memories, history, staged context, user
 * turn), then repeatedly streams model steps: each step's text is forwarded
 * to onToken, any tool calls are executed via processToolCalls and their
 * results fed back, until the model stops calling tools or MAX_AGENT_STEPS
 * is reached (after which one final tool-free step is forced). Completion,
 * errors, and aborts are reported through the provided callbacks; this
 * function itself never throws.
 *
 * @param {object} options
 * @param {string} options.sessionId   Chat session id (also keys prompt state).
 * @param {string} [options.model]     Model name; defaults to "lightning".
 * @param {Array}  [options.history]   Prior messages, normalized before use.
 * @param {string|Array} options.userMessage Current user turn (text or parts).
 * @param {object} [options.tools]     Tool-enable flags (webSearch, imageGen, ...).
 * @param {object} [options.owner]     Memory/prompt owner; null disables memories.
 * @param {string} [options.sessionName] Current chat title (hidden metadata).
 * @param {string} [options.accessToken] Bearer token forwarded upstream.
 * @param {string} [options.clientId]  Client id forwarded upstream.
 * @param {number} [options.webSearchLimit] Per-request web-search budget.
 * @param {Function} [options.onToken]     Streamed text delta callback.
 * @param {Function} [options.onDone]      (text, toolCalls, aborted, sessionName, edits, segments).
 * @param {Function} [options.onError]     Receives a user-safe error message.
 * @param {Function} [options.onToolCall]  Tool-call progress callback.
 * @param {Function} [options.onNewAsset]  Generated media callback.
 * @param {Function} [options.onDraftEdit] Visible-edit callback.
 * @param {AbortSignal} [options.abortSignal] Cancellation signal.
 */
export async function streamChat({
  sessionId,
  model,
  history = [],
  userMessage,
  tools,
  owner = null,
  sessionName = "",
  accessToken,
  clientId,
  webSearchLimit = null,
  onToken = () => {},
  onDone = () => {},
  onError = () => {},
  onToolCall = () => {},
  onNewAsset = () => {},
  onDraftEdit = () => {},
  abortSignal,
}) {
  const enabledTools = buildToolList(tools);
  // Load persistent memories and the resolved system prompt in parallel;
  // memory failures degrade to an empty list rather than failing the turn.
  const [memories, systemPrompt] = await Promise.all([
    owner ? memoryStore.list(owner).catch(() => []) : [],
    owner?.type === "user"
      ? systemPromptStore.getResolvedPrompt(owner.id)
      : systemPromptStore.getDefaultPrompt(),
  ]);
  const baseMessages = buildBasePromptMessages({
    sessionId,
    history,
    userMessage,
    memories,
    sessionName,
    systemPrompt,
  });
  const headers = {
    ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : {}),
    ...(clientId ? { "X-Client-ID": clientId } : {}),
  };
  try {
    let assistantText = "";
    let agentStep = 0;
    let finished = false;
    const workingMessages = []; // assistant/tool messages accumulated this turn
    const allToolCalls = []; // every tool call made across all steps
    const draftState = { text: "" }; // mutable view of the streamed text (edit_response_draft)
    const responseEdits = []; // visible edits applied to already-streamed text
    const responseSegments = []; // ordered text/tool_call segments for persistence
    // Agent loop: stream a step, execute any tool calls, feed results back,
    // repeat until the model answers without tools or the budget runs out.
    while (!finished && agentStep < MAX_AGENT_STEPS) {
      const effectiveMessages = buildModelMessages(baseMessages, workingMessages, sessionId);
      const body = {
        model: model || "lightning",
        messages: effectiveMessages,
        tools: enabledTools.length ? enabledTools : undefined,
        stream: true,
      };
      const { assistantText: stepText, toolCalls } = await websocketChatStreamWithRetry(
        body,
        headers,
        onToken,
        abortSignal
      );
      if (stepText) {
        responseSegments.push({ type: "text", text: stepText });
      }
      assistantText += stepText;
      draftState.text = assistantText;
      if (toolCalls.length > 0) {
        allToolCalls.push(...toolCalls);
        responseSegments.push(
          ...toolCalls.map((call) => ({
            type: "tool_call",
            callId: call.id,
          }))
        );
        workingMessages.push({
          role: "assistant",
          content: stepText || "",
          tool_calls: toolCalls,
        });
        const { nextMessages } = await processToolCalls({
          sessionId,
          toolCalls,
          tools,
          owner,
          accessToken,
          clientId,
          webSearchLimit,
          abortSignal,
          draftState,
          onToolCall,
          onNewAsset,
          // Record each visible draft edit before forwarding to the caller.
          onDraftEdit(edit, text) {
            responseEdits.push(edit);
            onDraftEdit(edit, text);
          },
        });
        // edit_response_draft may have rewritten the streamed text.
        assistantText = draftState.text || assistantText;
        workingMessages.push(...nextMessages);
        agentStep++;
        continue;
      } else {
        if (stepText) {
          workingMessages.push({ role: "assistant", content: stepText });
        }
        finished = true;
      }
    }
    // Step budget exhausted while still calling tools: force one final
    // tool-free step so the turn ends with a direct answer.
    if (!finished) {
      const finalMessages = [
        ...buildModelMessages(baseMessages, workingMessages, sessionId),
        {
          role: "system",
          content: "Tool-use budget is exhausted for this response. Do not call tools. Answer directly using the information already gathered. If something is still missing, briefly say what is missing without calling tools. You may ask the user to let you continue your response.",
        },
      ];
      const { assistantText: finalStepText } = await websocketChatStreamWithRetry(
        {
          model: model || "lightning",
          messages: finalMessages,
          stream: true,
        },
        headers,
        onToken,
        abortSignal
      );
      if (finalStepText) {
        responseSegments.push({ type: "text", text: finalStepText });
        assistantText += finalStepText;
        draftState.text = assistantText;
        workingMessages.push({ role: "assistant", content: finalStepText });
      }
      finished = true;
    }
    if (!assistantText.trim()) {
      assistantText = "I wasn’t able to finish that response cleanly. Please try again.";
    }
    // The model may embed a <session_name> tag to (re)name the chat; this
    // const shadows the incoming sessionName parameter inside the try block.
    const sessionName = extractSessionName(assistantText);
    if (typeof onDone === "function") {
      await onDone(assistantText, allToolCalls, false, sessionName, responseEdits, responseSegments);
    }
    clearPromptState(sessionId);
  } catch (err) {
    clearPromptState(sessionId);
    if (err.name === "AbortError" || err.message === "AbortError") {
      // User-initiated abort: report completion with aborted=true, no error.
      if (typeof onDone === "function") await onDone(null, null, true, null);
    } else {
      console.error("streamChat error:", err?.internalMessage || err);
      if (typeof onError === "function") await onError(getPublicErrorMessage(err));
    }
  }
}
const VALID_ROLES = new Set(["system", "user", "assistant", "tool"]);
/**
 * Normalize a stored chat message into the shape expected by the model API.
 *
 * - Returns null for missing messages or unsupported roles (callers
 *   `.filter(Boolean)` the mapped results).
 * - Assistant messages with tool calls keep a sanitized tool_calls array.
 * - Array content keeps its multi-part shape when it contains images;
 *   otherwise the text parts are flattened into a single string.
 *
 * Fix: a null/undefined history entry previously threw a TypeError when
 * reading `.role`; it is now tolerated and mapped to null.
 *
 * @param {object|null|undefined} msg Stored message record.
 * @returns {object|null} Normalized message, or null when unusable.
 */
function normalizeMessage(msg) {
  // Guard: tolerate null/undefined history entries instead of throwing.
  if (!msg || !VALID_ROLES.has(msg.role)) return null;
  if (msg.role === "assistant" && (msg.tool_calls || msg.toolCalls)) {
    // Prefer the raw tool_calls array; fall back to the stored camelCase form.
    const normalizedToolCalls = (msg.tool_calls || normalizeStoredToolCalls(msg.toolCalls))
      .map((call) => ({
        ...call,
        function: {
          ...call.function,
          name: sanitizeToolName(call.function?.name || call.name || "unknown_tool"),
        },
      }))
      .filter((call) => call.function?.name);
    return {
      role: "assistant",
      content: msg.content ?? "",
      ...(normalizedToolCalls.length ? { tool_calls: normalizedToolCalls } : {}),
    };
  }
  if (Array.isArray(msg.content)) {
    // If the array contains images, preserve the full array format
    const hasImages = msg.content.some(item => item.type === 'image_url');
    if (hasImages) {
      return { role: msg.role, content: msg.content };
    }
    // Otherwise extract text only
    const textOnly = msg.content
      .filter(b => b.type === "text")
      .map(b => b.text)
      .join("\n");
    return { role: msg.role, content: textOnly || "" };
  }
  return { role: msg.role, content: msg.content ?? "" };
}
// Assemble the OpenAI-style tool definition list for a request. The core
// context/memory/edit tools are always present; web search, image, video,
// and audio tools are appended only when enabled in the tool config.
function buildToolList(tools) {
  const config = tools || {};
  // Wrap a bare function schema in the { type: "function", function } shape.
  const asTool = (definition) => ({ type: "function", function: definition });
  const list = [
    asTool({
      name: "list_prompt_resources",
      description: "List the staged prompt resources, including omitted prompt text, attachments, images, and condensed history",
      parameters: { type: "object", properties: {} },
    }),
    asTool({
      name: "read_prompt_chunk",
      description: "Read an exact text chunk from a staged prompt resource",
      parameters: {
        type: "object",
        properties: {
          resource_id: { type: "string", description: "The resource id from list_prompt_resources" },
          chunk_index: { type: "number", description: "Zero-based chunk index" },
        },
        required: ["resource_id", "chunk_index"],
      },
    }),
    asTool({
      name: "load_prompt_images",
      description: "Load staged images into the next model step in a smaller batch",
      parameters: {
        type: "object",
        properties: {
          resource_id: { type: "string", description: "The staged image resource id" },
          indexes: { type: "array", items: { type: "number" }, description: "Zero-based image indexes to load" },
          start_index: { type: "number", description: "Optional zero-based starting image index when indexes is omitted" },
          count: { type: "number", description: "Optional number of images to load when indexes is omitted" },
        },
        required: ["resource_id"],
      },
    }),
    asTool({
      name: "write_notes",
      description: "Write notes to memory for later reasoning",
      parameters: { type: "object", properties: { note: { type: "string" } }, required: ["note"] },
    }),
    asTool({
      name: "list_app_doc_sections",
      description: "List the available sections of the app documentation so you can choose one relevant section to read.",
      parameters: { type: "object", properties: {} },
    }),
    asTool({
      name: "read_app_doc_section",
      description: "Read one specific app documentation section by section id or exact section title.",
      parameters: {
        type: "object",
        properties: {
          section_id: { type: "string", description: "The section id or exact title from list_app_doc_sections." },
        },
        required: ["section_id"],
      },
    }),
    asTool({
      name: "list_memories",
      description: "List the currently saved persistent memories for this user.",
      parameters: { type: "object", properties: {} },
    }),
    asTool({
      name: "save_memory",
      description: "Save a short persistent memory that may help in future chats. Keep it brief and only use this for durable user preferences or facts.",
      parameters: {
        type: "object",
        properties: {
          content: { type: "string", description: "A short durable memory, ideally one sentence or shorter." },
        },
        required: ["content"],
      },
    }),
    asTool({
      name: "delete_memory",
      description: "Delete a previously saved persistent memory when it is outdated or incorrect.",
      parameters: {
        type: "object",
        properties: {
          memory_id: { type: "string", description: "The memory id to delete." },
        },
        required: ["memory_id"],
      },
    }),
    asTool({
      name: "edit_response_draft",
      description: "Revise text that was already streamed to the user. Use this to correct or remove visible text that has already appeared.",
      parameters: {
        type: "object",
        properties: {
          operation: {
            type: "string",
            enum: ["replace_all", "replace_last_match", "delete_last_match", "delete_last_chars", "append", "clear"],
          },
          text: { type: "string", description: "Replacement text for replace_all or text to append." },
          target_text: { type: "string", description: "Exact text to replace or delete for match-based operations." },
          replacement_text: { type: "string", description: "Replacement text for replace_last_match." },
          count: { type: "number", description: "Character count for delete_last_chars." },
          reason: { type: "string", description: "Brief reason for the visible edit." },
        },
        required: ["operation"],
      },
    }),
  ];
  if (config.webSearch) {
    list.push(
      asTool({
        name: "ollama_search",
        description: "Search the web for current information",
        parameters: {
          type: "object",
          properties: { query: { type: "string", description: "Search query" } },
          required: ["query"],
        },
      }),
      asTool({
        name: "read_web_page",
        description: "Read the content of a web page by URL",
        parameters: {
          type: "object",
          properties: { url: { type: "string", description: "URL to fetch" } },
          required: ["url"],
        },
      })
    );
  }
  if (config.imageGen) {
    list.push(
      asTool({
        name: "generate_image",
        description: "Generate an image from a prompt",
        parameters: {
          type: "object",
          properties: {
            prompt: { type: "string" },
            mode: { type: "string", enum: ["auto", "fantasy", "realistic"] },
            image_urls: { type: "array", items: { type: "string" } },
          },
          required: ["prompt"],
        },
      })
    );
  }
  if (config.videoGen) {
    list.push(
      asTool({
        name: "generate_video",
        description: "Generate a video from a prompt",
        parameters: {
          type: "object",
          properties: {
            prompt: { type: "string" },
            ratio: { type: "string", enum: ["3:2", "2:3", "1:1"] },
            mode: { type: "string", enum: ["normal", "fun"] },
            duration: { type: "number" },
            image_urls: { type: "array", items: { type: "string" } },
          },
          required: ["prompt"],
        },
      })
    );
  }
  if (config.audioGen) {
    list.push(
      asTool({
        name: "generate_audio",
        description: "Generate music or sound effects from a prompt",
        parameters: {
          type: "object",
          properties: { prompt: { type: "string" } },
          required: ["prompt"],
        },
      })
    );
  }
  return list;
}
// Resolve which staged image indexes a load_prompt_images call refers to.
// Explicit `indexes` win (deduped and bounds-checked); otherwise a contiguous
// range is built from start_index/count, defaulting to the images that were
// not already shown inline.
function normalizeRequestedImageIndexes(args, resource) {
  const inBounds = (index) => index >= 0 && index < resource.imageCount;
  const dedupedIndexes = Array.isArray(args.indexes)
    ? [...new Set(args.indexes.map((value) => Number(value)).filter(Number.isInteger))]
    : [];
  const explicitIndexes = dedupedIndexes.filter(inBounds);
  if (explicitIndexes.length) return explicitIndexes;
  const startIndex = Number.isInteger(Number(args.start_index))
    ? Number(args.start_index)
    : resource.inlineImageCount || 0;
  const requestedCount = Number.isInteger(Number(args.count))
    ? Number(args.count)
    : MAX_INLINE_IMAGES;
  const rangeIndexes = [];
  for (let offset = 0; offset < requestedCount; offset++) {
    const candidate = startIndex + offset;
    if (inBounds(candidate)) rangeIndexes.push(candidate);
  }
  return rangeIndexes;
}
/**
 * Apply one edit operation to the assistant's visible response draft.
 *
 * Supported `args.operation` values: "replace_all", "append", "clear",
 * "replace_last_match", "delete_last_match", "delete_last_chars".
 * Any unknown (or missing) operation leaves the draft unchanged.
 *
 * @param {string} currentText - The draft text before the edit (nullish treated as "").
 * @param {object} [args] - Tool-call arguments describing the edit.
 * @returns {string} The draft text after applying the edit.
 */
function applyResponseDraftEdit(currentText, args = {}) {
  const draft = String(currentText || "");
  const asText = (value) => String(value || "");

  if (args.operation === "replace_all") return asText(args.text);
  if (args.operation === "append") return draft + asText(args.text);
  if (args.operation === "clear") return "";

  // Both last-match operations share the same search; they differ only in
  // what gets spliced in where the match was.
  if (args.operation === "replace_last_match" || args.operation === "delete_last_match") {
    const needle = asText(args.target_text);
    if (!needle) return draft;
    const at = draft.lastIndexOf(needle);
    if (at === -1) return draft;
    const splice =
      args.operation === "replace_last_match" ? asText(args.replacement_text) : "";
    return draft.slice(0, at) + splice + draft.slice(at + needle.length);
  }

  if (args.operation === "delete_last_chars") {
    const toDrop = Math.max(0, Number(args.count) || 0);
    return draft.slice(0, Math.max(0, draft.length - toDrop));
  }

  return draft;
}
/**
 * Execute a batch of model-issued tool calls and collect the follow-up
 * messages to feed back into the conversation.
 *
 * For each call: parses the JSON arguments (tolerating malformed JSON),
 * sanitizes the tool name against the allowed tool list, reports progress
 * via `onToolCall` ("pending" then "resolved"), dispatches on the tool
 * name, and appends a `role: "tool"` result message to `nextMessages`.
 * Tool failures are captured as error text in the result — they never
 * throw out of this function (except via `abortSignal` aborting a fetch,
 * which propagates as a rejection from the awaited fetch).
 *
 * @param {object} params
 * @param {string} params.sessionId - Current chat session id (used for prompt state / storage).
 * @param {Array<object>} params.toolCalls - OpenAI-style tool calls (`id`, `function.name`, `function.arguments`).
 * @param {object} params.tools - Tool-enable flags passed to `buildToolList`.
 * @param {*} params.owner - Owner identity for memory/media stores; falsy disables those stores.
 * @param {string} [params.accessToken] - Bearer token forwarded to generation endpoints.
 * @param {string} [params.clientId] - Forwarded as X-Client-ID to generation endpoints.
 * @param {{key: string, limit: number}} [params.webSearchLimit] - Daily web-search quota; omitted/partial means unlimited.
 * @param {AbortSignal} [params.abortSignal] - Cancels in-flight fetches.
 * @param {{text: string}} [params.draftState] - Mutable holder for the visible response draft.
 * @param {Function} params.onToolCall - Progress callback ({id, name, state, args|result}).
 * @param {Function} params.onNewAsset - Called when a generated media asset is stored.
 * @param {Function} params.onDraftEdit - Called when edit_response_draft visibly changes the draft.
 * @returns {Promise<{nextMessages: Array<object>, draftText: string}>} Messages to append and the final draft text.
 */
async function processToolCalls({
  sessionId,
  toolCalls,
  tools,
  owner,
  accessToken,
  clientId,
  webSearchLimit,
  abortSignal,
  draftState,
  onToolCall,
  onNewAsset,
  onDraftEdit,
}) {
  // Messages to send back to the model: one role:"tool" entry per call
  // (plus an extra role:"user" image message for load_prompt_images).
  const nextMessages = [];
  const authHeaders = {};
  const allowedToolNames = getAllowedToolNames(buildToolList(tools));
  if (accessToken) {
    authHeaders["Authorization"] = `Bearer ${accessToken}`;
  }
  if (clientId) {
    authHeaders["X-Client-ID"] = clientId;
  }
  for (const call of toolCalls) {
    let args;
    // Model-supplied arguments may be malformed JSON; fall back to {}.
    try { args = JSON.parse(call.function.arguments || "{}"); } catch { args = {}; }
    const toolName = sanitizeToolName(call.function?.name, allowedToolNames);
    if (toolName) {
      // Normalize the stored call so downstream history sees the sanitized name.
      call.function.name = toolName;
    }
    onToolCall({ id: call.id, name: toolName || call.function?.name, state: "pending", args });
    let result = "Tool completed.";
    try {
      if (!toolName || !allowedToolNames.includes(toolName)) {
        result = `Invalid tool name "${call.function?.name || "unknown"}".`;
      }
      // List the session's staged prompt resources (files/images) as JSON.
      else if (toolName === "list_prompt_resources") {
        const state = getPromptState(sessionId);
        result = JSON.stringify(buildPromptResourceManifest(state), null, 2);
      }
      // Return one text chunk of a staged prompt resource.
      else if (toolName === "read_prompt_chunk") {
        const state = getPromptState(sessionId);
        // Accept either chunk_index or the legacy chunk_id argument.
        const chunkIndex = Number.isInteger(Number(args.chunk_index))
          ? Number(args.chunk_index)
          : Number(args.chunk_id);
        const resourceId = args.resource_id || args.filename;
        const resource = findPromptResource(state, resourceId);
        if (!resource) {
          result = `Prompt resource "${resourceId}" not found.`;
        } else if (resource.kind === "images") {
          result = `Resource "${resource.id}" contains images, not text. Use load_prompt_images instead.`;
        } else if (!Number.isInteger(chunkIndex) || chunkIndex < 0 || chunkIndex >= resource.chunkCount) {
          result = `Chunk ${chunkIndex} is out of range for resource "${resource.id}" (${resource.chunkCount} chunk(s)).`;
        } else {
          const chunk = resource.chunks[chunkIndex];
          result = [
            `Resource: ${resource.id}`,
            `Name: ${resource.name}`,
            `Kind: ${resource.kind}`,
            `Chunk: ${chunk.index + 1}/${resource.chunkCount}`,
            "",
            chunk.text,
          ].join("\n");
        }
      }
      // Inject staged images into the conversation as a synthetic user message.
      else if (toolName === "load_prompt_images") {
        const state = getPromptState(sessionId);
        const resource = findPromptResource(state, args.resource_id);
        if (!resource) {
          result = `Prompt image resource "${args.resource_id}" not found.`;
        } else if (resource.kind !== "images") {
          result = `Resource "${resource.id}" is not an image resource.`;
        } else {
          const indexes = normalizeRequestedImageIndexes(args, resource);
          const selectedImages = indexes
            .map((index) => resource.images[index]?.block)
            .filter(Boolean);
          if (!selectedImages.length) {
            result = `No images were loaded from "${resource.id}".`;
          } else {
            result = `Loaded image indexes ${indexes.join(", ")} from "${resource.id}".`;
            nextMessages.push({
              role: "tool",
              tool_call_id: call.id,
              content: result,
            });
            // Image content blocks cannot live on a role:"tool" message, so
            // they ride along in a follow-up role:"user" message.
            nextMessages.push({
              role: "user",
              content: [
                {
                  type: "text",
                  text: `Requested staged images from resource "${resource.id}" (${resource.name}), indexes ${indexes.join(", ")}.`,
                },
                ...selectedImages,
              ],
            });
            onToolCall({ id: call.id, name: toolName, state: "resolved", result });
            // Skip the shared push/resolve at the bottom of the loop —
            // this branch already emitted both messages and the callback.
            continue;
          }
        }
      }
      // Append a scratch note scoped to the current response.
      else if (toolName === "write_notes") {
        appendAssistantNote(sessionId, args.note);
        result = "Note stored for the rest of this response.";
      }
      else if (toolName === "list_app_doc_sections") {
        const sections = await listAppDocSections();
        result = JSON.stringify(sections, null, 2);
      }
      else if (toolName === "read_app_doc_section") {
        const section = await readAppDocSection(args.section_id);
        result = section
          ? [`Section: ${section.title}`, `Section ID: ${section.id}`, "", section.content].join("\n")
          : `Documentation section "${args.section_id}" was not found.`;
      }
      // Memory tools require an owner; without one they degrade gracefully.
      else if (toolName === "list_memories") {
        const memories = owner ? await memoryStore.list(owner) : [];
        result = JSON.stringify(memories, null, 2);
      }
      else if (toolName === "save_memory") {
        const memory = owner
          ? await memoryStore.create(owner, {
              content: args.content,
              sessionId,
              source: "assistant",
            })
          : null;
        result = memory
          ? `Saved memory ${memory.id}: ${memory.content}`
          : "Memory was empty or could not be saved.";
      }
      else if (toolName === "delete_memory") {
        const ok = owner ? await memoryStore.delete(owner, args.memory_id) : false;
        result = ok ? `Deleted memory ${args.memory_id}.` : `Memory ${args.memory_id} was not found.`;
      }
      // Let the model revise its own visible draft; only visible changes
      // are reported back through onDraftEdit.
      else if (toolName === "edit_response_draft") {
        const before = draftState?.text || "";
        const after = applyResponseDraftEdit(before, args);
        if (draftState) draftState.text = after;
        const edit = {
          operation: args.operation,
          reason: String(args.reason || "").trim() || "Model revised its draft.",
          before,
          after,
          timestamp: Date.now(),
        };
        if (before !== after) {
          onDraftEdit(edit, after);
          result = [
            `Draft updated. The visible response now has ${after.length} character(s).`,
            "Visible draft:",
            // Only echo the tail so huge drafts don't bloat the context.
            (after || "[empty]").slice(-4000),
          ].join("\n\n");
        } else {
          result = "Draft edit made no visible change.";
        }
      }
      // Web search, gated by a per-day quota when webSearchLimit is configured.
      else if (toolName === "ollama_search") {
        if (webSearchLimit?.key && Number.isFinite(webSearchLimit.limit)) {
          const usage = await consumeWebSearchUsage(webSearchLimit.key, webSearchLimit.limit);
          if (!usage.allowed) {
            result = `Web search limit reached for today. Free accounts can use ${usage.limit} web searches per day. Try again on ${usage.window} after the daily reset or upgrade to a paid plan.`;
          } else {
            result = await gradioSearch(args.query);
          }
        } else {
          result = await gradioSearch(args.query);
        }
      }
      // Fetch a URL and return its title + plain-text body (truncated).
      else if (toolName === "read_web_page") {
        const { convert } = await import("html-to-text");
        const res = await fetch(args.url, { signal: abortSignal });
        if (!res.ok) {
          result = `Failed to fetch: ${res.status}`;
        } else {
          const html = await res.text();
          const titleMatch = html.match(/<title>(.*?)<\/title>/i);
          result = JSON.stringify({
            title: titleMatch?.[1] || "No title",
            content: convert(html, { wordwrap: false }).slice(0, 8000),
          });
        }
      }
      // Media generation tools: POST to the upstream service, store the
      // returned bytes via mediaStore, and surface the asset to the client.
      else if (toolName === "generate_image") {
        const body = { prompt: args.prompt };
        if (args.mode) body.mode = args.mode;
        if (args.image_urls?.length) body.image_urls = args.image_urls;
        const res = await fetch(`${LIGHTNING_BASE}/gen/image`, {
          method: "POST",
          headers: { "Content-Type": "application/json", ...authHeaders },
          body: JSON.stringify(body),
          signal: abortSignal,
        });
        if (res.ok) {
          const buf = await res.arrayBuffer();
          const ct = res.headers.get("content-type") || "image/png";
          const stored = await mediaStore.storeBuffer(owner, {
            // Derive the file extension from the content type; default png.
            name: `generated-image-${Date.now()}.${ct.includes("svg") ? "svg" : ct.split("/")[1] || "png"}`,
            mimeType: ct,
            buffer: Buffer.from(buf),
            sessionId,
            source: "assistant_generated",
            kind: "image",
          });
          onNewAsset({ id: stored.id, role: "image", mimeType: ct, name: stored.name });
          result = "Image generated successfully and shown to the user.";
        } else if (res.status == 402) {
          // 402 Payment Required: plan limit reached upstream.
          result = "An upgraded plan is required for higher limits.";
        } else if (res.status == 429) {
          result = "Too many requests. Try again later.";
        } else {
          result = `Image generation failed: ${res.status}`;
        }
      }
      else if (toolName === "generate_video") {
        const body = { prompt: args.prompt };
        if (args.ratio) body.ratio = args.ratio;
        if (args.mode) body.mode = args.mode;
        if (args.duration) body.duration = args.duration;
        if (args.image_urls?.length) body.image_urls = args.image_urls;
        const res = await fetch(`${LIGHTNING_BASE}/gen/video`, {
          method: "POST",
          headers: { "Content-Type": "application/json", ...authHeaders },
          body: JSON.stringify(body),
          signal: abortSignal,
        });
        if (res.ok) {
          const buf = await res.arrayBuffer();
          const contentType = res.headers.get("content-type") || "video/mp4";
          const stored = await mediaStore.storeBuffer(owner, {
            name: `generated-video-${Date.now()}.${contentType.split("/")[1] || "mp4"}`,
            mimeType: contentType,
            buffer: Buffer.from(buf),
            sessionId,
            source: "assistant_generated",
            kind: "video",
          });
          onNewAsset({ id: stored.id, role: "video", mimeType: contentType, name: stored.name });
          result = "Video generated successfully and shown to the user.";
        } else if (res.status == 402) {
          result = "An upgraded plan is required for higher limits.";
        } else if (res.status == 429) {
          result = "Too many requests. Try again later.";
        } else {
          result = `Video generation failed: ${res.status}`;
        }
      }
      else if (toolName === "generate_audio") {
        const res = await fetch(`${LIGHTNING_BASE}/gen/sfx`, {
          method: "POST",
          headers: { "Content-Type": "application/json", ...authHeaders },
          body: JSON.stringify({ prompt: args.prompt }),
          signal: abortSignal,
        });
        if (res.ok) {
          const buf = await res.arrayBuffer();
          const contentType = res.headers.get("content-type") || "audio/mpeg";
          const stored = await mediaStore.storeBuffer(owner, {
            name: `generated-audio-${Date.now()}.${contentType.split("/")[1] || "mp3"}`,
            mimeType: contentType,
            buffer: Buffer.from(buf),
            sessionId,
            source: "assistant_generated",
            kind: "audio",
          });
          onNewAsset({ id: stored.id, role: "audio", mimeType: contentType, name: stored.name });
          result = "Audio generated successfully and shown to the user.";
        } else if (res.status == 429) {
          result = "Too many requests. Try again later.";
        } else {
          result = `Audio generation failed: ${res.status}. This is most likely an upstream provider error.`;
        }
      }
    } catch (err) {
      // Tool failures are surfaced to the model as text, not thrown.
      result = `Tool error: ${String(err)}`;
    }
    onToolCall({ id: call.id, name: toolName || call.function?.name, state: "resolved", result });
    nextMessages.push({
      role: "tool",
      tool_call_id: call.id,
      content: typeof result === "string" ? result : JSON.stringify(result),
    });
  }
  return { nextMessages, draftText: draftState?.text || "" };
}