// WindsurfAPI/src/client.js
/**
* WindsurfClient - talks to the local language server binary via gRPC (HTTP/2).
*
* Two flows:
* Legacy → RawGetChatMessage (streaming, for enum-only models)
* Cascade → StartCascade → SendUserCascadeMessage → poll (for modelUid models)
*/
import https from 'https';
import { randomUUID } from 'crypto';
import { log } from './config.js';
import { grpcFrame, grpcUnary, grpcStream } from './grpc.js';
import { getLsEntryByPort } from './langserver.js';
import {
buildRawGetChatMessageRequest, parseRawResponse,
buildInitializePanelStateRequest,
buildAddTrackedWorkspaceRequest,
buildUpdateWorkspaceTrustRequest,
buildStartCascadeRequest, parseStartCascadeResponse,
buildSendCascadeMessageRequest,
buildGetTrajectoryRequest, parseTrajectoryStatus,
buildGetTrajectoryStepsRequest, parseTrajectorySteps,
buildGetGeneratorMetadataRequest, parseGeneratorMetadata,
buildGetUserStatusRequest, parseGetUserStatusResponse,
} from './windsurf.js';
const LS_SERVICE = '/exa.language_server_pb.LanguageServerService';
function contentToString(content) {
if (typeof content === 'string') return content;
if (Array.isArray(content)) {
return content.map(p => (typeof p?.text === 'string' ? p.text : JSON.stringify(p))).join('');
}
return content == null ? '' : JSON.stringify(content);
}
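// For example (inputs invented), OpenAI multi-part content flattens to plain
// text and non-text parts fall back to their JSON form:
//   contentToString([{ type: 'text', text: 'Hi' }, { type: 'text', text: ' there' }])
//   // => 'Hi there'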
// ─── WindsurfClient ────────────────────────────────────────
export class WindsurfClient {
/**
* @param {string} apiKey - Codeium API key
* @param {number} port - Language server gRPC port
* @param {string} csrfToken - CSRF token for auth
*/
constructor(apiKey, port, csrfToken) {
this.apiKey = apiKey;
this.port = port;
this.csrfToken = csrfToken;
}
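// Example construction (all three values are placeholders, not real
// credentials):
//   const client = new WindsurfClient('codeium-api-key', 42100, 'csrf-token');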
// ─── Legacy: RawGetChatMessage (streaming) ───────────────
/**
* Stream chat via RawGetChatMessage.
* Used for models without a string UID (enum < 280 generally).
*
* @param {Array} messages - OpenAI-format messages
* @param {number} modelEnum - Model enum value
* @param {string} [modelName] - Optional model name
* @param {object} opts - { onChunk, onEnd, onError }
*/
rawGetChatMessage(messages, modelEnum, modelName, opts = {}) {
const { onChunk, onEnd, onError } = opts;
const proto = buildRawGetChatMessageRequest(this.apiKey, messages, modelEnum, modelName);
const body = grpcFrame(proto);
log.debug(`RawGetChatMessage: enum=${modelEnum} msgs=${messages.length}`);
return new Promise((resolve, reject) => {
const chunks = [];
grpcStream(this.port, this.csrfToken, `${LS_SERVICE}/RawGetChatMessage`, body, {
onData: (payload) => {
try {
const parsed = parseRawResponse(payload);
if (parsed.text) {
// Detect server-side errors returned as text
const errMatch = /^(permission_denied|failed_precondition|not_found|unauthenticated):/.test(parsed.text.trim());
if (parsed.isError || errMatch) {
const err = new Error(parsed.text.trim());
// Mark model-level errors so they don't count against the account
err.isModelError = /permission_denied|failed_precondition/.test(parsed.text);
reject(err);
return;
}
chunks.push(parsed);
onChunk?.(parsed);
}
} catch (e) {
log.error('RawGetChatMessage parse error:', e.message);
}
},
onEnd: () => {
onEnd?.(chunks);
resolve(chunks);
},
onError: (err) => {
onError?.(err);
reject(err);
},
});
});
}
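// Sketch of a streaming call (the enum value is a placeholder, not a real
// model id):
//   await client.rawGetChatMessage(
//     [{ role: 'user', content: 'Hello' }],
//     210, // placeholder enum
//     undefined,
//     { onChunk: (c) => process.stdout.write(c.text) },
//   );
// The promise resolves with the accumulated chunk array once the stream ends
// and rejects if the server returns an error chunk.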
/**
* Run (or wait for) the one-shot Cascade workspace init for this LS.
* Idempotent - the LS entry caches the in-flight Promise so concurrent
* callers share one init round. Safe to call from a startup warmup path
* so the first real chat request skips these 3 gRPC round-trips.
*/
warmupCascade(force = false) {
const lsEntry = getLsEntryByPort(this.port);
if (!lsEntry) return Promise.resolve();
if (force) {
lsEntry.workspaceInit = null;
lsEntry.sessionId = randomUUID();
}
if (!lsEntry.sessionId) lsEntry.sessionId = randomUUID();
if (lsEntry.workspaceInit) return lsEntry.workspaceInit;
const sessionId = lsEntry.sessionId;
const workspacePath = '/tmp/windsurf-workspace';
const workspaceUri = 'file:///tmp/windsurf-workspace';
lsEntry.workspaceInit = (async () => {
try {
const initProto = buildInitializePanelStateRequest(this.apiKey, sessionId);
await grpcUnary(this.port, this.csrfToken,
`${LS_SERVICE}/InitializeCascadePanelState`, grpcFrame(initProto), 5000);
} catch (e) { log.warn(`InitializeCascadePanelState: ${e.message}`); }
try {
const addWsProto = buildAddTrackedWorkspaceRequest(this.apiKey, workspacePath, sessionId);
await grpcUnary(this.port, this.csrfToken,
`${LS_SERVICE}/AddTrackedWorkspace`, grpcFrame(addWsProto), 5000);
} catch (e) { log.warn(`AddTrackedWorkspace: ${e.message}`); }
try {
const trustProto = buildUpdateWorkspaceTrustRequest(this.apiKey, workspaceUri, true, sessionId);
await grpcUnary(this.port, this.csrfToken,
`${LS_SERVICE}/UpdateWorkspaceTrust`, grpcFrame(trustProto), 5000);
} catch (e) { log.warn(`UpdateWorkspaceTrust: ${e.message}`); }
log.info(`Cascade workspace init complete for LS port=${this.port}`);
})().catch(e => {
lsEntry.workspaceInit = null;
throw e;
});
return lsEntry.workspaceInit;
}
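// Typical use is a fire-and-forget call at LS startup so the first real chat
// request skips the three init round-trips (sketch, not a required call site):
//   client.warmupCascade().catch(() => {});
// Passing force=true drops the cached init promise and mints a fresh
// sessionId, e.g. after the LS has been restarted.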
// ─── Cascade flow ────────────────────────────────────────
/**
* Chat via Cascade flow (for premium models with string UIDs).
*
* 1. StartCascade → cascade_id
* 2. SendUserCascadeMessage (with model config)
* 3. Poll GetCascadeTrajectorySteps until IDLE
*
* @param {Array} messages - OpenAI-format messages
* @param {number} modelEnum - Model enum value
* @param {string} modelUid - Model string UID
* @param {object} opts - { onChunk, onEnd, onError, signal, reuseEntry, toolPreamble }
* @returns {Promise<Array>} chunk array with cascadeId, sessionId, toolCalls and usage attached
*/
async cascadeChat(messages, modelEnum, modelUid, opts = {}) {
const { onChunk, onEnd, onError, signal, reuseEntry, toolPreamble } = opts;
const aborted = () => signal?.aborted;
const inputChars = messages.reduce((n, m) => n + contentToString(m?.content).length, 0);
log.debug(`CascadeChat: uid=${modelUid} enum=${modelEnum} msgs=${messages.length} reuse=${!!reuseEntry}`);
// One-shot per-LS workspace init (idempotent; typically pre-warmed at
// LS startup). Falls back to a local session id if the LS entry is gone.
const lsEntry = getLsEntryByPort(this.port);
await this.warmupCascade().catch(() => {});
let sessionId = reuseEntry?.sessionId || lsEntry?.sessionId || randomUUID();
// "panel state not found" means the LS forgot the panel for our sessionId
// (LS restarted, TTL expired, etc.). Re-run warmupCascade with a fresh
// sessionId and retry the handshake once.
const isPanelMissing = (e) => /panel state not found|not_found.*panel/i.test(e?.message || '');
try {
// Step 1: Start cascade, with retry on panel-state-not-found
let cascadeId;
const openCascade = async () => {
if (reuseEntry?.cascadeId) {
log.debug(`Cascade resumed: ${reuseEntry.cascadeId}`);
return reuseEntry.cascadeId;
}
const startProto = buildStartCascadeRequest(this.apiKey, sessionId);
const startResp = await grpcUnary(
this.port, this.csrfToken, `${LS_SERVICE}/StartCascade`, grpcFrame(startProto)
);
const id = parseStartCascadeResponse(startResp);
if (!id) throw new Error('StartCascade returned empty cascade_id');
log.debug(`Cascade started: ${id}`);
return id;
};
try {
cascadeId = await openCascade();
} catch (e) {
if (!isPanelMissing(e)) throw e;
log.warn(`Panel state missing, re-warming LS port=${this.port}`);
await this.warmupCascade(true).catch(() => {});
sessionId = getLsEntryByPort(this.port)?.sessionId || randomUUID();
if (reuseEntry) reuseEntry.cascadeId = null; // force StartCascade
cascadeId = await openCascade();
}
// Build the text payload. Two cases:
// - Resuming an existing cascade: the backend already has the prior
// turns cached, so we only send the newest user message.
// - Fresh cascade: we have to pack the entire history into one shot
// (Cascade doesn't accept a messages array). System blocks go on
// top, then we render u/a turns as a labeled transcript so the
// model can see its own prior replies - previously we dropped
// assistant turns entirely and multi-turn context was broken.
//
// The caller (handlers/chat.js) is responsible for any tool-protocol
// preamble that needs to sit in front of the user text (client-defined
// OpenAI tools are serialized into a '<tool_call>{...}</tool_call>'
// emission contract there). This function just stitches system + u/a
// turns into the single text payload Cascade accepts.
let text;
if (reuseEntry?.cascadeId) {
const lastUser = [...messages].reverse().find(m => m.role === 'user');
text = lastUser ? contentToString(lastUser.content) : '';
} else {
const systemMsgs = messages.filter(m => m.role === 'system');
const convo = messages.filter(m => m.role === 'user' || m.role === 'assistant');
const sysText = systemMsgs.map(m => contentToString(m.content)).join('\n').trim();
if (convo.length <= 1) {
const last = convo[convo.length - 1];
text = last ? contentToString(last.content) : '';
} else {
const lines = [];
for (let i = 0; i < convo.length - 1; i++) {
const m = convo[i];
const label = m.role === 'user' ? 'User' : 'Assistant';
lines.push(`${label}: ${contentToString(m.content)}`);
}
const latest = convo[convo.length - 1];
const latestText = latest ? contentToString(latest.content) : '';
text = `[Conversation so far]\n${lines.join('\n\n')}\n\n[Current user message]\n${latestText}`;
}
if (sysText) text = sysText + '\n\n' + text;
}
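// Illustrative shape of the packed payload for a fresh cascade with a system
// prompt and a three-message history (contents invented):
//
//   <system text>
//
//   [Conversation so far]
//   User: first question
//
//   Assistant: first answer
//
//   [Current user message]
//   follow-up question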
// Step 2: Send message (retry once on panel-state-not-found)
const sendMessage = async () => {
const sendProto = buildSendCascadeMessageRequest(this.apiKey, cascadeId, text, modelEnum, modelUid, sessionId, { toolPreamble });
await grpcUnary(
this.port, this.csrfToken, `${LS_SERVICE}/SendUserCascadeMessage`, grpcFrame(sendProto)
);
};
try {
await sendMessage();
} catch (e) {
if (!isPanelMissing(e)) throw e;
log.warn(`Panel state missing on Send, re-warming + restarting cascade port=${this.port}`);
await this.warmupCascade(true).catch(() => {});
sessionId = getLsEntryByPort(this.port)?.sessionId || randomUUID();
const startProto = buildStartCascadeRequest(this.apiKey, sessionId);
const startResp = await grpcUnary(
this.port, this.csrfToken, `${LS_SERVICE}/StartCascade`, grpcFrame(startProto)
);
cascadeId = parseStartCascadeResponse(startResp);
if (!cascadeId) throw new Error('StartCascade returned empty cascade_id after re-warm');
await sendMessage();
}
// Step 3: Poll for response.
// Track per-step text cursors instead of a single global `lastYielded`.
// The cascade trajectory can contain MULTIPLE PLANNER_RESPONSE steps
// (thinking step + final response, or multi-turn). The old single-cursor
// code silently dropped any step whose text was shorter than the longest
// step seen so far - which showed up as "30k in / 200 out" where the real
// answer was split across two steps and only one was emitted.
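// Concrete failure mode (numbers invented): step 0 (thinking summary) ends at
// 1200 chars, step 1 (final answer) at 400. A single global cursor already at
// 1200 would slice(1200) on step 1's 400-char text and emit nothing; per-step
// cursors emit both.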
const chunks = [];
const yieldedByStep = new Map(); // stepIndex → emitted text length
const thinkingByStep = new Map(); // stepIndex → emitted thinking length
// Server-reported token usage, one entry per step keyed by step index.
// Each value is the latest {inputTokens, outputTokens, cacheReadTokens,
// cacheWriteTokens} observed on that step's CortexStepMetadata.model_usage.
// Summed across all steps at return time → the response's real usage.
const usageByStep = new Map();
const seenToolCallIds = new Set();
const toolCalls = [];
let totalYielded = 0;
let totalThinking = 0;
let idleCount = 0;
let pollCount = 0;
let sawActive = false; // true once we've seen a non-IDLE status
let sawText = false; // true once at least one PLANNER_RESPONSE with text arrived
let lastStatus = -1;
// "Progress" is ANY forward motion on the trajectory β€” text, thinking,
// new tool call, or a new step appearing. Using this (instead of text
// alone) for stall detection fixes the false-positive warm stalls where
// Cascade is legitimately mid-thinking but `responseText` hasn't moved.
let lastGrowthAt = Date.now();
let lastStepCount = 0;
const maxWait = 180_000;
const pollInterval = 250;
const IDLE_GRACE_MS = 8_000; // minimum time before idle-break allowed
// 25s no progress on any signal = genuine stall. Was 15s + text-only,
// which misfired on long thinking phases and returned tiny "Let me..."
// preambles as if they were complete replies.
const NO_GROWTH_STALL_MS = 25_000;
const STALL_RETRY_MIN_TEXT = 300; // stalls shorter than this → retryable error, not partial success
const startTime = Date.now();
let endReason = 'unknown';
while (Date.now() - startTime < maxWait) {
if (aborted()) { endReason = 'aborted'; break; }
await new Promise(r => setTimeout(r, pollInterval));
if (aborted()) { endReason = 'aborted'; break; }
pollCount++;
// Get steps
const stepsProto = buildGetTrajectoryStepsRequest(cascadeId, 0);
const stepsResp = await grpcUnary(
this.port, this.csrfToken, `${LS_SERVICE}/GetCascadeTrajectorySteps`, grpcFrame(stepsProto)
);
const steps = parseTrajectorySteps(stepsResp);
// CORTEX_STEP_TYPE_ERROR_MESSAGE = 17. An error step means the cascade
// refused the request (permission denied, model unavailable, etc.);
// raise it as a model-level error so the account isn't blamed.
for (const step of steps) {
if (step.type === 17 && step.errorText) {
// Log the full trajectory context so we can see WHICH tool call
// (if any) the error refers to. "invalid tool call" without
// context is useless for debugging.
const trail = steps.map(s => ({
type: s.type,
status: s.status,
textLen: s.text?.length || 0,
tools: (s.toolCalls || []).map(tc => tc.name).join(','),
}));
log.warn('Cascade error step', { errorText: step.errorText.trim(), trail });
const err = new Error(step.errorText.trim());
err.isModelError = true;
throw err;
}
}
// Stall detection - two flavors:
// (a) "cold stall": 30s+ ACTIVE but never saw any text or tool
//     call → planner is deadlocked before even starting to
//     produce output. Rotate account, don't make the user wait.
// (b) "warm stall": we already streamed some text, but it hasn't
//     grown for 25s while status is still non-IDLE → planner is
//     stuck in a tool round-trip or upstream throttle. Accept
//     what we have as a complete response (or, if only a short
//     preamble was streamed, raise a retryable error) rather than
//     waiting out the full 180s maxWait with the client hanging.
const elapsed = Date.now() - startTime;
// Cap at maxWait (180s): long-context requests can legitimately take
// that long to emit the first token from Cascade. Was 90s which
// still tripped on very long prompts (issue #5).
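// Worked example: inputChars = 30_000 gives 30s + floor(30000/1500) * 5s =
// 130s; a 60_000-char prompt would compute 230s but is capped at maxWait
// (180s).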
const coldStallMs = Math.min(maxWait, 30_000 + Math.floor(inputChars / 1500) * 5_000);
if (elapsed > coldStallMs && sawActive && !sawText && seenToolCallIds.size === 0) {
log.warn(`Cascade cold stall: ${elapsed}ms active without any text or tool call (threshold=${coldStallMs}ms, inputChars=${inputChars}), bailing`);
endReason = 'stall_cold';
const err = new Error(`Cascade planner stalled - no output after ${Math.round(coldStallMs / 1000)}s`);
err.isModelError = true;
throw err;
}
if (sawText && lastStatus !== 1 && (Date.now() - lastGrowthAt) > NO_GROWTH_STALL_MS) {
const diag = {
msSinceGrowth: Date.now() - lastGrowthAt,
textLen: totalYielded,
thinkingLen: totalThinking,
stepCount: yieldedByStep.size,
toolCalls: seenToolCallIds.size,
lastStatus,
};
// Short-reply stall → treat as error so handlers/chat.js retries on
// another account. A 50-char preamble is worse than no reply at all
// because the client accepts it as "successful" and shows it to the
// user. Retry only if we haven't streamed anything substantial yet
// (if we did, partial delivery + idle end is fine).
if (totalYielded < STALL_RETRY_MIN_TEXT) {
log.warn('Cascade warm stall (short, retrying on next account)', diag);
endReason = 'stall_warm_retry';
const err = new Error('Cascade planner stalled after preamble - no progress for 25s');
err.isModelError = true;
throw err;
}
log.warn('Cascade warm stall (accepting partial)', diag);
endReason = 'stall_warm';
break; // return what we have as a successful response
}
// Any trajectory change counts as forward progress. A new step, a new
// tool call proposal, or thinking growth all reset the stall timer so
// Cascade's slow silent planning phases don't get cut off mid-think.
if (steps.length > lastStepCount) {
lastStepCount = steps.length;
lastGrowthAt = Date.now();
}
for (let i = 0; i < steps.length; i++) {
const step = steps[i];
// Per-step token usage. Overwrite on every poll so the map always
// holds the latest reported numbers (they grow monotonically as
// the generator emits more output). We sum across steps at the
// end to compute the response's total usage.
if (step.usage) usageByStep.set(i, step.usage);
// Collect tool calls β€” dedupe by id so the same step seen across
// polls only emits once. A tool call with an existing `result`
// means the LS already executed it (built-in Cascade tool); we
// pass it through to the client for visibility.
if (step.toolCalls && step.toolCalls.length) {
for (const tc of step.toolCalls) {
const key = tc.id || `${tc.name}:${tc.argumentsJson}`;
if (seenToolCallIds.has(key)) continue;
seenToolCallIds.add(key);
toolCalls.push(tc);
lastGrowthAt = Date.now();
}
}
// Thinking delta: the LS keeps `thinking` as the cumulative
// reasoning text for the step. Track a per-step cursor and emit
// only the tail as reasoning_content. Crucially, thinking growth
// *also* resets lastGrowthAt - prior code only watched response
// text, so long silent thinking phases got falsely flagged as
// stalls and 20% of Cascade requests came back as 50-char
// preambles (`/tmp/...` style "let me analyze" stubs).
const liveThink = step.thinking || '';
if (liveThink) {
const prevThink = thinkingByStep.get(i) || 0;
if (liveThink.length > prevThink) {
const thinkDelta = liveThink.slice(prevThink);
thinkingByStep.set(i, liveThink.length);
totalThinking += thinkDelta.length;
lastGrowthAt = Date.now();
const tchunk = { text: '', thinking: thinkDelta, isError: false };
chunks.push(tchunk);
onChunk?.(tchunk);
}
}
// Text delta rule: prefer `responseText` (append-only stream) over
// `modifiedText` (LS post-pass rewrite) while we're streaming. The
// LS periodically swaps `response` → `modified_response` mid-turn
// with slightly different wording; if we blindly `entry.text =
// modifiedText || responseText` and take a length-based slice, the
// rewritten middle bytes vanish because we already advanced the
// cursor past them in an earlier poll. Using responseText keeps the
// slice monotonic. At turn end we top up with `modifiedText` (see
// below) so the final accumulated text is still the LS's polished
// version when one exists.
const liveText = step.responseText || step.text || '';
if (!liveText) continue;
const prev = yieldedByStep.get(i) || 0;
if (liveText.length > prev) {
const delta = liveText.slice(prev);
yieldedByStep.set(i, liveText.length);
totalYielded += delta.length;
lastGrowthAt = Date.now();
sawText = true;
const chunk = { text: delta, thinking: '', isError: false };
chunks.push(chunk);
onChunk?.(chunk);
}
}
// Check status
const statusProto = buildGetTrajectoryRequest(cascadeId);
const statusResp = await grpcUnary(
this.port, this.csrfToken, `${LS_SERVICE}/GetCascadeTrajectory`, grpcFrame(statusProto)
);
const status = parseTrajectoryStatus(statusResp);
lastStatus = status;
if (status !== 1) sawActive = true;
if (status === 1) { // IDLE
// Don't allow idle-break during the warmup window unless we've
// already seen the planner go non-IDLE at least once. Without this
// guard, cascades whose trajectory hasn't kicked off yet (status
// stuck at 1 for the first ~600ms) terminate after only 2 polls
// and the client sees a near-empty reply.
const elapsed = Date.now() - startTime;
const graceOver = elapsed > IDLE_GRACE_MS;
if (!sawActive && !graceOver) {
continue; // still warming up - don't count this as idle
}
idleCount++;
// Require at least a little text OR a long idle streak before
// accepting "done", so we don't race the first visible chunk.
const canBreak = sawText ? idleCount >= 2 : idleCount >= 4;
if (canBreak) {
// Final sweep
const finalResp = await grpcUnary(
this.port, this.csrfToken, `${LS_SERVICE}/GetCascadeTrajectorySteps`, grpcFrame(stepsProto)
);
const finalSteps = parseTrajectorySteps(finalResp);
for (let i = 0; i < finalSteps.length; i++) {
const step = finalSteps[i];
const responseText = step.responseText || '';
const modifiedText = step.modifiedText || '';
const prev = yieldedByStep.get(i) || 0;
// Normal top-up: responseText grew past what we streamed.
if (responseText.length > prev) {
const delta = responseText.slice(prev);
yieldedByStep.set(i, responseText.length);
totalYielded += delta.length;
chunks.push({ text: delta, thinking: '', isError: false });
onChunk?.({ text: delta, thinking: '', isError: false });
}
// Modified-response top-up: only if it's a strict extension of
// what we already emitted. If modifiedText rewrites the prefix
// (common when LS polishes), emitting the tail would splice
// wrong content onto the stream, so we skip it and keep the
// raw responseText we already showed.
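// Example (strings invented): if we streamed responseText = "Hello wor" and
// the final modifiedText is "Hello world.", the prefix is preserved and we
// emit the tail "ld."; if modifiedText were "Greetings, world." the prefix
// was rewritten, so we skip it and keep the raw responseText.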
const cursor = yieldedByStep.get(i) || 0;
if (modifiedText.length > cursor && modifiedText.startsWith(responseText)) {
const delta = modifiedText.slice(cursor);
yieldedByStep.set(i, modifiedText.length);
totalYielded += delta.length;
chunks.push({ text: delta, thinking: '', isError: false });
onChunk?.({ text: delta, thinking: '', isError: false });
}
}
endReason = sawText ? 'idle_done' : 'idle_empty';
break;
}
} else {
idleCount = 0;
}
}
if (endReason === 'unknown') endReason = 'max_wait';
// Structured summary so we can diagnose short/empty completions after
// the fact. sawActive=false + sawText=false + idle_empty = the planner
// never actually ran on this cascade - likely an upstream starvation.
const summary = {
cascadeId: cascadeId.slice(0, 8),
reason: endReason,
polls: pollCount,
textLen: totalYielded,
thinkingLen: totalThinking,
stepCount: Math.max(yieldedByStep.size, thinkingByStep.size, lastStepCount),
toolCalls: seenToolCallIds.size,
sawActive,
sawText,
lastStatus,
ms: Date.now() - startTime,
};
if (totalYielded < 20 && endReason !== 'aborted') {
log.warn('Cascade short reply', summary);
} else {
log.info('Cascade done', summary);
}
onEnd?.(chunks);
// ── Real token usage via GetCascadeTrajectoryGeneratorMetadata ──
// CortexStepMetadata.model_usage (the per-step field) is usually empty
// in the step trajectory response - the LS only populates the real
// token counts in a separate RPC keyed off cascade_id. We fire this
// once after the polling loop ends. Keep it non-fatal: a network blip
// here just drops usage back to the chars/4 estimator, the response
// itself is already formed.
let serverUsage = null;
try {
const metaReq = buildGetGeneratorMetadataRequest(cascadeId, 0);
const metaResp = await grpcUnary(
this.port, this.csrfToken,
`${LS_SERVICE}/GetCascadeTrajectoryGeneratorMetadata`,
grpcFrame(metaReq), 5000
);
serverUsage = parseGeneratorMetadata(metaResp);
} catch (e) {
log.debug(`GetCascadeTrajectoryGeneratorMetadata failed: ${e.message}`);
}
// Fallback: if the generator metadata RPC didn't give us anything,
// check the per-step metadata we collected during polling (some LS
// versions do populate CortexStepMetadata.model_usage directly).
if (!serverUsage && usageByStep.size > 0) {
let inT = 0, outT = 0, cacheR = 0, cacheW = 0;
for (const u of usageByStep.values()) {
inT += u.inputTokens || 0;
outT += u.outputTokens || 0;
cacheR += u.cacheReadTokens || 0;
cacheW += u.cacheWriteTokens || 0;
}
if (inT || outT || cacheR || cacheW) {
serverUsage = {
inputTokens: inT,
outputTokens: outT,
cacheReadTokens: cacheR,
cacheWriteTokens: cacheW,
};
}
}
// Attach cascade metadata so the caller can check it back into the
// conversation pool. We still return the array so existing callers
// that iterate over it keep working.
chunks.cascadeId = cascadeId;
chunks.sessionId = sessionId;
chunks.toolCalls = toolCalls;
chunks.usage = serverUsage;
if (serverUsage) {
log.info(`Cascade usage: in=${serverUsage.inputTokens} out=${serverUsage.outputTokens} cache_r=${serverUsage.cacheReadTokens} cache_w=${serverUsage.cacheWriteTokens}`);
}
if (toolCalls.length) log.info(`Cascade tool calls: ${toolCalls.length}`, { names: toolCalls.map(t => t.name) });
return chunks;
} catch (err) {
onError?.(err);
throw err;
}
}
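// Sketch of a full cascade round-trip (enum and UID values are placeholders):
//   const chunks = await client.cascadeChat(
//     [{ role: 'user', content: 'Summarize the repo layout' }],
//     300,              // placeholder enum
//     'some-model-uid', // placeholder UID
//     { onChunk: (c) => process.stdout.write(c.text || c.thinking) },
//   );
//   // chunks.cascadeId / chunks.sessionId can be checked back into a pool,
//   // chunks.toolCalls holds deduped tool-call proposals, and chunks.usage
//   // holds server-reported token counts when available.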
// ─── Register user (JSON REST, unchanged) ────────────────
async registerUser(firebaseToken) {
return new Promise((resolve, reject) => {
const postData = JSON.stringify({ firebase_id_token: firebaseToken });
const req = https.request({
hostname: 'api.codeium.com',
port: 443,
path: '/register_user/',
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Content-Length': Buffer.byteLength(postData),
},
}, (res) => {
let raw = '';
res.on('data', d => raw += d);
res.on('end', () => {
// Check the HTTP status before parsing so a non-JSON error body (HTML
// error page, proxy response) still reports the status code instead of
// a generic parse error.
if (res.statusCode >= 400) {
reject(new Error(`RegisterUser failed (${res.statusCode}): ${raw}`));
return;
}
try {
const json = JSON.parse(raw);
if (!json.api_key) {
reject(new Error(`RegisterUser response missing api_key: ${raw}`));
return;
}
resolve({ apiKey: json.api_key, name: json.name, apiServerUrl: json.api_server_url });
} catch {
reject(new Error(`RegisterUser parse error: ${raw}`));
}
});
res.on('error', reject);
});
req.on('error', reject);
req.write(postData);
req.end();
});
}
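// Sketch: exchange a Firebase ID token for a Codeium API key (the token is a
// placeholder variable):
//   const { apiKey, name, apiServerUrl } = await client.registerUser(firebaseIdToken);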
// ── GetUserStatus ────────────────────────────────────────
//
// One-shot RPC that returns the account's canonical tier + cascade
// model allowlist + credit usage + trial end time. Replaces the
// probe-based tier inference for accounts where this call succeeds.
async getUserStatus() {
const proto = buildGetUserStatusRequest(this.apiKey);
const resp = await grpcUnary(
this.port, this.csrfToken,
`${LS_SERVICE}/GetUserStatus`, grpcFrame(proto), 10000,
);
return parseGetUserStatusResponse(resp);
}
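// Sketch (exact shape depends on parseGetUserStatusResponse):
//   const status = await client.getUserStatus();
//   // expected to carry the account tier, cascade model allowlist, credit
//   // usage and trial end time described above.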
}