Spaces:
Sleeping
Sleeping
File size: 5,249 Bytes
f6266b9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | /**
* Strip server-internal filesystem paths from model output before it reaches
* the API caller.
*
* Background: Cascade's baked-in system context tells the model its workspace
* lives at /tmp/windsurf-workspace. Even after we removed CascadeToolConfig
* .run_command (see windsurf.js buildCascadeConfig) the model still
* (a) narrates "I'll look at /tmp/windsurf-workspace/config.yaml" in plain
* text, and
* (b) occasionally emits built-in edit_file / view_file / list_directory
* trajectory steps whose argumentsJson references these paths.
* Both routes leak the proxy's internal filesystem layout to API callers.
*
* This module provides two scrubbers:
* - sanitizeText(s) — one-shot, use on accumulated buffers
* - PathSanitizeStream — incremental, use on streaming chunks
*
* The streaming version holds back any tail that could be an incomplete
* prefix of a sensitive literal OR a match-in-progress whose path-tail hasn't
* hit a terminator yet, so a path cannot slip through by straddling a chunk
* boundary.
*/
// [regex, replacement] pairs for the server-internal path prefixes that must
// never appear in output. sanitizeText() runs every pair, in the order given,
// through String.prototype.replace.
//
// Each regex matches the literal prefix plus an optional "/..." tail that runs
// until whitespace or one of: " ' ` < > ) } ] , * ;
//
// The workspace literal is replaced with "." and its captured tail is kept
// ($1), so text like "/tmp/windsurf-workspace/foo.py" becomes "./foo.py"
// (still readable). The other two are replaced, tail included, with
// "[internal]" — no reason a caller should ever see them.
const PATTERNS = [
// Workspace root: keep the relative tail, drop the absolute prefix.
[/\/tmp\/windsurf-workspace(\/[^\s"'`<>)}\],*;]*)?/g, '.$1'],
// Install / checkout locations: redact prefix and tail together.
[/\/opt\/windsurf(?:\/[^\s"'`<>)}\],*;]*)?/g, '[internal]'],
[/\/root\/WindsurfAPI(?:\/[^\s"'`<>)}\],*;]*)?/g, '[internal]'],
];
// Bare literals (no path tail) used by the streaming cut-point finder, which
// searches the buffer for each one in full and also checks whether the buffer
// ends with a proper prefix of it. These are the same three prefixes that the
// PATTERNS regexes start with; keep the two lists in sync.
const SENSITIVE_LITERALS = [
'/tmp/windsurf-workspace',
'/opt/windsurf',
'/root/WindsurfAPI',
];
// Character class that counts as part of a path body. Mirrors the PATTERNS
// regex char class so cut-point detection matches replacement behaviour.
// It is tested against one character at a time and carries no `g` flag, so
// repeated .test() calls do not depend on lastIndex.
const PATH_BODY_RE = /[^\s"'`<>)}\],*;]/;
/**
 * One-shot scrubber: runs every [regex, replacement] pair in PATTERNS over
 * the input, each pair operating on the output of the previous one.
 *
 * @param {*} s - Value to scrub. Anything that is not a non-empty string is
 *   handed back untouched.
 * @returns {*} The redacted string, or `s` itself when there is nothing to do.
 */
export function sanitizeText(s) {
  const hasText = typeof s === 'string' && s.length > 0;
  if (!hasText) {
    return s;
  }
  return PATTERNS.reduce(
    (scrubbed, [pattern, replacement]) => scrubbed.replace(pattern, replacement),
    s,
  );
}
/**
 * Streaming counterpart of sanitizeText() for text that arrives in deltas.
 *
 * Example:
 *   const stream = new PathSanitizeStream();
 *   for (const chunk of deltas) emit(stream.feed(chunk));
 *   emit(stream.flush());
 *
 * feed() only releases text that can no longer take part in a redaction that
 * is still in progress. Anything that might yet grow into a sensitive path
 * (a partial literal at the tail, or a literal whose path has not reached a
 * terminator) stays in `buffer` until a later feed() or the final flush().
 */
export class PathSanitizeStream {
  constructor() {
    // Text received so far that has not been released to the caller.
    this.buffer = '';
  }

  /**
   * Append a delta and release whatever prefix of the buffer is safe.
   *
   * @param {string} delta - Next chunk of streamed text; falsy values are ignored.
   * @returns {string} Sanitized text ready to emit ('' when everything is held).
   */
  feed(delta) {
    if (!delta) {
      return '';
    }
    this.buffer += delta;

    const boundary = this._safeCutPoint();
    if (boundary === 0) {
      return '';
    }

    const pending = this.buffer;
    this.buffer = pending.slice(boundary);
    return sanitizeText(pending.slice(0, boundary));
  }

  /**
   * Find how much of the buffer may be released right now.
   *
   * The answer is the smallest start index among the spans that must be
   * held back, or the buffer length when nothing needs holding:
   *   - a complete sensitive literal whose path body reaches the very end of
   *     the buffer (the next delta could still extend that path), held from
   *     the literal's first character;
   *   - a buffer tail that is a proper prefix of a sensitive literal, e.g.
   *     "/tmp/win" (the next delta could complete it), held from where that
   *     tail starts.
   *
   * @returns {number} Index into `buffer`; everything before it is safe.
   */
  _safeCutPoint() {
    const text = this.buffer;
    const total = text.length;

    // Index of the first character at or after `from` that ends a path body.
    const skipPathBody = (from) => {
      let pos = from;
      while (pos < total && PATH_BODY_RE.test(text[pos])) {
        pos += 1;
      }
      return pos;
    };

    // Start offsets of every span that has to stay buffered.
    const holdStarts = [total];

    for (const literal of SENSITIVE_LITERALS) {
      // Complete literal whose path is still open at the end of the buffer.
      let cursor = 0;
      while (cursor < total) {
        const hit = text.indexOf(literal, cursor);
        if (hit === -1) {
          break;
        }
        const bodyEnd = skipPathBody(hit + literal.length);
        if (bodyEnd === total) {
          holdStarts.push(hit);
          break;
        }
        // Terminated occurrence: resume just past its terminator.
        cursor = bodyEnd + 1;
      }

      // Longest proper prefix of the literal that the buffer ends with.
      let overlap = Math.min(literal.length - 1, total);
      while (overlap > 0 && !text.endsWith(literal.slice(0, overlap))) {
        overlap -= 1;
      }
      if (overlap > 0) {
        holdStarts.push(total - overlap);
      }
    }

    return Math.min(...holdStarts);
  }

  /**
   * Release everything still buffered, sanitized, and reset the stream.
   *
   * @returns {string} Sanitized remainder ('' when nothing was held).
   */
  flush() {
    const remainder = this.buffer;
    this.buffer = '';
    return sanitizeText(remainder);
  }
}
/**
 * Scrub a native Cascade tool call (built-in tools such as edit_file /
 * view_file) before it is surfaced to the client. Both `argumentsJson` and
 * `result` are passed through sanitizeText(); a missing or falsy field is
 * scrubbed as '' and therefore comes back as ''.
 *
 * Not on the hot path today — handlers/chat.js drops all native tool calls in
 * non-emulation mode rather than risking leakage — but kept for opt-in use.
 *
 * @param {object} tc - Tool call; falsy values are returned unchanged.
 * @returns {object} Shallow copy of `tc` with the two fields scrubbed.
 */
export function sanitizeToolCall(tc) {
  if (!tc) {
    return tc;
  }
  const argumentsJson = sanitizeText(tc.argumentsJson || '');
  const result = sanitizeText(tc.result || '');
  return { ...tc, argumentsJson, result };
}
|