${safeTitle}
${safeDescription}
${noteHtml ? `${safeDescription}
${noteHtml ? `${safeOrg} organization on the consent screen:`,
`${safeOrg} on, then confirm.${safeOrg || "the Space"} to add you as a write member. ` +
`If you have multiple Hugging Face accounts, you can also sign out and try again with the right one.`,
buttonLabel: "Sign in with another account",
buttonHref: "/oauth/authorize?prompt=consent",
};
}
/**
 * Assemble the server: the Hocuspocus collaboration backend, the Express
 * app with its API routers, the `/collab` WebSocket upgrade handler and the
 * static / published asset routes.
 *
 * The data directory is created if it is missing. The HTTP server is only
 * constructed here; `listen` is not called by this function.
 *
 * @returns The Express `app`, the wrapping `httpServer`, the `hocuspocus`
 *   instance and the `wss` WebSocket server used for `/collab` upgrades.
 */
export function createApp() {
  const DATA_DIR = getDataDir();
  mkdirSync(DATA_DIR, { recursive: true });

  const oauthEnabled = isOAuthEnabled();

  // ---------- Hocuspocus (Y.js collaboration server) ----------
  const hocuspocus = new Hocuspocus({
    /**
     * Gate collaboration connections. With OAuth disabled every connection
     * is accepted; otherwise the token must resolve to a user with
     * `canEdit`, or the hook throws.
     */
    async onAuthenticate({ token, context }: { token: string; context: any }) {
      if (!oauthEnabled) return;
      const { resolveUser } = await import("./auth.js");

      // Two-source pattern: the HocuspocusProvider client sends `token`
      // via the WS sub-protocol, but our cookie is httpOnly so the
      // client can't read it and sends "" instead. Fall back to the
      // cookie the upgrade handler stuffed into context.token.
      const authToken = token || context?.token;

      // Surface enough info in the Space logs to triage "Disconnected"
      // reports without leaking the token itself. We log:
      //   - which source produced the token, and its length only
      //   - the resolved user (or null) + accessIssue when present
      //   - the SPACE_ID owner being checked, to spot org mismatches
      const tokenSource = token
        ? "client"
        : context?.token
          ? "cookie"
          : "none";
      const tokenLen = authToken ? authToken.length : 0;

      const user = await resolveUser(authToken);
      // SPACE_ID is read as "<owner>/<name>"; only the owner part is logged.
      const spaceOwner = (process.env.SPACE_ID || "").split("/")[0] || "(none)";

      if (!user) {
        console.warn(
          `[ws-auth] reject: no user resolved` +
            ` source=${tokenSource} tokenLen=${tokenLen}` +
            ` spaceOwner=${spaceOwner}`,
        );
        throw new Error("Unauthorized: invalid or missing HF token");
      }

      if (!user.canEdit) {
        console.warn(
          `[ws-auth] reject: ${user.name} can't write to ${spaceOwner}` +
            ` issue=${user.accessIssue ?? "unknown"}` +
            ` source=${tokenSource}`,
        );
        throw new Error(
          `Unauthorized: ${user.name} has no write access to ${spaceOwner}` +
            (user.accessIssue ? ` (${user.accessIssue})` : ""),
        );
      }

      console.log(
        `[ws-auth] accept user=${user.name} spaceOwner=${spaceOwner}` +
          ` source=${tokenSource}`,
      );
      if (authToken) setUserToken(authToken);
      return { user };
    },
    extensions: [
      new Database({
        // Load order: local file first, then (when HF storage is enabled)
        // a pull from HF that is also written to the local path. Any
        // failure is logged and reported as "no document" (null).
        fetch: async ({ documentName }: { documentName: string }) => {
          try {
            const p = docPath(documentName);
            if (existsSync(p)) {
              const buf = readFileSync(p);
              console.log(`[persist] fetch "${documentName}" from disk: ${buf.length} bytes`);
              return buf;
            }
            console.log(`[persist] fetch "${documentName}": no file on disk`);
            if (isHfStorageEnabled()) {
              const data = await pullDocument(documentName);
              if (data) {
                writeFileSync(p, data);
                console.log(`[persist] pulled ${documentName} from HF`);
                return Buffer.from(data);
              }
            }
          } catch (err) {
            console.error(`[persist] fetch "${documentName}" failed:`, (err as Error).message);
          }
          return null;
        },
        // Intentionally a no-op: changes are handed to debouncedSave from
        // the onChange hook below instead.
        store: async () => {},
      }),
      {
        async onChange({ documentName, document }: { documentName: string; document: any }) {
          console.log(`[persist] onChange "${documentName}"`);
          debouncedSave(documentName, document);
        },
        async afterLoadDocument({ documentName }: { documentName: string }) {
          console.log(`[persist] loaded "${documentName}"`);
        },
      } as any,
    ],
  });

  // ---------- Express app ----------
  const app = express();
  const httpServer = createServer(app);

  app.use(express.json({ limit: "1mb" }));

  const authCtx = { oauthEnabled };
  const requireEditor = createRequireEditor(authCtx);

  app.use(createAuthRouter(authCtx));
  app.use(createChatRouter(requireEditor));
  app.use("/api/citations", citationsRouter);
  app.use(createPublishRouter({ oauthEnabled, hocuspocus }));
  app.use(createUploadRouter());
  app.use(createStorageRouter({ oauthEnabled }));

  // Reverse proxy for private-dataset assets. Mounted before any
  // static serving so `/d/*` always wins, never falls through to a
  // 404 from express.static.
  app.use(createDatasetProxyRouter());

  // ---------- Collab WebSocket ----------
  // `noServer: true` — upgrades are routed manually below so that only
  // `/collab` URLs reach the WebSocket server.
  const wss = new WebSocketServer({ noServer: true });

  httpServer.on("upgrade", (req, socket, head) => {
    const url = req.url || "";
    if (url === "/collab" || url.startsWith("/collab/") || url.startsWith("/collab?")) {
      console.log(`[ws] upgrade request for ${url}`);
      wss.handleUpgrade(req, socket, head, (ws) => {
        // NOTE(review): presumably lifts the listener cap because several
        // handlers get attached per socket — confirm.
        ws.setMaxListeners(Infinity);
        ws.on("error", (error) => {
          console.error("[ws] socket error:", error.message);
        });

        // Per-message hex dump, outside production only.
        if (process.env.NODE_ENV !== "production") {
          ws.on("message", (data: Buffer, isBinary: boolean) => {
            const buf = Buffer.isBuffer(data) ? data : Buffer.from(data as any);
            // subarray() is the non-deprecated equivalent of Buffer#slice.
            console.log(`[ws-debug] msg ${buf.length}B binary=${isBinary} first20=${buf.subarray(0, 20).toString("hex")}`);
          });
        }

        const token = extractToken(req.headers.cookie);
        // Diagnostic for "Disconnected" reports: confirms whether the
        // browser actually attached our session cookie to the WS
        // upgrade. On HF Spaces, some gating setups occasionally
        // strip cookies on WS upgrades even when they're sent for
        // plain HTTP, which manifests as a working /editor route
        // but a permanently-failing WS auth.
        const cookieHeader = req.headers.cookie || "";
        console.log(
          `[ws] upgrade cookies=${cookieHeader.length}B hasToken=${Boolean(token)}`,
        );
        // The cookie token becomes `context.token` in onAuthenticate.
        hocuspocus.handleConnection(ws, req, { token });
      });
    } else {
      console.log(`[ws] rejected upgrade for ${url}`);
      socket.destroy();
    }
  });

  // ---------- Static assets ----------
  app.use("/uploads", express.static(join(DATA_DIR, "uploads")));
  app.use("/published", express.static(join(DATA_DIR, "published")));

  const staticDir =
    process.env.NODE_ENV === "production"
      ? join(DATA_DIR, "..", "frontend-dist")
      : join(DATA_DIR, "..", "..", "frontend", "dist");

  /** Path of the published HTML page for `docName`. */
  function getPublishedPath(docName: string): string {
    return join(DATA_DIR, "published", docName, "index.html");
  }

  /** Path of a file that sits next to the published page of `docName`. */
  function getPublishedAssetPath(docName: string, filename: string): string {
    return join(DATA_DIR, "published", docName, filename);
  }

  // The routes below are only registered when a frontend build exists.
  if (existsSync(staticDir)) {
    // `index: false` so "/" falls through to the catch-all route below
    // instead of being answered by express.static.
    app.use(express.static(staticDir, { index: false }));

    // ---- LLM-friendly endpoints --------------------------------------
    // The publisher generates a Markdown twin of the article (`llms.txt`)
    // following the https://llmstxt.org/ convention. We expose it at the
    // Space root so external agents/crawlers (Claude, Perplexity, ...) can
    // consume the article without having to parse the heavy HTML page.
    // `robots.txt` advertises this endpoint.
    app.get("/llms.txt", (_req, res) => {
      const llmsPath = getPublishedAssetPath(DEFAULT_DOC_NAME, "llms.txt");
      if (!existsSync(llmsPath)) {
        res.status(404).type("text/plain").send("Not yet published");
        return;
      }
      res.type("text/markdown; charset=utf-8");
      res.sendFile(llmsPath);
    });

    app.get("/robots.txt", (_req, res) => {
      res
        .type("text/plain; charset=utf-8")
        .send(
          [
            "User-agent: *",
            "Allow: /",
            "",
            "LLMs-Txt: /llms.txt",
            "",
          ].join("\n"),
        );
    });

    // Editor shell: with OAuth on, anyone who is not a resolved user with
    // `canEdit` gets the login page instead of the app.
    app.get("/editor", async (req, res, next) => {
      try {
        if (oauthEnabled) {
          const { resolveUser } = await import("./auth.js");
          const token = extractToken(req.headers.cookie);
          const user = await resolveUser(token);
          if (!user || !user.canEdit) {
            res.status(200).send(renderLoginPage(buildEditorLoginPage(user)));
            return;
          }
        }
        res.sendFile(join(staticDir, "index.html"));
      } catch (err) {
        // The promise returned by an async handler is not observed by the
        // router, so a rejection would otherwise go unhandled and leave the
        // request hanging. Hand it to the error middleware instead.
        next(err);
      }
    });

    // Catch-all: without OAuth serve the frontend index; with OAuth serve
    // the published article when it exists, otherwise the login page.
    app.get("*", async (req, res, next) => {
      try {
        if (!oauthEnabled) {
          res.sendFile(join(staticDir, "index.html"));
          return;
        }
        const visitorToken = extractToken(req.headers.cookie) ?? undefined;
        await ensurePublishedRestored(visitorToken);
        const publishedPath = getPublishedPath(DEFAULT_DOC_NAME);
        if (existsSync(publishedPath)) {
          res.sendFile(publishedPath);
          return;
        }
        sendLoginPage(res);
      } catch (err) {
        // Same reasoning as in the /editor handler: surface async failures
        // through the error middleware rather than as unhandled rejections.
        next(err);
      }
    });
  }

  return { app, httpServer, hocuspocus, wss };
}