carbon-tokenization

Running

App Files Files Community

carbon-tokenization / backend /src /auth.ts

tfrere HF Staff

fix(auth): cookie lifetime now matches the JWT exp, not a stale 8h fallback

5908601 21 days ago

raw

history blame contribute delete

14 kB

	import { whoAmI, type WhoAmIUser } from "@huggingface/hub";
	import { randomBytes } from "crypto";
	import type { Request, Response } from "express";

	/**
	 * Why an authenticated user can't edit the Space.
	 *
	 * - `no-org-grant`: the owner segment of `SPACE_ID` is not the user and
	 *   does not appear among the orgs returned by `whoAmI`. The usual cause
	 *   is that the user didn't tick the "Allow access to <org>" checkbox on
	 *   HF's OAuth consent screen, so membership can't be checked and
	 *   permission is assumed missing. Fix: re-authorize with
	 *   `prompt=consent` and tick the org this time.
	 * - `not-member`: the owning org IS visible in the user's org list, but
	 *   neither the org-level role nor any resource-group role is `write` or
	 *   `admin`. There's nothing the user can do from inside the app - they
	 *   need an org admin to raise their role.
	 */
	export type AuthIssue = "no-org-grant" | "not-member";

	/**
	 * Identity and permission summary for a signed-in visitor, as produced
	 * by `resolveUser`.
	 */
	export interface AuthUser {
		/** Account name (the `name` field of the whoami response). */
		name: string;
		/** Display name; `resolveUser` falls back to `name` when it is empty. */
		fullName: string;
		/** Avatar URL, or an empty string when the whoami response has none. */
		avatarUrl: string;
		/** Whether this user is allowed to edit the Space. */
		canEdit: boolean;
		/**
		 * Present when the user is authenticated but cannot edit. Allows
		 * the login page to explain the actual problem ("missing org
		 * grant" vs. "not a member") rather than showing a plain "sign in"
		 * prompt to somebody who is already signed in.
		 */
		accessIssue?: AuthIssue;
		/**
		 * Owner segment of the Space id when access was evaluated against
		 * an org. Shown on the login page so the user knows which org to
		 * tick on the consent screen. Left undefined when the user owns
		 * the Space directly.
		 */
		spaceOrg?: string;
	}

	// HF injects these env vars when hf_oauth: true is in README.
	// `||` (rather than `??`) is deliberate: a variable that is set but
	// empty is treated exactly like a missing one.
	const SPACE_ID = process.env.SPACE_ID || "";
	const SPACE_HOST = process.env.SPACE_HOST || "";
	const OAUTH_CLIENT_ID = process.env.OAUTH_CLIENT_ID || "";
	const OAUTH_CLIENT_SECRET = process.env.OAUTH_CLIENT_SECRET || "";
	const OAUTH_SCOPES = process.env.OAUTH_SCOPES || "openid profile";
	const OPENID_PROVIDER_URL = process.env.OPENID_PROVIDER_URL || "https://huggingface.co";

	// Name of the cookie that stores the HF access token.
	const COOKIE_NAME = "hf_access_token";

	// No SPACE_ID means the process is not running inside a Space; the
	// rest of this module treats that as local development.
	const IS_DEV = !SPACE_ID;

	/**
	 * Tells whether OAuth can be used at all.
	 *
	 * @returns `true` only when both the client id and the client secret
	 *   are non-empty.
	 */
	export function isOAuthEnabled(): boolean {
		const hasClientId = !!OAUTH_CLIENT_ID;
		const hasClientSecret = !!OAUTH_CLIENT_SECRET;
		return hasClientId && hasClientSecret;
	}

	/**
	 * Builds the `redirect_uri` sent to the OAuth provider.
	 *
	 * @returns `https://<SPACE_HOST>/auth/callback` when `SPACE_HOST` is
	 *   set, otherwise the fixed local-development callback URL.
	 */
	function getRedirectUri(): string {
		// Local development callback.
		// NOTE(review): the original comment said the dev callback goes
		// through the Vite proxy on port 5678, yet this URL targets port
		// 8080 - confirm which of the two is current.
		const devCallback = "http://localhost:8080/auth/callback";
		return SPACE_HOST ? `https://${SPACE_HOST}/auth/callback` : devCallback;
	}

	/**
	 * Picks where the browser is sent once the login cookie has been set.
	 *
	 * @returns `/editor` when `SPACE_HOST` is set, otherwise the local
	 *   Vite dev server root.
	 */
	function getPostLoginRedirect(): string {
		const viteDevServer = "http://localhost:5678/";
		return SPACE_HOST ? "/editor" : viteDevServer;
	}

	// CSRF protection: every OAuth `state` value we hand out is remembered
	// here together with its creation time (ms since epoch). The store
	// lives in process memory only.
	const pendingStates = new Map<string, number>();

	/** Forgets every pending state that was issued more than ten minutes ago. */
	function cleanupStates() {
		const maxAgeMs = 10 * 60 * 1000;
		const cutoff = Date.now() - maxAgeMs;
		// Removing entries while a Map is being iterated is well-defined.
		pendingStates.forEach((issuedAt, state) => {
			if (issuedAt < cutoff) pendingStates.delete(state);
		});
	}

	/**
	 * Starts the OAuth flow: issues a fresh CSRF `state`, remembers it, and
	 * redirects the browser to `<OPENID_PROVIDER_URL>/oauth/authorize`.
	 * (The original header names the route as GET /oauth/authorize; the
	 * route wiring itself is outside this block.)
	 *
	 * A request carrying `?prompt=consent` has that parameter forwarded to
	 * the provider. According to the original author's notes this is the
	 * recovery path from the "no-org-grant" login page: HF skips the
	 * consent screen on repeat sign-ins, so without it a user who left the
	 * org checkbox unticked would land straight back in the failure state
	 * and never see the org toggle again.
	 *
	 * @param req Incoming request; only `req.query.prompt` is read.
	 * @param res Response used to issue the redirect.
	 */
	export function handleOAuthAuthorize(req: Request, res: Response) {
		// Drop expired states before adding a new one.
		cleanupStates();

		const state = randomBytes(16).toString("hex");
		pendingStates.set(state, Date.now());

		const query = new URLSearchParams();
		query.set("client_id", OAUTH_CLIENT_ID);
		query.set("redirect_uri", getRedirectUri());
		query.set("response_type", "code");
		query.set("scope", OAUTH_SCOPES);
		query.set("state", state);

		// A strict comparison against the literal already rules out
		// array / object query values, so only the exact string passes.
		const wantsConsent = req.query.prompt === "consent";
		if (wantsConsent) {
			query.set("prompt", "consent");
		}

		const authorizeUrl = `${OPENID_PROVIDER_URL}/oauth/authorize?${query.toString()}`;
		res.redirect(authorizeUrl);
	}

	/**
	 * OAuth callback: validates the `state`, exchanges the authorization
	 * `code` for an access token, stores the token in an HTTP-only cookie
	 * and redirects to the post-login page. (The original header names the
	 * route as GET /auth/callback; route wiring is outside this block.)
	 *
	 * Responses:
	 * - 400 when `code` / `state` are missing, are not plain strings, or
	 *   the state is unknown or older than the pending-state TTL.
	 * - 500 when the token endpoint answers with a non-2xx status, returns
	 *   no usable `access_token`, or anything throws along the way.
	 * - redirect to `getPostLoginRedirect()` on success.
	 *
	 * @param req Incoming request; `req.query.code` and `req.query.state`
	 *   are read.
	 * @param res Response used for the cookie, the redirect, or the error.
	 */
	export async function handleOAuthCallback(req: Request, res: Response) {
		// Purge expired states first. Without this a state older than the
		// TTL would still be accepted as long as nobody had hit the
		// authorize endpoint (the only other place that runs the cleanup).
		cleanupStates();

		// Query values can be arrays or nested objects (`?code=a&code=b`),
		// so check the runtime type instead of trusting a cast.
		const { code, state } = req.query;
		if (
			typeof code !== "string" ||
			code === "" ||
			typeof state !== "string" ||
			state === "" ||
			!pendingStates.has(state)
		) {
			res.status(400).send("Invalid OAuth callback");
			return;
		}
		// A state is single-use.
		pendingStates.delete(state);

		try {
			const basicCredentials = Buffer.from(
				`${OAUTH_CLIENT_ID}:${OAUTH_CLIENT_SECRET}`,
			).toString("base64");

			const tokenRes = await fetch(`${OPENID_PROVIDER_URL}/oauth/token`, {
				method: "POST",
				headers: {
					"Content-Type": "application/x-www-form-urlencoded",
					Authorization: `Basic ${basicCredentials}`,
				},
				body: new URLSearchParams({
					grant_type: "authorization_code",
					code,
					redirect_uri: getRedirectUri(),
				}),
			});

			if (!tokenRes.ok) {
				const text = await tokenRes.text();
				console.error("[auth] token exchange failed:", tokenRes.status, text);
				res.status(500).send("OAuth token exchange failed");
				return;
			}

			const tokenData = (await tokenRes.json()) as {
				access_token?: string;
				expires_in?: number;
			};

			// Never store a cookie for a response that carries no token.
			const accessToken = tokenData.access_token;
			if (typeof accessToken !== "string" || accessToken === "") {
				console.error("[auth] token exchange returned no access_token");
				res.status(500).send("OAuth token exchange failed");
				return;
			}

			// Cookie lifetime follows the token's own expiry; the details
			// (JWT `exp`, `expires_in` fallback, floor and cap) live in
			// computeCookieMaxAge.
			const maxAge = computeCookieMaxAge({
				access_token: accessToken,
				expires_in: tokenData.expires_in,
			});

			res.cookie(COOKIE_NAME, accessToken, {
				httpOnly: true,
				secure: !IS_DEV,
				sameSite: IS_DEV ? "lax" : "none",
				maxAge,
				path: "/",
			});

			res.redirect(getPostLoginRedirect());
		} catch (err) {
			console.error("[auth] callback error:", err);
			res.status(500).send("OAuth callback error");
		}
	}

	/**
	 * Compute the cookie max-age in milliseconds for an HF access token.
	 *
	 * Order of preference:
	 * 1. If the token looks like a JWT (three dot-separated segments) and
	 *    its payload has a finite numeric `exp` claim, use `exp - now`.
	 *    The signature is NOT verified - only the timestamp is read.
	 * 2. Only when step 1 produced nothing (opaque token, malformed
	 *    payload, missing `exp`), fall back to `expires_in` (seconds) from
	 *    the OAuth /token response. It is a fallback, not a competitor: a
	 *    larger `expires_in` never overrides a readable `exp`.
	 * 3. Floor at 30 days. The original author's rationale: HF currently
	 *    issues 30-day tokens, so anything shorter is most likely a
	 *    misread, and a short cookie would sign users out mid-session.
	 * 4. Cap at 400 days, the upper bound browsers apply to cookie
	 *    lifetimes (see the RFC 6265bis draft); larger values would be
	 *    clamped anyway.
	 *
	 * @param tokenData The OAuth token response fields that matter here.
	 * @returns A finite lifetime in milliseconds within [30 days, 400 days].
	 */
	function computeCookieMaxAge(tokenData: {
		access_token: string;
		expires_in?: number;
	}): number {
		const DAY_MS = 24 * 60 * 60 * 1000;
		const THIRTY_DAYS_MS = 30 * DAY_MS;
		const FOUR_HUNDRED_DAYS_MS = 400 * DAY_MS;

		let lifeMs: number | undefined;

		// 1. JWT exp claim.
		try {
			const segments = tokenData.access_token.split(".");
			const payloadSegment = segments[1];
			if (segments.length === 3 && payloadSegment !== undefined) {
				// base64url -> base64 (re-padded) -> JSON
				const base64 = payloadSegment.replace(/-/g, "+").replace(/_/g, "/");
				const padded = base64.padEnd(Math.ceil(base64.length / 4) * 4, "=");
				const payload: unknown = JSON.parse(
					Buffer.from(padded, "base64").toString("utf-8"),
				);
				if (typeof payload === "object" && payload !== null) {
					const exp = (payload as { exp?: unknown }).exp;
					// JSON.parse can yield Infinity (e.g. `1e999`), hence isFinite.
					if (typeof exp === "number" && Number.isFinite(exp)) {
						lifeMs = exp * 1000 - Date.now();
					}
				}
			}
		} catch {
			// Not a decodable JWT - deliberate best-effort, fall through
			// to expires_in.
		}

		// 2. expires_in, strictly as a fallback.
		if (
			lifeMs === undefined &&
			typeof tokenData.expires_in === "number" &&
			Number.isFinite(tokenData.expires_in)
		) {
			lifeMs = tokenData.expires_in * 1000;
		}

		// 3. & 4. Floor + cap. An already-expired `exp` (negative lifetime)
		// or no information at all both land on the floor.
		return Math.min(Math.max(lifeMs ?? 0, THIRTY_DAYS_MS), FOUR_HUNDRED_DAYS_MS);
	}

	/**
	 * Signs the visitor out by clearing the HF access-token cookie, then
	 * answers `200 {"ok": true}`.
	 *
	 * The attributes passed to `clearCookie` (path, httpOnly, secure,
	 * sameSite) repeat the ones used when the cookie was set in
	 * handleOAuthCallback. Browsers only drop a cookie when the deleting
	 * Set-Cookie matches the original; otherwise the "expired" cookie is
	 * treated as a different one and the original survives.
	 *
	 * The handler never inspects the request (hence `_req`), so it behaves
	 * the same for any HTTP method the router sends its way. The original
	 * author's notes say the frontend calls it with POST + fetch and that a
	 * GET route is kept as a direct-link fallback for incident response;
	 * that wiring is outside this block.
	 *
	 * @param _req Unused.
	 * @param res Response used to clear the cookie and send the JSON body.
	 */
	export function handleOAuthLogout(_req: Request, res: Response) {
		const deployed = !IS_DEV;
		res.clearCookie(COOKIE_NAME, {
			path: "/",
			httpOnly: true,
			secure: deployed,
			sameSite: deployed ? "none" : "lax",
		});
		res.status(200).json({ ok: true });
	}

	/**
	 * Extract the access token from a raw `Cookie` header string.
	 *
	 * The value is URL-decoded before it is returned, because Express'
	 * `res.cookie` URL-encodes cookie values by default when writing them.
	 * If the stored value is not valid percent-encoding it is returned
	 * unchanged.
	 *
	 * @param cookieHeader Raw `Cookie` header, or undefined when absent.
	 * @returns The token, or undefined when the header is missing/empty or
	 *   contains no non-empty cookie with the expected name.
	 */
	export function extractToken(cookieHeader: string | undefined): string | undefined {
		if (!cookieHeader) return undefined;

		// Anchor the name at the start of the header or right after a `;`
		// so a cookie whose name merely ends with ours does not match.
		const match = cookieHeader.match(new RegExp(`(?:^|;\\s*)${COOKIE_NAME}=([^;]+)`));
		const rawValue = match?.[1];
		if (rawValue === undefined) return undefined;

		try {
			return decodeURIComponent(rawValue);
		} catch {
			// Malformed escape sequence: hand back the raw value untouched.
			return rawValue;
		}
	}

	/**
	 * Role names that count as write access. The access check tests the
	 * user's org-level role against this set first and, failing that, the
	 * role of each resource group listed for that org.
	 *
	 * Any role outside the set (for instance `read`, or `contributor`
	 * without a write-capable resource group) leaves the user read-only.
	 */
	const WRITE_ROLES = new Set<string>(["write", "admin"]);

	/**
	 * Shape this module expects for each entry of the whoami `orgs` array.
	 *
	 * Per the original author's notes, the live HF whoami endpoint returns
	 * `roleInOrg` and `resourceGroups` even though the `@huggingface/hub`
	 * typings omit them (NOTE(review): confirm against the installed
	 * package version). Declaring a local interface lets the compiler
	 * read those fields without resorting to `any`.
	 */
	interface OrgWithRole {
		/** Org handle; compared against the owner segment of the Space id. */
		name: string;
		/** Org-level role; treated as `read` when absent. */
		roleInOrg?: "admin" | "write" | "contributor" | "read";
		/** Per-resource-group roles the user holds inside this org. */
		resourceGroups?: Array<{ name: string; role: string }>;
	}

	/**
	 * Resolve a HF access token into user info + edit permission.
	 *
	 * In dev mode (`IS_DEV`) every successfully identified user is granted
	 * edit rights. Otherwise the decision is delegated to
	 * `evaluateWriteAccess`, using the org list from the same whoami call.
	 *
	 * @param accessToken Token taken from the auth cookie, if any.
	 * @returns The resolved user, or null when the token is missing/empty
	 *   or the whoami call fails (the failure is logged, never thrown).
	 */
	export async function resolveUser(
		accessToken: string | undefined
	): Promise<AuthUser | null> {
		if (!accessToken) return null;

		try {
			const info = (await whoAmI({
				accessToken,
				hubUrl: "https://huggingface.co",
			})) as WhoAmIUser;

			const name = info.name;
			// `||` on purpose: an empty display name / avatar also falls back.
			const fullName = info.fullname || name;
			const avatarUrl = info.avatarUrl || "";

			if (IS_DEV) {
				console.log(`[auth] user=${name} canEdit=true (dev mode)`);
				return { name, fullName, avatarUrl, canEdit: true };
			}

			const access = evaluateWriteAccess(name, (info.orgs || []) as OrgWithRole[]);
			console.log(
				`[auth] user=${name} canEdit=${access.canEdit}` +
					(access.issue ? ` issue=${access.issue}` : "") +
					(access.spaceOrg ? ` org=${access.spaceOrg}` : "") +
					(access.role ? ` role=${access.role}` : ""),
			);

			return {
				name,
				fullName,
				avatarUrl,
				canEdit: access.canEdit,
				accessIssue: access.issue,
				spaceOrg: access.spaceOrg,
			};
		} catch (err) {
			// `err` is unknown: anything can be thrown, not only Error objects.
			const reason = err instanceof Error ? err.message : String(err);
			console.warn("[auth] whoAmI failed:", reason);
			return null;
		}
	}

	/** Outcome of the write-access evaluation for one user. */
	interface WriteAccessResult {
		/** True when the user may edit the Space. */
		canEdit: boolean;
		/** Reason for a refusal, when one could be determined. */
		issue?: AuthIssue;
		/** Owner segment of the Space id, set once an org check took place. */
		spaceOrg?: string;
		/** Role that led to the decision; only used for logging. */
		role?: string;
	}

	/**
	 * Decide whether the authenticated user can edit this Space, using
	 * only data obtained from `whoAmI` (no extra API round-trip).
	 *
	 * Cases, checked in this order:
	 * 1. `SPACE_ID` unset (running outside a Space) -> no access.
	 * 2. The owner segment of `SPACE_ID` equals the username
	 *    -> write access.
	 * 3. The owner is not among `orgs` -> `no-org-grant`. Typically the
	 *    OAuth grant is missing the org checkbox, which is recoverable via
	 *    re-consent.
	 * 4. The owner is among `orgs` and the org-level role is `write` or
	 *    `admin` -> write access.
	 * 5. Otherwise, if ANY resource group listed for that org carries a
	 *    `write`/`admin` role -> write access. This applies whatever the
	 *    org-level role is (the motivating case is `contributor`). It is
	 *    optimistic by design: the Space may not live in that group, in
	 *    which case HF rejects the eventual push, but that beats blocking
	 *    the user up-front.
	 * 6. Anything else -> `not-member`.
	 *
	 * A missing `roleInOrg` is treated as `read`.
	 *
	 * History (from the original author's notes): an earlier version read
	 * the role from `/api/organizations/{name}/members`, but that endpoint
	 * returns plain user records without a `role`, which left everyone in
	 * case 6. Reading `roleInOrg` from whoami replaced it.
	 *
	 * @param username Account name of the authenticated user.
	 * @param orgs Org entries from the whoami response.
	 * @returns The decision plus context for the login page and the logs.
	 */
	function evaluateWriteAccess(
		username: string,
		orgs: OrgWithRole[],
	): WriteAccessResult {
		if (!SPACE_ID) return { canEdit: false };

		// SPACE_ID has the form "<owner>/<space>".
		const spaceOwner = SPACE_ID.split("/")[0];
		if (spaceOwner === username) return { canEdit: true };

		const org = orgs.find((o) => o.name === spaceOwner);
		if (!org) {
			return { canEdit: false, issue: "no-org-grant", spaceOrg: spaceOwner };
		}

		const orgRole = org.roleInOrg ?? "read";
		if (WRITE_ROLES.has(orgRole)) {
			return { canEdit: true, spaceOrg: spaceOwner, role: orgRole };
		}

		// The first write-capable group wins; its name only feeds the log line.
		const writeGroup = (org.resourceGroups || []).find((g) =>
			WRITE_ROLES.has(g.role),
		);
		if (writeGroup) {
			return {
				canEdit: true,
				spaceOrg: spaceOwner,
				role: `${orgRole} via ${writeGroup.name}=${writeGroup.role}`,
			};
		}

		return {
			canEdit: false,
			issue: "not-member",
			spaceOrg: spaceOwner,
			role: orgRole,
		};
	}