File size: 13,985 Bytes
b3d2294
bafd8cb
e371833
b3d2294
7652473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3d2294
 
 
 
 
7652473
 
 
 
 
 
 
 
 
 
 
 
 
 
b3d2294
 
bafd8cb
b3d2294
bafd8cb
 
 
 
 
 
 
b3d2294
f6678ab
 
b3d2294
f6678ab
bafd8cb
 
 
 
f6678ab
bafd8cb
 
 
f6678ab
 
 
 
 
 
9fba033
 
bafd8cb
 
 
9fba033
 
bafd8cb
 
 
 
7652473
 
 
 
 
 
 
 
 
bafd8cb
7652473
bafd8cb
 
9fba033
bafd8cb
 
 
 
 
 
 
 
 
7652473
 
 
 
bafd8cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908601
 
 
 
 
 
 
 
 
 
bafd8cb
 
 
f6678ab
 
bafd8cb
 
 
 
f6678ab
bafd8cb
 
 
 
 
 
5908601
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d10f68e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bafd8cb
 
 
 
 
 
 
b3d2294
 
7652473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3d2294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7652473
 
 
 
 
 
 
 
 
 
 
 
b3d2294
7652473
 
 
 
 
 
 
 
b3d2294
 
 
 
 
 
7652473
 
 
 
 
 
 
 
 
b3d2294
7652473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3d2294
7652473
 
 
 
 
b3d2294
7652473
 
3616e87
7652473
 
 
 
b3d2294
7652473
 
 
 
b3d2294
7652473
 
 
 
 
 
 
 
 
b3d2294
7652473
 
 
 
 
 
 
b3d2294
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
import { whoAmI, type WhoAmIUser } from "@huggingface/hub";
import { randomBytes } from "crypto";
import type { Request, Response } from "express";

/**
 * Why an authenticated user can't edit the Space.
 *
 * - `no-org-grant`: the Space owner is not the signed-in user and does
 *   not appear in `whoAmI(...).orgs`. The expected cause is that the
 *   Space is owned by an org and the user didn't tick the
 *   "Allow access to <org>" checkbox on HF's OAuth consent screen, so
 *   we can't see their role and have to assume permission is missing.
 *   Fix: re-authorize with `prompt=consent` and tick the org this time.
 *   NOTE(review): a Space owned by a *different plain user* also lands
 *   here, because whoami data alone can't tell an org from a user.
 * - `not-member`: the org does appear in `whoAmI(...).orgs`, but neither
 *   the user's org-level role nor any of their resource-group roles is
 *   `write`/`admin`. There's nothing the user can do from inside the
 *   app - they need an org admin to grant them write access.
 */
export type AuthIssue = "no-org-grant" | "not-member";

/**
 * Signed-in user as resolved from an HF access token, together with
 * the outcome of the edit-permission check for this Space.
 */
export interface AuthUser {
  /** HF handle, taken from the whoami response's `name`. */
  name: string;
  /** Display name; falls back to `name` when whoami has no `fullname`. */
  fullName: string;
  /** Avatar URL from whoami, or an empty string when none is returned. */
  avatarUrl: string;
  /** Whether the user is allowed to edit this Space. */
  canEdit: boolean;
  /**
   * Set when the user authenticated but can't edit. Lets the login
   * page show a targeted message ("missing org grant" vs. "not a
   * member") instead of a generic "sign in" prompt that's confusing
   * when you're literally already signed in.
   */
  accessIssue?: AuthIssue;
  /**
   * Login (HF handle) of the Space owner when it's an org. Surfaced
   * on the login page so the user knows which org to tick on the
   * consent screen. Undefined when the Space is owned by a regular
   * user.
   */
  spaceOrg?: string;
}

// HF injects these env vars when hf_oauth: true is in README
// Every lookup falls back with `||`, so an empty-string value is treated
// exactly like an unset variable.
const SPACE_ID = process.env.SPACE_ID || "";
const SPACE_HOST = process.env.SPACE_HOST || "";
const OAUTH_CLIENT_ID = process.env.OAUTH_CLIENT_ID || "";
const OAUTH_CLIENT_SECRET = process.env.OAUTH_CLIENT_SECRET || "";
const OAUTH_SCOPES = process.env.OAUTH_SCOPES || "openid profile";
const OPENID_PROVIDER_URL = process.env.OPENID_PROVIDER_URL || "https://huggingface.co";

// Name of the cookie that stores the HF access token.
const COOKIE_NAME = "hf_access_token";

// Dev mode is inferred from a missing SPACE_ID. In this file it relaxes
// the cookie flags (non-secure, SameSite=lax) and makes resolveUser grant
// edit access to any authenticated user.
const IS_DEV = !SPACE_ID;

/**
 * Report whether OAuth sign-in is configured, i.e. both the client ID
 * and the client secret resolved to non-empty strings.
 */
export function isOAuthEnabled(): boolean {
  const hasClientId = OAUTH_CLIENT_ID !== "";
  const hasClientSecret = OAUTH_CLIENT_SECRET !== "";
  return hasClientId && hasClientSecret;
}

/**
 * Build the `redirect_uri` used for both the authorize redirect and
 * the token exchange.
 *
 * @returns The HTTPS callback on SPACE_HOST when it is set, otherwise
 *   the fixed local development callback.
 */
function getRedirectUri(): string {
  // NOTE(review): the previous comment said the dev callback goes through
  // the Vite proxy on port 5678, yet the URL targets port 8080 - confirm
  // which port is actually registered for local development.
  return SPACE_HOST
    ? `https://${SPACE_HOST}/auth/callback`
    : "http://localhost:8080/auth/callback";
}

/**
 * Where to send the browser once the access-token cookie has been set.
 *
 * @returns The relative `/editor` path when SPACE_HOST is set, otherwise
 *   the absolute URL of the local Vite dev server.
 */
function getPostLoginRedirect(): string {
  const runningOnSpaceHost = Boolean(SPACE_HOST);
  // Without a Space host we are in local development: hand the browser
  // back to the Vite dev server instead of a path on this backend.
  return runningOnSpaceHost ? "/editor" : "http://localhost:5678/";
}

// CSRF protection: every OAuth `state` value we hand out is remembered
// here together with the time (ms since epoch) it was issued. The store
// lives in process memory only.
const pendingStates = new Map<string, number>();

/**
 * Drop every pending state that was issued more than ten minutes ago.
 */
function cleanupStates() {
  const oldestAllowed = Date.now() - 10 * 60 * 1000;
  pendingStates.forEach((issuedAt, key) => {
    if (issuedAt < oldestAllowed) pendingStates.delete(key);
  });
}

/**
 * GET /oauth/authorize - start the OAuth flow by redirecting to HF.
 *
 * A fresh random `state` is generated and remembered in `pendingStates`
 * so the callback can reject responses we did not initiate.
 *
 * `?prompt=consent` on the incoming request is passed through to HF.
 * That forces the consent screen to be shown again even when the user
 * already holds a live OAuth grant, which is the recovery path from the
 * "no-org-grant" login page: HF skips consent on repeat sign-ins, so a
 * user who left the org checkbox unticked the first time would otherwise
 * bounce straight back to the same failure without seeing the org
 * toggle again. Any other `prompt` value is ignored.
 */
export function handleOAuthAuthorize(req: Request, res: Response) {
  // Purge expired states first so the store doesn't grow without bound.
  cleanupStates();

  const state = randomBytes(16).toString("hex");
  pendingStates.set(state, Date.now());

  const query = new URLSearchParams();
  query.set("client_id", OAUTH_CLIENT_ID);
  query.set("redirect_uri", getRedirectUri());
  query.set("response_type", "code");
  query.set("scope", OAUTH_SCOPES);
  query.set("state", state);

  const wantsConsent = req.query.prompt === "consent";
  if (wantsConsent) {
    query.set("prompt", "consent");
  }

  res.redirect(`${OPENID_PROVIDER_URL}/oauth/authorize?${query.toString()}`);
}

/**
 * GET /auth/callback - exchange code for token, set cookie, redirect to editor.
 *
 * Steps:
 *   1. Validate `code` and `state`. Both must be plain non-empty strings
 *      (query values can also arrive as arrays/objects), and `state`
 *      must be one we issued within the last ten minutes. A state is
 *      single-use: it is removed as soon as it is looked up.
 *   2. POST the code to the provider's `/oauth/token` endpoint using
 *      HTTP Basic client authentication.
 *   3. Require a non-empty string `access_token` in the response.
 *   4. Store the token in an httpOnly cookie and redirect.
 *
 * Responds 400 for an invalid callback and 500 when the token exchange
 * fails or throws.
 */
export async function handleOAuthCallback(req: Request, res: Response) {
  // Must match the expiry window applied by cleanupStates(). Checked here
  // as well because cleanupStates() only runs on the next authorize hit,
  // so an old state could otherwise stay redeemable indefinitely.
  const STATE_TTL_MS = 10 * 60 * 1000;

  const { code, state } = req.query;
  if (typeof code !== "string" || typeof state !== "string" || !code || !state) {
    res.status(400).send("Invalid OAuth callback");
    return;
  }

  const issuedAt = pendingStates.get(state);
  // Single-use: drop the state whether or not it turns out to be valid.
  pendingStates.delete(state);
  if (issuedAt === undefined || Date.now() - issuedAt > STATE_TTL_MS) {
    res.status(400).send("Invalid OAuth callback");
    return;
  }

  try {
    const tokenRes = await fetch(`${OPENID_PROVIDER_URL}/oauth/token`, {
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
        Authorization: `Basic ${Buffer.from(`${OAUTH_CLIENT_ID}:${OAUTH_CLIENT_SECRET}`).toString("base64")}`,
      },
      body: new URLSearchParams({
        grant_type: "authorization_code",
        code,
        redirect_uri: getRedirectUri(),
      }),
    });

    if (!tokenRes.ok) {
      const text = await tokenRes.text();
      console.error("[auth] token exchange failed:", tokenRes.status, text);
      res.status(500).send("OAuth token exchange failed");
      return;
    }

    // The response body is untrusted JSON: check the fields we rely on
    // instead of asserting their types.
    const tokenData = (await tokenRes.json()) as
      | { access_token?: unknown; expires_in?: unknown }
      | null;
    const accessToken = tokenData?.access_token;
    if (typeof accessToken !== "string" || accessToken === "") {
      console.error("[auth] token exchange failed: response has no access_token");
      res.status(500).send("OAuth token exchange failed");
      return;
    }
    const expiresIn = tokenData?.expires_in;

    // Cookie lifetime strategy: follow the token's own expiration so
    // the cookie dies when the token does. computeCookieMaxAge reads
    // the JWT's `exp` claim (a public field - no verification needed,
    // we only want the timestamp), falls back to `expires_in` from the
    // OAuth response, and applies a 30-day floor so a stale 8-hour
    // fallback can never sneak back in.
    const maxAge = computeCookieMaxAge({
      access_token: accessToken,
      ...(typeof expiresIn === "number" ? { expires_in: expiresIn } : {}),
    });

    res.cookie(COOKIE_NAME, accessToken, {
      httpOnly: true,
      secure: !IS_DEV,
      sameSite: IS_DEV ? "lax" : "none",
      maxAge,
      path: "/",
    });

    res.redirect(getPostLoginRedirect());
  } catch (err) {
    console.error("[auth] callback error:", err);
    res.status(500).send("OAuth callback error");
  }
}

/**
 * Compute the cookie max-age in milliseconds for an HF access token.
 *
 * Order of preference:
 *   1. Decode the JWT's `exp` claim and use `exp - now` directly.
 *      HF tokens are JWTs and their `exp` is the most authoritative
 *      "when does this stop working" signal we have.
 *   2. Fall back to `expires_in` from the OAuth /token response, but
 *      only when step 1 produced nothing usable (token is not a JWT,
 *      payload is malformed, `exp` is missing or already in the past).
 *      It is a fallback, not a competitor: taking the larger of the
 *      two would let the cookie outlive the token.
 *   3. Floor at 30 days. Anything shorter than that is almost
 *      certainly a misread - HF currently issues 30-day tokens,
 *      so an 8-hour fallback would just teach users to mistrust
 *      the editor when it boots them out mid-day.
 *   4. Cap at 400 days, the upper limit on cookie lifetime that
 *      Chromium-derived browsers enforce (RFC 6265bis); larger
 *      values would be clamped by the browser anyway.
 *
 * @param tokenData Parsed OAuth token response; `expires_in` is in seconds.
 * @returns A finite number of milliseconds between 30 and 400 days.
 */
function computeCookieMaxAge(tokenData: {
  access_token: string;
  expires_in?: number;
}): number {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const THIRTY_DAYS_MS = 30 * DAY_MS;
  const FOUR_HUNDRED_DAYS_MS = 400 * DAY_MS;

  // 1. JWT exp claim.
  const jwtLifeMs = readJwtRemainingMs(tokenData.access_token);

  // 2. expires_in fallback. Non-finite values are ignored so NaN can
  //    never leak through Math.max/Math.min into the cookie's maxAge.
  const responseLifeMs =
    typeof tokenData.expires_in === "number" &&
    Number.isFinite(tokenData.expires_in)
      ? tokenData.expires_in * 1000
      : undefined;

  const lifeMs = jwtLifeMs ?? responseLifeMs ?? 0;

  // 3. & 4. Floor + cap.
  return Math.min(Math.max(lifeMs, THIRTY_DAYS_MS), FOUR_HUNDRED_DAYS_MS);
}

/**
 * Read the remaining lifetime (ms) from a JWT's `exp` claim.
 *
 * The signature is deliberately not verified - that's HF's job at every
 * API call - we only want the timestamp.
 *
 * @returns Milliseconds until `exp`, or `undefined` when the token is not
 *   a three-part JWT, its payload can't be decoded, or `exp` is missing,
 *   non-finite, or not in the future.
 */
function readJwtRemainingMs(token: string): number | undefined {
  try {
    const segments = token.split(".");
    if (segments.length !== 3) return undefined;

    // base64url -> base64 -> JSON
    const body = segments[1] ?? "";
    const padded = body
      .replace(/-/g, "+")
      .replace(/_/g, "/")
      .padEnd(Math.ceil(body.length / 4) * 4, "=");
    const payload: unknown = JSON.parse(
      Buffer.from(padded, "base64").toString("utf-8"),
    );
    if (typeof payload !== "object" || payload === null) return undefined;

    const exp = (payload as { exp?: unknown }).exp;
    if (typeof exp !== "number" || !Number.isFinite(exp)) return undefined;

    // `exp` is expressed in seconds since the epoch.
    const remainingMs = exp * 1000 - Date.now();
    return remainingMs > 0 ? remainingMs : undefined;
  } catch {
    // Not a string, or malformed JWT: let the caller fall back.
    return undefined;
  }
}

/**
 * Sign the visitor out of the editor by clearing the HF access token
 * cookie.
 *
 * The attributes passed here (path, secure, sameSite, httpOnly) repeat
 * the ones `handleOAuthCallback` used when it set the cookie. Browsers
 * only remove a cookie when the expiring Set-Cookie matches the
 * original; otherwise it is treated as a different cookie and the
 * original sticks around.
 *
 * The handler itself doesn't look at the request, so it can be mounted
 * for more than one method: the frontend uses POST + fetch (so the call
 * can't be CSRF'd from a cross-site form), while a GET fallback lets us
 * link to /api/auth/logout directly during incident response.
 *
 * Always responds `200 { ok: true }`.
 */
export function handleOAuthLogout(_req: Request, res: Response) {
  const onSpace = !IS_DEV;
  res.clearCookie(COOKIE_NAME, {
    path: "/",
    httpOnly: true,
    secure: onSpace,
    sameSite: onSpace ? "none" : "lax",
  });
  res.status(200);
  res.json({ ok: true });
}

/**
 * Extract the access token from a raw `Cookie` header string.
 *
 * The cookie is written with Express's `res.cookie`, which URI-encodes
 * the value by default, so the value is URI-decoded here to give back
 * the token exactly as it was issued. For tokens made only of unreserved
 * characters the decode is a no-op.
 *
 * @param cookieHeader Raw `Cookie` header, or undefined when absent.
 * @returns The token, or undefined when the header is missing/empty or
 *   doesn't contain a non-empty access-token cookie.
 */
export function extractToken(cookieHeader: string | undefined): string | undefined {
  if (!cookieHeader) return undefined;
  const match = cookieHeader.match(new RegExp(`(?:^|;\\s*)${COOKIE_NAME}=([^;]+)`));
  const rawValue = match?.[1];
  if (rawValue === undefined) return undefined;
  try {
    return decodeURIComponent(rawValue);
  } catch {
    // Malformed percent-escape: hand back the raw value rather than
    // throwing on a cookie the client controls.
    return rawValue;
  }
}

/**
 * Roles in an HF org that grant write access to the org's repos. We
 * accept `admin` and `write` at the org level, plus `write`/`admin`
 * on any resource group when the user is only a `contributor` at the
 * org level (HF lets contributors edit specific repos via per-group
 * roles, and the Space might live in one of those groups).
 *
 * `read` and bare `contributor` (no write group) are read-only.
 *
 * The same set is used for both checks: the org-level `roleInOrg` and
 * each resource group's `role`.
 */
const WRITE_ROLES = new Set(["write", "admin"]);

/**
 * Shape of `info.orgs[i]` as actually returned by the live HF
 * whoami endpoint. The official `@huggingface/hub` typings strip
 * `roleInOrg` and `resourceGroups`, but they're documented and used
 * by the Python SDK. We narrow to a local interface so the TS
 * compiler lets us read them without `any`.
 */
interface OrgWithRole {
  /** Org login; compared against the owner segment of SPACE_ID. */
  name: string;
  /** Org-level role of the user; treated as `read` when absent. */
  roleInOrg?: "admin" | "write" | "contributor" | "read";
  /** Per-resource-group roles; treated as an empty list when absent. */
  resourceGroups?: Array<{ name: string; role: string }>;
}

/**
 * Turn an HF access token into user info plus edit permission.
 *
 * The token is looked up with `whoAmI`. In dev mode every authenticated
 * user may edit; otherwise the decision is delegated to
 * `evaluateWriteAccess` and logged.
 *
 * @param accessToken Token from the cookie, or undefined when absent.
 * @returns The resolved user, or null when the token is missing/empty
 *   or anything in the lookup throws (the failure is logged as a
 *   warning).
 */
export async function resolveUser(
  accessToken: string | undefined
): Promise<AuthUser | null> {
  if (!accessToken) return null;

  try {
    const info = (await whoAmI({
      accessToken,
      hubUrl: "https://huggingface.co",
    })) as WhoAmIUser;

    // Fields shared by the dev-mode and the regular result.
    const identity = {
      name: info.name,
      fullName: info.fullname || info.name,
      avatarUrl: info.avatarUrl || "",
    };

    if (IS_DEV) {
      console.log(`[auth] user=${identity.name} canEdit=true (dev mode)`);
      return { ...identity, canEdit: true };
    }

    const { canEdit, issue, spaceOrg, role } = evaluateWriteAccess(
      identity.name,
      (info.orgs || []) as OrgWithRole[],
    );

    // One log line; optional fields are appended only when present.
    const logFields = [`[auth] user=${identity.name} canEdit=${canEdit}`];
    if (issue) logFields.push(`issue=${issue}`);
    if (spaceOrg) logFields.push(`org=${spaceOrg}`);
    if (role) logFields.push(`role=${role}`);
    console.log(logFields.join(" "));

    return { ...identity, canEdit, accessIssue: issue, spaceOrg };
  } catch (err) {
    console.warn("[auth] whoAmI failed:", (err as Error).message);
    return null;
  }
}

/** Outcome of the edit-permission check done by `evaluateWriteAccess`. */
interface WriteAccessResult {
  /** True when the user is allowed to edit the Space. */
  canEdit: boolean;
  /** Why editing is denied; only set on some `canEdit: false` results. */
  issue?: AuthIssue;
  /** Login of the org owning the Space, when applicable. */
  spaceOrg?: string;
  /** Resolved org role, for logging. */
  role?: string;
}

/**
 * Decide whether the authenticated user can edit this Space, using
 * only data we got from `whoAmI` (one API round-trip, already done).
 *
 * Evaluated in this order:
 * 1. SPACE_ID unset (running outside a Space)     -> no access.
 * 2. Owner segment of SPACE_ID equals `username`  -> write access.
 * 3. Owner is not in `orgs`                       -> `no-org-grant`;
 *    the expected cause is an OAuth grant missing the org checkbox,
 *    recoverable via re-consent. NOTE(review): a Space owned by a
 *    different plain user also ends up here.
 * 4. Owner is in `orgs` with org role `write` or `admin`
 *                                                 -> write access.
 * 5. Org role is anything else, but at least one
 *    `resourceGroups[i].role` is `write`/`admin`  -> write access
 *    (the Space probably lives in that group; HF will reject the
 *    eventual push if not, but the UX is much better than
 *    blocking up-front).
 * 6. Anything else                                -> `not-member`.
 *
 * A missing `roleInOrg` is treated as `read`.
 *
 * Previously this function called `/api/organizations/{name}/members`
 * to read the role, but that endpoint doesn't return a `role` field
 * for any caller - it returns plain user records. Everyone was
 * therefore stuck in case 6 even with full write access. Reading
 * `roleInOrg` from whoami is the documented path.
 *
 * @param username HF handle of the authenticated user.
 * @param orgs     Orgs visible through the user's whoami response.
 */
function evaluateWriteAccess(
  username: string,
  orgs: OrgWithRole[],
): WriteAccessResult {
  if (SPACE_ID === "") return { canEdit: false };

  const [spaceOwner] = SPACE_ID.split("/");
  if (username === spaceOwner) return { canEdit: true };

  const membership = orgs.find(({ name }) => name === spaceOwner);
  if (membership === undefined) {
    return { canEdit: false, issue: "no-org-grant", spaceOrg: spaceOwner };
  }

  const orgRole = membership.roleInOrg ?? "read";
  if (WRITE_ROLES.has(orgRole)) {
    return { canEdit: true, spaceOrg: spaceOwner, role: orgRole };
  }

  // Org-level role is read-only: look for the first resource group that
  // grants write access instead.
  for (const group of membership.resourceGroups || []) {
    if (!WRITE_ROLES.has(group.role)) continue;
    return {
      canEdit: true,
      spaceOrg: spaceOwner,
      role: `${orgRole} via ${group.name}=${group.role}`,
    };
  }

  return {
    canEdit: false,
    issue: "not-member",
    spaceOrg: spaceOwner,
    role: orgRole,
  };
}