tfrere HF Staff Cursor committed on
Commit
d062662
·
1 Parent(s): a286ca4

fix(publisher): render math server-side via KaTeX

Browse files

TipTap's math extension emits empty inline/block-math placeholders during
generateHTML (rendering is a view-only step). Add a math transformer that
fills those placeholders with static KaTeX HTML so formulas appear in the
published article. Pin the KaTeX CSS CDN version to the server-side katex.

Co-authored-by: Cursor <cursoragent@cursor.com>

backend/package.json CHANGED
@@ -37,6 +37,7 @@
37
  "ai": "^6.0.158",
38
  "dotenv": "^17.4.1",
39
  "express": "^4.21.0",
 
40
  "linkedom": "^0.18.12",
41
  "lowlight": "^3.3.0",
42
  "multer": "^2.1.1",
 
37
  "ai": "^6.0.158",
38
  "dotenv": "^17.4.1",
39
  "express": "^4.21.0",
40
+ "katex": "^0.16.45",
41
  "linkedom": "^0.18.12",
42
  "lowlight": "^3.3.0",
43
  "multer": "^2.1.1",
backend/src/publisher/html-renderer.ts CHANGED
@@ -133,9 +133,9 @@ export async function renderArticleHTML(
133
  <meta name="twitter:description" content="${safeDesc}">
134
  ${meta.ogImage ? `<meta name="twitter:image" content="${escapeHtml(meta.ogImage)}">` : ""}
135
 
136
- <!-- KaTeX CSS -->
137
- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.21/dist/katex.min.css"
138
- integrity="sha384-zh0CIslj+VczCZtlzBcjt5ppRcsAmDnRem7ESsYwWwg3m/OaJ2l4x7YBZl9Kxxib"
139
  crossorigin="anonymous">
140
 
141
  <!-- Code blocks are pre-highlighted at publish time with Shiki (dual-theme
 
133
  <meta name="twitter:description" content="${safeDesc}">
134
  ${meta.ogImage ? `<meta name="twitter:image" content="${escapeHtml(meta.ogImage)}">` : ""}
135
 
136
+ <!-- KaTeX CSS (version pinned to the katex used server-side in the math transformer) -->
137
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.45/dist/katex.min.css"
138
+ integrity="sha384-exTa2AZBTSYZY6XrZeHr+SthVy0uzRopdM/I9+nd/JMLcTcklL0+cMoJGes1iO1i"
139
  crossorigin="anonymous">
140
 
141
  <!-- Code blocks are pre-highlighted at publish time with Shiki (dual-theme
backend/src/publisher/transformers/index.ts CHANGED
@@ -18,7 +18,10 @@
18
  * remote URL into a standard `<figure><iframe src=...>`.
19
  * 8. HfUser — independent; converts atomic placeholder divs into
20
  * Hugging Face user profile cards.
21
- * 9. Footnote runs last so collected texts include those inside every
 
 
 
22
  * other transformed block (tables, callouts, accordions...).
23
  */
24
  import type { Transformer } from "./types.js";
@@ -30,6 +33,7 @@ import { highlightCodeTransformer } from "./highlight-code.js";
30
  import { htmlEmbedTransformer } from "./html-embed.js";
31
  import { iframeEmbedTransformer } from "./iframe-embed.js";
32
  import { hfUserTransformer } from "./hf-user.js";
 
33
  import { footnoteTransformer } from "./footnote.js";
34
 
35
  export const transformers: Transformer[] = [
@@ -41,6 +45,7 @@ export const transformers: Transformer[] = [
41
  htmlEmbedTransformer,
42
  iframeEmbedTransformer,
43
  hfUserTransformer,
 
44
  footnoteTransformer,
45
  ];
46
 
 
18
  * remote URL into a standard `<figure><iframe src=...>`.
19
  * 8. HfUser — independent; converts atomic placeholder divs into
20
  * Hugging Face user profile cards.
21
+ * 9. Math — renders the empty `data-type="inline-math"/"block-math"`
22
+ * placeholders into static KaTeX HTML. Runs before
23
+ * Footnote so math inside a footnote is captured rendered.
24
+ * 10. Footnote — runs last so collected texts include those inside every
25
  * other transformed block (tables, callouts, accordions...).
26
  */
27
  import type { Transformer } from "./types.js";
 
33
  import { htmlEmbedTransformer } from "./html-embed.js";
34
  import { iframeEmbedTransformer } from "./iframe-embed.js";
35
  import { hfUserTransformer } from "./hf-user.js";
36
+ import { mathTransformer } from "./math.js";
37
  import { footnoteTransformer } from "./footnote.js";
38
 
39
  export const transformers: Transformer[] = [
 
45
  htmlEmbedTransformer,
46
  iframeEmbedTransformer,
47
  hfUserTransformer,
48
+ mathTransformer,
49
  footnoteTransformer,
50
  ];
51
 
backend/src/publisher/transformers/math.ts ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Transformer } from "./types.js";
2
+ import katex from "katex";
3
+
4
+ /**
5
+ * Renders math placeholders into static KaTeX HTML.
6
+ *
7
+ * `@tiptap/extension-mathematics` only produces empty placeholders in its
8
+ * static `renderHTML` (the actual KaTeX rendering lives in a client-side
9
+ * NodeView):
10
+ * - inline: `<span data-type="inline-math" data-latex="...">`
11
+ * - block: `<div data-type="block-math" data-latex="...">`
12
+ *
13
+ * `generateHTML()` (used by the publisher) never runs the NodeView, so without
14
+ * this transformer the published page would contain empty, invisible math
15
+ * nodes. We render the `data-latex` server-side with `katex.renderToString`
16
+ * so the published article is fully static (the page already loads the KaTeX
17
+ * stylesheet; no client-side JS is required).
18
+ */
19
const SELECTOR = '[data-type="inline-math"], [data-type="block-math"]';

/**
 * Publisher transformer that fills math placeholders with static KaTeX markup.
 *
 * For each element matching `SELECTOR`:
 *  - a missing or whitespace-only `data-latex` attribute causes the element
 *    to be removed from the document;
 *  - otherwise the element's inner HTML is replaced with the KaTeX rendering
 *    of that attribute, using display mode when `data-type` is `"block-math"`.
 */
export const mathTransformer: Transformer = {
  name: "math",
  apply(document) {
    // Snapshot the matches up front: elements may be removed while iterating.
    const placeholders = Array.from(document.querySelectorAll(SELECTOR));

    placeholders.forEach((node) => {
      const source = node.getAttribute("data-latex") || "";

      if (source.trim().length === 0) {
        // No formula to show — remove the placeholder rather than publish an
        // empty, invisible node.
        node.remove();
        return;
      }

      const isBlock = node.getAttribute("data-type") === "block-math";

      // With `throwOnError: false` a malformed expression is rendered as a
      // KaTeX error node rather than aborting the whole publish.
      node.innerHTML = katex.renderToString(source, {
        displayMode: isBlock,
        throwOnError: false,
        output: "htmlAndMathml",
      });
    });
  },
};
backend/tests/__snapshots__/html-renderer-snapshot.test.ts.snap CHANGED
@@ -61,9 +61,9 @@ exports[`snapshot - full render > matches snapshot for a typical article 1`] = `
61
  <meta name="twitter:description" content="A test article">
62
 
63
 
64
- <!-- KaTeX CSS -->
65
- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.21/dist/katex.min.css"
66
- integrity="sha384-zh0CIslj+VczCZtlzBcjt5ppRcsAmDnRem7ESsYwWwg3m/OaJ2l4x7YBZl9Kxxib"
67
  crossorigin="anonymous">
68
 
69
  <!-- Code blocks are pre-highlighted at publish time with Shiki (dual-theme
 
61
  <meta name="twitter:description" content="A test article">
62
 
63
 
64
+ <!-- KaTeX CSS (version pinned to the katex used server-side in the math transformer) -->
65
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.45/dist/katex.min.css"
66
+ integrity="sha384-exTa2AZBTSYZY6XrZeHr+SthVy0uzRopdM/I9+nd/JMLcTcklL0+cMoJGes1iO1i"
67
  crossorigin="anonymous">
68
 
69
  <!-- Code blocks are pre-highlighted at publish time with Shiki (dual-theme
backend/tests/math-transformer.test.ts ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Math transformer tests.
3
+ *
4
+ * `@tiptap/extension-mathematics` only emits empty placeholders in its static
5
+ * `renderHTML`, so the publisher must render them server-side. These tests
6
+ * exercise the full publish path (renderArticleHTML) to guarantee the
7
+ * published article contains rendered KaTeX, never raw LaTeX or empty nodes.
8
+ */
9
+ import { describe, it, expect } from "vitest";
10
+ import { renderArticleHTML, type PublishMeta } from "../src/publisher/html-renderer.js";
11
+ import type { PublishCSS } from "../src/publisher/index.js";
12
+
13
// Stylesheet bundle with every section left empty, so the rendered page
// contains no project CSS text that the string assertions could match.
const EMPTY_CSS: PublishCSS = {
  variables: "",
  reset: "",
  base: "",
  layout: "",
  print: "",
  editorTokens: "",
  article: "",
  components: "",
  publisher: "",
};

// Article metadata shared by every test in this file.
const META: PublishMeta = {
  title: "Math Article",
  description: "A test article with math",
  authors: [{ name: "Alice", affiliationIndices: [1], affiliationNames: ["MIT"] }],
  affiliations: [{ name: "MIT" }],
  date: "2025-01-01",
};

// Wraps a list of content nodes in a root node of type "doc".
const doc = (content: any[]) => ({ type: "doc", content });
34
+
35
describe("math transformer", () => {
  it("renders an inline math placeholder into static KaTeX", async () => {
    const json = doc([
      {
        type: "paragraph",
        content: [
          { type: "text", text: "Energy is " },
          { type: "inlineMath", attrs: { latex: "E = mc^2" } },
          { type: "text", text: " famously." },
        ],
      },
    ]);
    const html = await renderArticleHTML(json, META, EMPTY_CSS);

    expect(html).toContain('class="katex"');
    // KaTeX keeps the source in a MathML annotation, which is expected.
    expect(html).toContain("E = mc^2");
  });

  it("renders a block math placeholder in display mode", async () => {
    const json = doc([{ type: "blockMath", attrs: { latex: "\\int_0^1 x\\,dx = \\frac{1}{2}" } }]);
    const html = await renderArticleHTML(json, META, EMPTY_CSS);

    expect(html).toContain('class="katex"');
    expect(html).toContain("katex-display");
  });

  it("does not throw on malformed LaTeX (renders an error node instead)", async () => {
    const json = doc([
      { type: "paragraph", content: [{ type: "inlineMath", attrs: { latex: "\\frac{" } }] },
    ]);
    const html = await renderArticleHTML(json, META, EMPTY_CSS);

    // Asserting on the bare substring "katex" would always pass: every
    // rendered page links the KaTeX stylesheet, whose URL contains "katex".
    // Check for the class KaTeX puts on its error node instead.
    expect(html).toContain("katex-error");
  });

  it("drops empty math placeholders so they leave no invisible gap", async () => {
    const json = doc([
      { type: "paragraph", content: [{ type: "inlineMath", attrs: { latex: "" } }] },
    ]);
    const html = await renderArticleHTML(json, META, EMPTY_CSS);

    expect(html).not.toContain('data-type="inline-math"');
  });
});