| import { describe, it, expect } from "vitest"; |
| import { |
| renderArticleMarkdown, |
| stripHtmlToText, |
| } from "../src/publisher/markdown-renderer.js"; |
| import type { PublishMeta, CitationData } from "../src/publisher/html-renderer.js"; |
|
|
/**
 * Publication metadata fixture shared by the tests in this file.
 *
 * Two authors, each attached to one affiliation, plus a date and a DOI.
 * NOTE(review): `affiliationIndices` 1 and 2 look like 1-based positions
 * into `affiliations` — confirm against the `PublishMeta` definition.
 */
const META: PublishMeta = {
  title: "Test Article",
  subtitle: "A subtitle",
  description: "A short description for SEO",
  authors: [
    {
      name: "Alice",
      affiliationIndices: [1],
      affiliationNames: ["MIT"],
    },
    {
      name: "Bob",
      affiliationIndices: [2],
      affiliationNames: ["HF"],
    },
  ],
  affiliations: [
    { name: "MIT" },
    { name: "HF" },
  ],
  date: "2026-04-30",
  doi: "10.1234/abcd.efgh",
};
|
|
/**
 * Wraps the given child nodes in a root node of type "doc".
 *
 * @param content - Child nodes, stored on the result by reference (not copied).
 * @returns An object `{ type: "doc", content }`.
 */
function doc(content: any[]) {
  return { type: "doc", content };
}
|
|
/**
 * Header portion of the rendered markdown: title line, description
 * blockquote, metadata bullets and the separator rule.
 */
describe("renderArticleMarkdown - header", () => {
  // Build a fresh one-empty-paragraph document per call so tests never share nodes.
  const emptyDoc = () => doc([{ type: "paragraph" }]);

  it("emits an llms.txt-style header with title, description, authors, date and DOI", () => {
    const md = renderArticleMarkdown(emptyDoc(), META);
    const expectedFragments = [
      "# Test Article",
      "> A short description for SEO",
      "- **Authors**: Alice, Bob",
      "- **Published**: 2026-04-30",
      "- **DOI**: https://doi.org/10.1234/abcd.efgh",
      "---",
    ];
    for (const fragment of expectedFragments) {
      expect(md).toContain(fragment);
    }
  });

  it("falls back to subtitle when description is empty", () => {
    const withoutDescription = { ...META, description: "" };
    const md = renderArticleMarkdown(emptyDoc(), withoutDescription);
    expect(md).toContain("> A subtitle");
  });

  it("collapses multi-line titles", () => {
    // As written, the title holds a literal backslash followed by "n",
    // not a real line break; the same two-character sequence must be absent
    // from the output.
    const withEscapedBreak = { ...META, title: "Line one\\nLine two" };
    const md = renderArticleMarkdown(emptyDoc(), withEscapedBreak);
    expect(md).toContain("# Line one Line two");
    expect(md).not.toContain("\\n");
  });
});
|
|
/**
 * Standard block and inline nodes: headings, marks, lists, code blocks,
 * math and tables.
 */
describe("renderArticleMarkdown - block nodes", () => {
  // Small node builders; each returns the same JSON shape a hand-written
  // literal would, so the documents fed to the renderer are unchanged.
  const text = (value: string) => ({ type: "text", text: value });
  const marked = (value: string, mark: Record<string, unknown>) => ({
    type: "text",
    text: value,
    marks: [mark],
  });
  const paragraph = (...content: unknown[]) => ({ type: "paragraph", content });
  const heading = (level: number, value: string) => ({
    type: "heading",
    attrs: { level },
    content: [text(value)],
  });
  const listItem = (value: string) => ({
    type: "listItem",
    content: [paragraph(text(value))],
  });
  const list = (type: "bulletList" | "orderedList", values: string[]) => ({
    type,
    content: values.map((value) => listItem(value)),
  });
  const tableRow = (cellType: "tableHeader" | "tableCell", values: string[]) => ({
    type: "tableRow",
    content: values.map((value) => ({
      type: cellType,
      content: [paragraph(text(value))],
    })),
  });

  // Renders the given top-level nodes with the shared metadata fixture.
  const render = (nodes: unknown[]) => renderArticleMarkdown(doc(nodes), META);

  it("renders headings with the correct markdown level", () => {
    const md = render([heading(2, "Hello"), heading(3, "Sub")]);
    expect(md).toContain("## Hello");
    expect(md).toContain("### Sub");
  });

  it("applies bold/italic/code/link marks", () => {
    const md = render([
      paragraph(
        marked("bold", { type: "bold" }),
        text(" "),
        marked("italic", { type: "italic" }),
        text(" "),
        marked("code", { type: "code" }),
        text(" "),
        marked("link", { type: "link", attrs: { href: "https://example.com" } }),
      ),
    ]);
    for (const fragment of ["**bold**", "*italic*", "`code`", "[link](https://example.com)"]) {
      expect(md).toContain(fragment);
    }
  });

  it("renders bullet and ordered lists", () => {
    const md = render([
      list("bulletList", ["one", "two"]),
      list("orderedList", ["first", "second"]),
    ]);
    for (const line of ["- one", "- two", "1. first", "2. second"]) {
      expect(md).toContain(line);
    }
  });

  it("renders code blocks with language fence", () => {
    const codeBlock = {
      type: "codeBlock",
      attrs: { language: "ts" },
      content: [text("const x = 1;")],
    };
    const md = render([codeBlock]);
    expect(md).toContain("```ts");
    expect(md).toContain("const x = 1;");
    expect(md).toContain("```");
  });

  it("renders inline and block math", () => {
    const inlineMath = { type: "inlineMath", attrs: { latex: "E = mc^2" } };
    const blockMath = { type: "blockMath", attrs: { latex: "\\int_0^1 x dx" } };
    const md = render([paragraph(text("Energy: "), inlineMath), blockMath]);
    expect(md).toContain("$E = mc^2$");
    expect(md).toContain("$$\n\\int_0^1 x dx\n$$");
  });

  it("renders tables with a header row separator", () => {
    const table = {
      type: "table",
      content: [
        tableRow("tableHeader", ["Col A", "Col B"]),
        tableRow("tableCell", ["1", "2"]),
      ],
    };
    const md = render([table]);
    for (const row of ["| Col A | Col B |", "| --- | --- |", "| 1 | 2 |"]) {
      expect(md).toContain(row);
    }
  });
});
|
|
/**
 * Custom editor components and how each one is expected to appear in the
 * markdown output.
 */
describe("renderArticleMarkdown - custom components", () => {
  // Every case renders a document holding exactly one top-level node.
  const renderNode = (node: unknown) => renderArticleMarkdown(doc([node]), META);

  // A paragraph containing a single plain text run.
  const paragraph = (value: string) => ({
    type: "paragraph",
    content: [{ type: "text", text: value }],
  });

  it("collapses HtmlEmbed to a single inline placeholder with title and src", () => {
    const embed = {
      type: "htmlEmbed",
      attrs: { src: "d3-chart.html", title: "Citations over time", desc: "" },
    };
    const md = renderNode(embed);
    expect(md).toContain("*[Interactive visualization: Citations over time]*");
    expect(md).not.toContain("<iframe");
  });

  it("renders Note as a blockquote", () => {
    const note = { type: "note", content: [paragraph("Heads up.")] };
    expect(renderNode(note)).toContain("> Heads up.");
  });

  it("renders Accordion with bold title and inner content", () => {
    const accordion = {
      type: "accordion",
      attrs: { title: "More details" },
      content: [paragraph("Inside.")],
    };
    const md = renderNode(accordion);
    expect(md).toContain("**More details**");
    expect(md).toContain("Inside.");
  });

  it("renders QuoteBlock with attribution", () => {
    const quote = {
      type: "quoteBlock",
      attrs: { author: "Ada Lovelace", source: "Notes" },
      content: [paragraph("The future is open.")],
    };
    const md = renderNode(quote);
    expect(md).toContain("> The future is open.");
    expect(md).toContain("> -- Ada Lovelace, Notes");
  });

  it("renders HfUser as a markdown link to huggingface.co/<u>", () => {
    const user = {
      type: "hfUser",
      attrs: { username: "tfrere", name: "Thibaud Frere" },
    };
    expect(renderNode(user)).toContain("[Thibaud Frere](https://huggingface.co/tfrere)");
  });

  it("renders Mermaid as a fenced ```mermaid block", () => {
    const diagram = {
      type: "mermaid",
      attrs: { code: "graph TD\n A --> B" },
    };
    const md = renderNode(diagram);
    for (const fragment of ["```mermaid", "graph TD", "A --> B"]) {
      expect(md).toContain(fragment);
    }
  });

  it("unwraps Wide / FullWidth / Stack containers", () => {
    // Only the "wide" container is exercised by this case.
    const wide = { type: "wide", content: [paragraph("Wide content.")] };
    const md = renderNode(wide);
    expect(md).toContain("Wide content.");
    expect(md).not.toContain("[wide]");
  });
});
|
|
/**
 * Citation rendering per style, footnote collection, and the References
 * section built from a pre-formatted HTML bibliography.
 */
describe("renderArticleMarkdown - citations and footnotes", () => {
  const text = (value: string) => ({ type: "text", text: value });

  it("renders citations as keys for APA and as numeric tags for IEEE", () => {
    // One-entry citation data that differs only in the requested style.
    const citationsFor = (style: CitationData["style"]): CitationData => ({
      entries: [{ id: "smith2024" }],
      orderedKeys: ["smith2024"],
      style,
    });

    // The same document object is rendered under both styles.
    const json = doc([
      {
        type: "paragraph",
        content: [
          text("See "),
          { type: "citation", attrs: { key: "smith2024", label: "Smith (2024)" } },
          text("."),
        ],
      },
    ]);

    const apaOutput = renderArticleMarkdown(json, META, citationsFor("apa"));
    expect(apaOutput).toContain("Smith (2024)");

    const ieeeOutput = renderArticleMarkdown(json, META, citationsFor("ieee"));
    expect(ieeeOutput).toContain("[1]");
  });

  it("collects footnotes and emits a footnotes section", () => {
    const footnote = (content: string) => ({ type: "footnote", attrs: { content } });
    const body = {
      type: "paragraph",
      content: [
        text("Body"),
        footnote("First note"),
        text(" more "),
        footnote("Second note"),
      ],
    };
    const md = renderArticleMarkdown(doc([body]), META);
    const expectedFragments = [
      "[^1]",
      "[^2]",
      "## Footnotes",
      "[^1]: First note",
      "[^2]: Second note",
    ];
    for (const fragment of expectedFragments) {
      expect(md).toContain(fragment);
    }
  });

  it("appends a References section from the formatted bibliography", () => {
    const biblio = '<div class="csl-entry">Smith, J. (2024). <i>Test Paper</i>. Journal.</div>';
    const body = { type: "paragraph", content: [text("Body")] };
    // Citation data is deliberately omitted; only the bibliography HTML is supplied.
    const md = renderArticleMarkdown(doc([body]), META, undefined, biblio);
    expect(md).toContain("## References");
    expect(md).toContain("Smith, J. (2024).");
    expect(md).toContain("Test Paper");
    expect(md).not.toContain("<div");
  });
});
|
|
/**
 * Unit tests for `stripHtmlToText`: anchor conversion, entity decoding and
 * block-tag handling.
 */
describe("stripHtmlToText", () => {
  it("converts <a href> to a markdown link", () => {
    const html = '<a href="https://example.com">click</a>';
    expect(stripHtmlToText(html)).toBe("[click](https://example.com)");
  });

  it("decodes common HTML entities", () => {
    // The input must carry real entity references. Feeding already-decoded
    // text would let this assertion pass even if no decoding happened at all.
    expect(stripHtmlToText("Tom &amp; Jerry &lt;3")).toBe("Tom & Jerry <3");
  });

  it("collapses block tags into newlines and removes the rest", () => {
    const html = "<p>One.</p><p>Two.</p>";
    // Leading/trailing whitespace is trimmed before comparing.
    expect(stripHtmlToText(html).trim()).toBe("One.\nTwo.");
  });
});
|
|