/**
 * Shared code-language auto-detection.
 *
 * Code blocks authored without an explicit language render as plain text in
 * both the editor (PM decorations) and the publisher (static SSR), because
 * Shiki falls back to `"text"` when no language is set. This module fills that
 * gap: given the block's source, it guesses the language with highlight.js
 * (via lowlight, already a dependency) and maps the guess onto a language
 * Shiki actually bundles.
 *
 * Used by BOTH `code-block-shiki.tsx` (editor) and `highlight-code.ts`
 * (publisher) so a language-less block is highlighted identically everywhere,
 * including in articles that were already written.
 *
 * Detection results are cached by source text: the editor rebuilds its
 * decoration set on every doc change, and we don't want to re-run the
 * (relatively expensive) auto-detector for blocks whose content is unchanged.
 */
import { createLowlight, common } from "lowlight";

import { isSupportedLang, normalizeLang } from "./shiki-config.js";

const lowlight = createLowlight(common);

/** Don't auto-detect on trivially short snippets - too little signal. */
const MIN_LENGTH = 3;

/**
 * highlight.js relevance is roughly proportional to how many language-specific
 * constructs matched. A small floor avoids tagging arbitrary prose as code
 * while still catching short-but-clear snippets (e.g. a couple of imports).
 */
const MIN_RELEVANCE = 2;

/** Upper bound on the number of memoized detection results. */
const CACHE_MAX = 200;

/**
 * Memoized detection results, keyed by the exact (untrimmed) source text.
 * The value is the detected language, or "" for an inconclusive detection.
 *
 * A `Map` iterates in insertion order, which lets it double as an LRU list:
 * the first key is always the least recently used entry.
 */
const cache = new Map<string, string>();

/**
 * Best-effort detection of the Shiki language for a code block that has no
 * explicit language.
 *
 * @param code - Source text of the code block; `null`/`undefined` are treated
 *   as an empty string.
 * @returns A supported Shiki language name, or "" when the snippet is shorter
 *   than `MIN_LENGTH` after trimming, when detection is inconclusive (no
 *   language, relevance below `MIN_RELEVANCE`, or the detector threw), or when
 *   it lands on a language Shiki doesn't bundle.
 */
export function detectShikiLang(code: string | null | undefined): string {
  const text = code ?? "";
  if (text.trim().length < MIN_LENGTH) return "";

  const cached = cache.get(text);
  if (cached !== undefined) {
    // Re-insert so this entry becomes the most recently used one.
    cache.delete(text);
    cache.set(text, cached);
    return cached;
  }

  let detected = "";
  try {
    const result = lowlight.highlightAuto(text);
    const lang = result.data?.language ?? "";
    const relevance = result.data?.relevance ?? 0;
    if (lang && relevance >= MIN_RELEVANCE) {
      const normalized = normalizeLang(lang);
      if (isSupportedLang(normalized)) detected = normalized;
    }
  } catch {
    // Deliberately best-effort: a detector failure means "no language", and
    // the caller falls back to plain text.
    detected = "";
  }

  // Evict only the least recently used entry. Every edit to a language-less
  // block produces a new key, so clearing the whole cache at the cap would
  // also discard results for blocks whose content never changed.
  if (cache.size >= CACHE_MAX) {
    const oldest = cache.keys().next().value;
    if (oldest !== undefined) cache.delete(oldest);
  }
  cache.set(text, detected);
  return detected;
}