/**
 * Shared code-language auto-detection.
 *
 * Code blocks authored without an explicit language render as plain text in
 * both the editor (PM decorations) and the publisher (static SSR), because
 * Shiki falls back to `"text"` when no language is set. This module fills that
 * gap: given the block's source, it guesses the language with highlight.js
 * (via lowlight, already a dependency) and maps the guess onto a language
 * Shiki actually bundles.
 *
 * Used by BOTH `code-block-shiki.tsx` (editor) and `highlight-code.ts`
 * (publisher) so a language-less block is highlighted identically everywhere,
 * including in articles that were already written.
 *
 * Detection results are cached by source text: the editor rebuilds its
 * decoration set on every doc change, and we don't want to re-run the
 * (relatively expensive) auto-detector for blocks whose content is unchanged.
 */
import { createLowlight, common } from "lowlight";
import { isSupportedLang, normalizeLang } from "./shiki-config.js";

/** Single lowlight instance, built from lowlight's `common` grammar set. */
const lowlight = createLowlight(common);

/**
 * Minimum trimmed length a snippet must have before auto-detection is
 * attempted; anything shorter carries too little signal to classify.
 */
const MIN_LENGTH = 3;

/**
 * Lowest highlight.js relevance score that is accepted as a real match.
 * Relevance is roughly proportional to how many language-specific constructs
 * matched, so a small floor keeps arbitrary prose from being tagged as code
 * while still catching short-but-clear snippets (e.g. a couple of imports).
 */
const MIN_RELEVANCE = 2;

/**
 * Memoized detection results, keyed by the exact source text. The value is
 * the detected language name, or "" when detection was inconclusive.
 */
const cache: Map<string, string> = new Map();

/** Maximum number of entries kept in `cache`. */
const CACHE_MAX = 200;
/**
 * Best-effort detection of the Shiki language for a code block that has no
 * explicit language.
 *
 * The source is run through lowlight's `highlightAuto`. The guess is accepted
 * only when its relevance is at least `MIN_RELEVANCE` and, once passed through
 * `normalizeLang`, it satisfies `isSupportedLang`. Every outcome, including an
 * inconclusive one, is memoized by the exact source text.
 *
 * @param code - Raw source of the code block; `null` and `undefined` are
 *   treated as an empty string.
 * @returns A supported Shiki language name, or `""` when the trimmed snippet
 *   is shorter than `MIN_LENGTH`, when detection is inconclusive or throws, or
 *   when the guess lands on a language Shiki doesn't bundle.
 */
export function detectShikiLang(code: string | null | undefined): string {
  const text = code ?? "";
  if (text.trim().length < MIN_LENGTH) return "";

  const cached = cache.get(text);
  if (cached !== undefined) {
    // Re-insert on a hit so the entry moves to the back of the Map's
    // insertion order and becomes the last candidate for eviction.
    cache.delete(text);
    cache.set(text, cached);
    return cached;
  }

  let detected = "";
  try {
    const result = lowlight.highlightAuto(text);
    const lang = result.data?.language ?? "";
    const relevance = result.data?.relevance ?? 0;
    if (lang && relevance >= MIN_RELEVANCE) {
      const normalized = normalizeLang(lang);
      if (isSupportedLang(normalized)) detected = normalized;
    }
  } catch {
    // Detection is best-effort: a detector failure simply means "no language".
    detected = "";
  }

  // Evict only the least recently used entry once the cap is reached. Every
  // edit to a language-less block produces a new key, so the cap is hit
  // regularly while typing; clearing the whole cache at that point would also
  // drop the entries of unchanged blocks and force all of them to be
  // re-detected at once.
  if (cache.size >= CACHE_MAX) {
    const oldest = cache.keys().next();
    if (!oldest.done) cache.delete(oldest.value);
  }
  cache.set(text, detected);
  return detected;
}