| import type { EmbedDataFileMeta } from "../editor/embeds/embed-data-store"; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
/**
 * File extensions (lower-case, without the leading dot) that are treated as
 * accepted data files.
 */
export const ACCEPTED_DATA_EXTS = [
  "csv",
  "tsv",
  "json",
  "txt",
  "ndjson",
] as const;

/** Union of the literal extension strings listed in {@link ACCEPTED_DATA_EXTS}. */
export type AcceptedDataExt = (typeof ACCEPTED_DATA_EXTS)[number];
|
|
/**
 * Size limit for a data file: 3 * 1024 * 1024 = 3,145,728.
 * NOTE(review): presumably a byte count compared against a file's size by
 * callers outside this module — confirm at the call sites.
 */
export const MAX_DATA_FILE_SIZE = 3 * 1024 ** 2;
|
|
/**
 * Extracts the extension of a file name.
 *
 * The name is lower-cased first; the extension is the run of ASCII letters
 * and digits after the last dot, and it must reach the end of the name.
 *
 * @param name - File name to inspect.
 * @returns The lower-case extension without the dot, or `""` when the name
 *   does not end in `.<alphanumerics>`.
 */
export function extFromName(name: string): string {
  const found = /\.([a-z0-9]+)$/.exec(name.toLowerCase());
  // Group 1 always participates when the pattern matches.
  return found?.[1] ?? "";
}
|
|
/**
 * Type guard: checks whether `ext` is one of {@link ACCEPTED_DATA_EXTS}.
 *
 * The comparison is exact (case-sensitive, no leading dot).
 *
 * @param ext - Extension string to test.
 * @returns `true` when `ext` is listed, narrowing it to `AcceptedDataExt`.
 */
export function isAcceptedExt(ext: string): ext is AcceptedDataExt {
  // Widen the literal tuple so `includes` accepts an arbitrary string.
  const accepted: readonly string[] = ACCEPTED_DATA_EXTS;
  return accepted.includes(ext);
}
|
|
/**
 * Splits one line of delimited text into its fields.
 *
 * A double quote toggles quoted mode wherever it appears; inside quoted mode
 * the delimiter is literal text and `""` stands for one quote character. The
 * quote characters themselves are not part of the returned fields. A quote
 * left open at the end of the line simply ends with the line.
 *
 * @param line - A single line, without its line terminator.
 * @param delim - Delimiter; compared against one character at a time.
 * @returns The fields in order; always at least one entry (`[""]` for an
 *   empty line).
 */
function splitCsvLine(line: string, delim: string): string[] {
  const fields: string[] = [];
  let field = "";
  let quoted = false;
  let pos = 0;

  while (pos < line.length) {
    const ch = line[pos];
    pos += 1; // `pos` now points at the character after `ch`

    if (quoted) {
      if (ch !== '"') {
        field += ch;
      } else if (line[pos] === '"') {
        // Doubled quote inside a quoted section: emit one quote, skip the pair.
        field += '"';
        pos += 1;
      } else {
        quoted = false;
      }
      continue;
    }

    if (ch === '"') {
      quoted = true;
    } else if (ch === delim) {
      fields.push(field);
      field = "";
    } else {
      field += ch;
    }
  }

  // The last field has no trailing delimiter to flush it.
  fields.push(field);
  return fields;
}
|
|
/**
 * Summary of a data file's content as produced by the parsers in this module.
 * Both members are optional; a parser that cannot determine anything returns
 * an empty object.
 */
interface ParsedShape {
  /** Column or key names, when they could be determined. */
  columns?: string[];
  /** Number of rows or records, when it could be determined. */
  rowCount?: number;
}
|
|
/**
 * Splits delimited text into records without breaking inside quoted fields.
 *
 * A field is quoted when it starts with a double quote; inside it, line
 * breaks and the delimiter are literal and `""` is an escaped quote. Empty
 * records (blank lines) are dropped. If the text ends while still inside a
 * quoted field the quoting is unbalanced, and the function falls back to
 * plain line splitting so that one stray quote cannot merge the rest of the
 * file into a single record.
 */
function splitDelimitedRecords(text: string, delim: string): string[] {
  const records: string[] = [];
  let recordStart = 0;
  let inQuotes = false;
  let atFieldStart = true;

  for (let i = 0; i < text.length; i++) {
    const c = text[i];
    if (inQuotes) {
      if (c === '"') {
        if (text[i + 1] === '"') {
          i++; // escaped quote, stay inside the quoted field
        } else {
          inQuotes = false;
        }
      }
      continue;
    }
    if (c === "\n" || c === "\r") {
      if (i > recordStart) records.push(text.slice(recordStart, i));
      if (c === "\r" && text[i + 1] === "\n") i++; // treat CRLF as one break
      recordStart = i + 1;
      atFieldStart = true;
    } else if (c === '"' && atFieldStart) {
      inQuotes = true;
      atFieldStart = false;
    } else {
      atFieldStart = c === delim;
    }
  }

  if (inQuotes) {
    // Unbalanced quotes: use simple line-based splitting instead.
    return text.split(/\r\n|\n|\r/).filter((l) => l.length > 0);
  }
  if (recordStart < text.length) records.push(text.slice(recordStart));
  return records;
}

/**
 * Infers column names and row count from delimited text (CSV, TSV, ...).
 *
 * The first non-empty record is the header; its fields are trimmed and
 * returned as `columns`. Every further non-empty record counts as one row.
 * A leading byte-order mark is ignored so it does not end up in the first
 * column name, and line breaks inside quoted fields do not start new rows.
 *
 * @param content - Full text of the file.
 * @param delim - Field delimiter, e.g. `","` or `"\t"`.
 * @returns `{}` when there are no non-empty records, otherwise `columns` and
 *   `rowCount` (header excluded).
 */
function parseDelimited(content: string, delim: string): ParsedShape {
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
  const records = splitDelimitedRecords(text, delim);
  const [headerRecord] = records;
  if (headerRecord === undefined) return {};
  const header = splitCsvLine(headerRecord, delim).map((c) => c.trim());
  return {
    columns: header,
    rowCount: records.length - 1,
  };
}
|
|
/**
 * Infers the shape of a JSON document.
 *
 * - Top-level array: `rowCount` is the array length and `columns` are the
 *   keys of the first element that is a plain (non-array) object, or
 *   `undefined` when there is none. Array elements are skipped so that an
 *   array of arrays does not report index keys ("0", "1", ...) as columns,
 *   matching how `parseNdjson` treats array lines.
 * - Top-level object: `columns` are its own keys; no `rowCount`.
 * - Anything else, including invalid JSON: `{}`.
 *
 * @param content - JSON text.
 * @returns The inferred shape; never throws on malformed input.
 */
function parseJson(content: string): ParsedShape {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    // Shape inference is best-effort: unparsable content has no known shape.
    return {};
  }

  if (Array.isArray(parsed)) {
    const firstRecord = parsed.find(
      (row: unknown): row is Record<string, unknown> =>
        typeof row === "object" && row !== null && !Array.isArray(row),
    );
    return {
      rowCount: parsed.length,
      columns: firstRecord ? Object.keys(firstRecord) : undefined,
    };
  }
  if (typeof parsed === "object" && parsed !== null) {
    return { columns: Object.keys(parsed) };
  }
  return {};
}
|
|
/**
 * Infers the shape of newline-delimited JSON.
 *
 * Lines that are empty or whitespace-only are ignored. `rowCount` is the
 * number of remaining lines, whether or not they are valid JSON. `columns`
 * come from the first remaining line only: its keys when it parses to a
 * non-array object, otherwise `undefined`.
 *
 * @param content - NDJSON text.
 * @returns `{}` when no non-blank line exists, otherwise `rowCount` and
 *   `columns`.
 */
function parseNdjson(content: string): ParsedShape {
  const records: string[] = [];
  for (const line of content.split(/\r\n|\n|\r/)) {
    if (line.trim().length > 0) records.push(line);
  }

  const [head] = records;
  if (head === undefined) return {};

  // Keys of a JSON object literal, or undefined for anything else.
  const keysOf = (raw: string): string[] | undefined => {
    try {
      const value: unknown = JSON.parse(raw);
      if (typeof value === "object" && value !== null && !Array.isArray(value)) {
        return Object.keys(value);
      }
      return undefined;
    } catch {
      // An unparsable first line just means the columns are unknown.
      return undefined;
    }
  };

  return { rowCount: records.length, columns: keysOf(head) };
}
|
|
/**
 * Infers row count and column names for file content, based on its extension.
 *
 * `"csv"` is parsed as comma-delimited and `"tsv"` as tab-delimited text;
 * `"json"` and `"ndjson"` go to their JSON parsers. Any other extension
 * (including `"txt"`) yields an empty shape. The extension is compared
 * exactly, so it must already be lower-case.
 *
 * @param ext - File extension without the dot.
 * @param content - Full text of the file.
 * @returns The inferred shape, or `{}` when the extension is not handled.
 */
export function inferDataShape(ext: string, content: string): ParsedShape {
  if (ext === "csv") return parseDelimited(content, ",");
  if (ext === "tsv") return parseDelimited(content, "\t");
  if (ext === "json") return parseJson(content);
  if (ext === "ndjson") return parseNdjson(content);
  return {};
}
|
|
| |
| |
| |
| |
| |
/**
 * Renders one bullet line describing a data file.
 *
 * Layout: `- <name> (<EXT>, <size>[ - <n> rows])[ - columns: a, b, ...]`.
 * The row part appears only when `meta.rowCount` is defined. The column part
 * appears only when `meta.columns` is non-empty; it lists at most the first
 * 12 names and appends `", ..."` when more exist.
 *
 * @param meta - File metadata; reads `name`, `ext`, `size`, `rowCount` and
 *   `columns`.
 * @returns The formatted line, without a trailing newline.
 */
export function formatManifestLine(meta: EmbedDataFileMeta): string {
  const size = formatBytes(meta.size);

  let rowsPart = "";
  if (meta.rowCount !== undefined) {
    rowsPart = ` - ${meta.rowCount} rows`;
  }

  let columnsPart = "";
  if (meta.columns && meta.columns.length > 0) {
    const listed = meta.columns.slice(0, 12).join(", ");
    const overflow = meta.columns.length > 12 ? ", ..." : "";
    columnsPart = ` - columns: ${listed}${overflow}`;
  }

  return `- ${meta.name} (${meta.ext.toUpperCase()}, ${size}${rowsPart})${columnsPart}`;
}
|
|
/**
 * Formats a byte count as a short human-readable string.
 *
 * - below 1024: `"<bytes> B"`
 * - below 1 MiB: kibibytes with one decimal, `"<x.x> KB"`
 * - otherwise: mebibytes with two decimals, `"<x.xx> MB"`
 *
 * Values just under 1 MiB whose kibibyte figure would round up to
 * `"1024.0 KB"` are shown as `"1.00 MB"` instead.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size.
 */
export function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;
  const kib = (bytes / 1024).toFixed(1);
  // toFixed rounds, so the unit is chosen from the rounded text: otherwise
  // 1,048,525..1,048,575 bytes would print as "1024.0 KB".
  if (bytes < 1024 * 1024 && kib !== "1024.0") return `${kib} KB`;
  return `${(bytes / (1024 * 1024)).toFixed(2)} MB`;
}
|
|