quickgrid committed on
Commit
76938dc
·
verified ·
1 Parent(s): ea5426c

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +1491 -271
index.html CHANGED
@@ -1,302 +1,1522 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
3
  <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Universal Tokenizer Visualizer</title>
7
- <script src="https://cdn.tailwindcss.com"></script>
8
- <link href="https://cdn.jsdelivr.net/npm/@fontsource/inter@5.0.16/index.min.css" rel="stylesheet">
 
 
9
  <style>
10
- body { font-family: 'Inter', sans-serif; background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%); color: #e2e8f0; }
11
- .glass { background: rgba(30, 41, 59, 0.6); backdrop-filter: blur(12px); border: 1px solid rgba(255,255,255,0.08); }
12
- .token-chip { transition: all 0.15s cubic-bezier(0.4, 0, 0.2, 1); cursor: default; position: relative; user-select: none; }
13
- .token-chip:hover { transform: translateY(-3px) scale(1.05); z-index: 20; box-shadow: 0 8px 20px -4px rgba(0,0,0,0.5); }
14
- .token-chip::after {
15
- content: attr(data-tooltip);
16
- position: absolute; bottom: 110%; left: 50%; transform: translateX(-50%) translateY(8px);
17
- background: #0f172a; color: #cbd5e1; padding: 8px 12px; border-radius: 8px;
18
- font-size: 0.75rem; line-height: 1.4; white-space: pre-wrap; max-width: 260px;
19
- opacity: 0; pointer-events: none; transition: all 0.2s; border: 1px solid #334155;
20
- box-shadow: 0 4px 12px rgba(0,0,0,0.4); z-index: 30; text-align: left;
21
- }
22
- .token-chip:hover::after { opacity: 1; transform: translateX(-50%) translateY(0); }
23
- .spinner { width: 20px; height: 20px; border: 2px solid rgba(255,255,255,0.2); border-radius: 50%; border-top-color: #60a5fa; animation: spin 0.8s linear infinite; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  @keyframes spin { to { transform: rotate(360deg); } }
25
- ::-webkit-scrollbar { width: 8px; }
26
- ::-webkit-scrollbar-track { background: #1e293b; }
27
- ::-webkit-scrollbar-thumb { background: #475569; border-radius: 4px; }
28
- .fade-in { animation: fadeIn 0.3s ease-out forwards; opacity: 0; }
29
- @keyframes fadeIn { to { opacity: 1; transform: translateY(0); } }
30
- .glow-btn { box-shadow: 0 0 20px rgba(59, 130, 246, 0.3); }
31
- .glow-btn:hover { box-shadow: 0 0 30px rgba(59, 130, 246, 0.5); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  </style>
33
  </head>
34
- <body class="min-h-screen flex flex-col items-center justify-start p-4 md:p-8">
35
-
36
- <header class="w-full max-w-5xl mb-8 text-center md:text-left">
37
- <h1 class="text-4xl font-bold bg-gradient-to-r from-cyan-400 via-blue-500 to-purple-500 bg-clip-text text-transparent tracking-tight">
38
- 🧩 Universal Tokenizer Visualizer
39
- </h1>
40
- <p class="text-slate-400 mt-2 text-sm md:text-base">
41
- Load any HuggingFace tokenizer directly in your browser. 100% client-side, zero GPU cost.
42
- </p>
43
- </header>
44
 
45
- <main class="w-full max-w-5xl grid grid-cols-1 lg:grid-cols-12 gap-6">
46
- <!-- Control Panel -->
47
- <section class="lg:col-span-4 glass rounded-2xl p-5 flex flex-col gap-4 shadow-2xl">
48
- <div>
49
- <label class="block text-sm font-medium text-slate-300 mb-1.5">HuggingFace Model ID</label>
50
- <input type="text" id="modelId" value="gpt2"
51
- class="w-full bg-slate-900/80 border border-slate-600 rounded-lg px-3 py-2.5 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500/50 transition placeholder-slate-500"
52
- placeholder="e.g., gpt2, meta-llama/Llama-3-8B">
53
- <p class="text-xs text-slate-500 mt-1">Must contain <code class="bg-slate-800 px-1 rounded text-slate-300">tokenizer.json</code></p>
 
 
 
 
54
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- <div class="flex-grow">
57
- <label class="block text-sm font-medium text-slate-300 mb-1.5">Input Text</label>
58
- <textarea id="inputText" rows="7"
59
- class="w-full bg-slate-900/80 border border-slate-600 rounded-lg px-3 py-2.5 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500/50 resize-y transition placeholder-slate-500"
60
- placeholder="Type or paste text to visualize..."></textarea>
 
 
 
 
 
 
 
 
 
 
 
 
61
  </div>
 
 
 
 
 
 
 
 
62
 
63
- <div class="flex flex-col gap-2 bg-slate-900/40 p-3 rounded-lg border border-slate-700/50">
64
- <label class="flex items-center gap-2 cursor-pointer select-none">
65
- <input type="checkbox" id="addSpecial" class="w-4 h-4 rounded bg-slate-800 border-slate-600 text-blue-500 focus:ring-blue-500" checked>
66
- <span class="text-sm text-slate-300">Include special tokens (BOS/EOS)</span>
67
- </label>
68
- <label class="flex items-center gap-2 cursor-pointer select-none">
69
- <input type="checkbox" id="showBytes" class="w-4 h-4 rounded bg-slate-800 border-slate-600 text-blue-500 focus:ring-blue-500">
70
- <span class="text-sm text-slate-300">Show byte values for specials</span>
71
- </label>
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  </div>
73
 
74
- <button id="tokenizeBtn" class="w-full bg-blue-600 hover:bg-blue-500 active:scale-[0.98] text-white font-semibold py-2.5 px-4 rounded-lg transition flex items-center justify-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed glow-btn">
75
- <span>✨ Tokenize</span>
76
- </button>
77
-
78
- <div id="status" class="text-sm text-slate-400 h-6 flex items-center gap-2 truncate px-1"></div>
79
- </section>
80
-
81
- <!-- Visualization Panel -->
82
- <section class="lg:col-span-8 glass rounded-2xl p-5 flex flex-col shadow-2xl min-h-[500px]">
83
- <div class="flex flex-wrap justify-between items-center mb-4 pb-3 border-b border-slate-700/50 gap-3">
84
- <h2 class="text-lg font-semibold text-slate-100">Token Breakdown</h2>
85
- <div class="flex flex-wrap gap-3 text-sm">
86
- <div class="flex items-center gap-1.5 bg-slate-800 px-3 py-1.5 rounded-md border border-slate-700">
87
- <span class="w-2 h-2 rounded-full bg-blue-400"></span>
88
- <span class="text-slate-300">Tokens: <b id="statTokens" class="text-white">0</b></span>
89
- </div>
90
- <div class="flex items-center gap-1.5 bg-slate-800 px-3 py-1.5 rounded-md border border-slate-700">
91
- <span class="w-2 h-2 rounded-full bg-purple-400"></span>
92
- <span class="text-slate-300">Chars: <b id="statChars" class="text-white">0</b></span>
93
- </div>
94
- <div class="flex items-center gap-1.5 bg-slate-800 px-3 py-1.5 rounded-md border border-slate-700">
95
- <span class="w-2 h-2 rounded-full bg-pink-400"></span>
96
- <span class="text-slate-300">Ratio: <b id="statRatio" class="text-white">0.00</b></span>
97
- </div>
98
  </div>
99
  </div>
100
-
101
- <div id="outputGrid" class="flex flex-wrap gap-2 overflow-y-auto p-3 content-start flex-grow min-h-[350px] bg-slate-900/30 rounded-xl border border-slate-800/40">
102
- <div class="w-full h-full flex flex-col items-center justify-center text-slate-500 select-none py-12">
103
- <svg class="w-14 h-14 mb-4 opacity-30" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M13 10V3L4 14h7v7l9-11h-7z"></path></svg>
104
- <p class="mb-2">Enter text and click <span class="text-blue-400 font-medium">Tokenize</span> to visualize.</p>
105
- <p class="text-xs opacity-60">Try <code>gpt2</code>, <code>bert-base-uncased</code>, or <code>Qwen/Qwen2.5-7B</code></p>
106
  </div>
107
  </div>
108
- </section>
 
109
  </main>
110
 
111
- <footer class="mt-10 text-center text-slate-600 text-xs flex flex-col md:flex-row items-center gap-2 pb-6">
112
- <span>Powered by <code class="bg-slate-800 px-1.5 py-0.5 rounded text-blue-400">@huggingface/transformers@3</code></span>
113
- <span>•</span>
114
- <span>Runs entirely in-browser via WebAssembly</span>
115
- <span>•</span>
116
- <a href="https://github.com/huggingface/transformers.js" target="_blank" class="text-blue-400 hover:underline">Docs</a>
117
  </footer>
118
 
119
- <script type="module">
120
- // Import transformers.js from CDN (ESM compatible)
121
- import { AutoTokenizer } from 'https://esm.sh/@huggingface/transformers@3.0.2';
122
-
123
- const COLORS = [
124
- '#3b82f6', '#8b5cf6', '#ec4899', '#f59e0b', '#10b981',
125
- '#06b6d4', '#6366f1', '#f97316', '#84cc16', '#14b8a6', '#f43f5e', '#a855f7'
126
- ];
127
-
128
- const els = {
129
- modelId: document.getElementById('modelId'),
130
- inputText: document.getElementById('inputText'),
131
- btn: document.getElementById('tokenizeBtn'),
132
- output: document.getElementById('outputGrid'),
133
- status: document.getElementById('status'),
134
- statTokens: document.getElementById('statTokens'),
135
- statChars: document.getElementById('statChars'),
136
- statRatio: document.getElementById('statRatio'),
137
- addSpecial: document.getElementById('addSpecial'),
138
- showBytes: document.getElementById('showBytes')
139
- };
140
-
141
- let tokenizerCache = null;
142
-
143
- function getColor(idx) { return COLORS[idx % COLORS.length]; }
144
-
145
- function setStatus(msg, type = 'info') {
146
- els.status.textContent = msg;
147
- els.status.className = `text-sm h-6 flex items-center gap-2 truncate px-1 ${
148
- type === 'error' ? 'text-red-400' :
149
- type === 'success' ? 'text-green-400' :
150
- 'text-slate-400'
151
- }`;
152
- }
153
-
154
- async function loadTokenizer(modelId) {
155
- if (tokenizerCache && tokenizerCache._id === modelId) return tokenizerCache.tokenizer;
156
-
157
- setStatus('📥 Fetching tokenizer configuration...', 'info');
158
- try {
159
- // Progress callback for better UX on large tokenizers
160
- const tokenizer = await AutoTokenizer.from_pretrained(modelId, {
161
- progress_callback: (p) => {
162
- if (p.status === 'loading' || p.status === 'downloading') {
163
- const pct = Math.round((p.loaded / p.total) * 100);
164
- setStatus(`📥 ${pct}% ${p.file || 'loading'}`, 'info');
165
- }
166
- }
167
- });
168
-
169
- tokenizerCache = { _id: modelId, tokenizer };
170
- return tokenizer;
171
- } catch (err) {
172
- console.error(err);
173
- throw new Error(`Failed to load tokenizer. Check Model ID, network, or repo visibility. (${err.message})`);
174
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  }
 
 
176
 
177
- function getBackend(tokenizer) {
178
- // transformers.js v3 exposes backend_tokenizer
179
- // Fallback to common property names if structure changes
180
- return tokenizer.backend_tokenizer ?? tokenizer.tokenizer ?? tokenizer;
181
- }
182
-
183
- els.btn.addEventListener('click', async () => {
184
- const modelId = els.modelId.value.trim();
185
- const text = els.inputText.value;
186
- const addSpecial = els.addSpecial.checked;
187
- const showBytes = els.showBytes.checked;
188
-
189
- if (!modelId) return setStatus('⚠️ Please enter a Model ID', 'error');
190
- if (!text) return setStatus('⚠️ Please enter text to tokenize', 'error');
191
-
192
- els.btn.disabled = true;
193
- els.btn.innerHTML = '<div class="spinner"></div> <span class="ml-2">Loading...</span>';
194
- els.output.innerHTML = '';
195
- setStatus('🔄 Initializing tokenizer...');
196
-
197
- try {
198
- const tokenizer = await loadTokenizer(modelId);
199
- setStatus('⚡ Running tokenization...', 'info');
200
-
201
- const backend = getBackend(tokenizer);
202
- if (!backend || typeof backend.encode !== 'function') {
203
- throw new Error('Backend tokenizer encoding method not found. This model format might not be supported in-browser.');
204
- }
205
-
206
- // encode returns { tokens, ids, offsets, ... }
207
- const result = backend.encode(text, { add_special_tokens: addSpecial });
208
- renderTokens(result.tokens, result.ids, result.offsets, text, showBytes);
209
- setStatus(`✨ Done • ${result.ids.length} tokens loaded`, 'success');
210
- } catch (err) {
211
- console.error('Tokenization error:', err);
212
- setStatus(`❌ ${err.message}`, 'error');
213
- els.output.innerHTML = `
214
- <div class="w-full text-center py-12">
215
- <div class="text-red-400 font-medium mb-2">Tokenization Failed</div>
216
- <div class="text-slate-500 text-sm max-w-md mx-auto space-y-2">
217
- <p>${err.message}</p>
218
- <div class="bg-slate-800/50 p-3 rounded border border-slate-700 text-left mt-4">
219
- <p class="text-slate-400 mb-1">Troubleshooting:</p>
220
- <ul class="list-disc list-inside space-y-1 text-xs text-slate-500">
221
- <li>Try a base model first: <code>gpt2</code>, <code>bert-base-uncased</code></li>
222
- <li>Ensure <code>tokenizer.json</code> exists in the repo</li>
223
- <li>Custom tokenizers (e.g., DeepSeek-V3) may need specific config</li>
224
- <li>Check browser console for network/CORS errors</li>
225
- </ul>
226
- </div>
227
- </div>
228
- </div>`;
229
- } finally {
230
- els.btn.disabled = false;
231
- els.btn.innerHTML = '<span>✨ Tokenize</span>';
232
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  });
234
 
235
- function renderTokens(tokens, ids, offsets, originalText, showBytes) {
236
- els.output.innerHTML = '';
237
- const count = ids.length;
238
- els.statTokens.textContent = count;
239
- els.statChars.textContent = originalText.length;
240
- els.statRatio.textContent = originalText.length > 0 ? (count / originalText.length).toFixed(2) : '0.00';
241
-
242
- tokens.forEach((tok, i) => {
243
- const [start, end] = offsets[i] || [0, 0];
244
- const chunk = originalText.slice(start, end);
245
- const color = getColor(i);
246
- const id = ids[i];
247
-
248
- const chip = document.createElement('div');
249
- chip.className = 'token-chip rounded-md px-2 py-1.5 text-sm font-mono border select-none fade-in';
250
- chip.style.animationDelay = `${Math.min(i * 0.02, 2)}s`; // Cap animation delay
251
- chip.style.animationDelay = `${i * 0.015}s`;
252
- chip.style.backgroundColor = `${color}15`;
253
- chip.style.borderColor = `${color}80`;
254
- chip.style.color = color;
255
-
256
- // Tooltip details
257
- const cleanChunk = chunk.replace(/"/g, '\\"').replace(/\n/g, '\\n').replace(/\t/g, '\\t');
258
- const byteInfo = showBytes ? `\nBytes: ${[...chunk].map(c => '0x'+c.charCodeAt(0).toString(16).padStart(2,'0')).join(' ')}` : '';
259
- chip.dataset.tooltip = `ID: ${id}\nToken: "${tok}"\nOffset: [${start}, ${end})\nText: "${cleanChunk}"${byteInfo}`;
260
-
261
- // Visual handling of special/invisible tokens
262
- let displayText = tok;
263
- if (tok.startsWith('<') && tok.endsWith('>')) {
264
- chip.classList.add('border-dashed', 'opacity-80');
265
- } else if (chunk === '') {
266
- displayText = '␣';
267
- } else if (chunk === '\n') {
268
- displayText = '↵';
269
- } else if (chunk === '\t') {
270
- displayText = '→';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  }
272
-
273
- chip.innerHTML = `<span class="font-medium">${displayText}</span><span class="opacity-60 text-[10px] ml-1.5 align-top font-sans">${id}</span>`;
274
-
275
- // Interactive highlight
276
- chip.addEventListener('click', () => {
277
- document.querySelectorAll('.token-chip').forEach(c => c.style.boxShadow = 'none');
278
- chip.style.boxShadow = `0 0 0 2px ${color}, 0 4px 12px ${color}40`;
279
- // Optional: auto-scroll to text or highlight in textarea
280
- });
281
-
282
- els.output.appendChild(chip);
283
- });
284
- }
285
-
286
- // Keyboard shortcut: Ctrl/Cmd + Enter
287
- document.addEventListener('keydown', (e) => {
288
- if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
289
- els.btn.click();
290
  }
291
  });
292
 
293
- // Init demo
294
- window.addEventListener('load', () => {
295
- if (!els.inputText.value) {
296
- els.inputText.value = "Tokenization is the bridge between human language and AI. It breaks text into numerical pieces! 🚀";
297
- }
298
- });
299
- </script>
300
- </body>
301
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  <!DOCTYPE html>
2
  <html lang="en">
3
  <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>TokenLens — LLM Tokenizer Playground</title>
7
+ <meta name="description" content="Visualize how large language models tokenize text. Powered by Transformers.js, runs entirely in your browser." />
8
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
9
+ <link href="https://fonts.googleapis.com/css2?family=Bricolage+Grotesque:opsz,wght@12..96,300;12..96,400;12..96,500;12..96,600;12..96,700;12..96,800&family=JetBrains+Mono:wght@300;400;500;700&family=DM+Sans:wght@300;400;500&display=swap" rel="stylesheet" />
10
+
11
  <style>
12
+ /* ─── Design Tokens ─────────────────────────────────── */
13
+ :root {
14
+ --bg: #060b14;
15
+ --bg2: #0b1220;
16
+ --bg3: #101828;
17
+ --bg4: #162035;
18
+ --border: #1a2d4a;
19
+ --border2: #243d60;
20
+ --glow: #1f3d6e;
21
+ --text: #dce8f8;
22
+ --text2: #7899c0;
23
+ --text3: #3d5a80;
24
+ --accent: #4d9ef5;
25
+ --accent2: #8b6af5;
26
+ --green: #34d89a;
27
+ --amber: #f5a623;
28
+ --red: #f55577;
29
+
30
+ /* Token palette — 14 vivid colors for dark bg */
31
+ --t0: #ff8080; --t0b: rgba(255,128,128,.18);
32
+ --t1: #ffb84d; --t1b: rgba(255,184, 77,.18);
33
+ --t2: #ffe066; --t2b: rgba(255,224,102,.18);
34
+ --t3: #7aed91; --t3b: rgba(122,237,145,.18);
35
+ --t4: #4ddfc0; --t4b: rgba( 77,223,192,.18);
36
+ --t5: #56c8f5; --t5b: rgba( 86,200,245,.18);
37
+ --t6: #748ef8; --t6b: rgba(116,142,248,.18);
38
+ --t7: #c484f8; --t7b: rgba(196,132,248,.18);
39
+ --t8: #f57cd4; --t8b: rgba(245,124,212,.18);
40
+ --t9: #fa8072; --t9b: rgba(250,128,114,.18);
41
+ --t10: #8be08b; --t10b: rgba(139,224,139,.18);
42
+ --t11: #f0c040; --t11b: rgba(240,192, 64,.18);
43
+ --t12: #60d4e0; --t12b: rgba( 96,212,224,.18);
44
+ --t13: #e89060; --t13b: rgba(232,144, 96,.18);
45
+ }
46
+
47
+ /* ─── Reset ─────────────────────────────────────────── */
48
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
49
+
50
+ html { scroll-behavior: smooth; }
51
+
52
+ body {
53
+ background: var(--bg);
54
+ color: var(--text);
55
+ font-family: 'DM Sans', sans-serif;
56
+ min-height: 100vh;
57
+ display: flex;
58
+ flex-direction: column;
59
+ overflow-x: hidden;
60
+ }
61
+
62
+ /* ─── Background FX ─────────────────────────────────── */
63
+ #bg-canvas {
64
+ position: fixed;
65
+ inset: 0;
66
+ pointer-events: none;
67
+ z-index: 0;
68
+ }
69
+ .bg-gradient {
70
+ position: fixed;
71
+ inset: 0;
72
+ pointer-events: none;
73
+ z-index: 0;
74
+ background:
75
+ radial-gradient(ellipse 80% 50% at 20% 10%, rgba(77,158,245,.06) 0%, transparent 70%),
76
+ radial-gradient(ellipse 60% 40% at 80% 90%, rgba(139,106,245,.05) 0%, transparent 60%),
77
+ radial-gradient(ellipse 40% 30% at 60% 50%, rgba(52,216,154,.03) 0%, transparent 60%);
78
+ }
79
+ .dot-grid {
80
+ position: fixed;
81
+ inset: 0;
82
+ pointer-events: none;
83
+ z-index: 0;
84
+ background-image: radial-gradient(circle, rgba(77,158,245,.12) 1px, transparent 1px);
85
+ background-size: 36px 36px;
86
+ mask-image: radial-gradient(ellipse 100% 100% at 50% 50%, black 30%, transparent 80%);
87
+ }
88
+
89
+ /* ─── Layout ─────────────────────────────────────────── */
90
+ #app {
91
+ position: relative;
92
+ z-index: 1;
93
+ display: flex;
94
+ flex-direction: column;
95
+ min-height: 100vh;
96
+ }
97
+
98
+ /* ─── Header ─────────────────────────────────────────── */
99
+ header {
100
+ display: flex;
101
+ align-items: center;
102
+ justify-content: space-between;
103
+ padding: 0 32px;
104
+ height: 64px;
105
+ border-bottom: 1px solid var(--border);
106
+ background: rgba(6,11,20,.85);
107
+ backdrop-filter: blur(20px);
108
+ position: sticky;
109
+ top: 0;
110
+ z-index: 100;
111
+ }
112
+
113
+ .logo {
114
+ display: flex;
115
+ align-items: center;
116
+ gap: 10px;
117
+ text-decoration: none;
118
+ color: var(--text);
119
+ }
120
+ .logo-hex {
121
+ width: 34px;
122
+ height: 34px;
123
+ background: linear-gradient(135deg, var(--accent), var(--accent2));
124
+ clip-path: polygon(50% 0%, 93% 25%, 93% 75%, 50% 100%, 7% 75%, 7% 25%);
125
+ display: flex;
126
+ align-items: center;
127
+ justify-content: center;
128
+ font-size: 14px;
129
+ font-family: 'JetBrains Mono', monospace;
130
+ font-weight: 700;
131
+ color: white;
132
+ }
133
+ .logo-name {
134
+ font-family: 'Bricolage Grotesque', sans-serif;
135
+ font-size: 20px;
136
+ font-weight: 700;
137
+ letter-spacing: -0.5px;
138
+ background: linear-gradient(135deg, #dce8f8 40%, var(--accent));
139
+ -webkit-background-clip: text;
140
+ -webkit-text-fill-color: transparent;
141
+ background-clip: text;
142
+ }
143
+ .logo-tag {
144
+ font-size: 10px;
145
+ font-family: 'JetBrains Mono', monospace;
146
+ color: var(--text3);
147
+ background: var(--bg3);
148
+ border: 1px solid var(--border);
149
+ padding: 2px 6px;
150
+ border-radius: 4px;
151
+ letter-spacing: .5px;
152
+ }
153
+
154
+ .header-right {
155
+ display: flex;
156
+ align-items: center;
157
+ gap: 16px;
158
+ }
159
+ .header-badge {
160
+ display: flex;
161
+ align-items: center;
162
+ gap: 6px;
163
+ font-size: 12px;
164
+ color: var(--text2);
165
+ font-family: 'JetBrains Mono', monospace;
166
+ }
167
+ .header-badge .dot {
168
+ width: 7px;
169
+ height: 7px;
170
+ border-radius: 50%;
171
+ background: var(--green);
172
+ box-shadow: 0 0 8px var(--green);
173
+ animation: pulse-dot 2s ease-in-out infinite;
174
+ }
175
+ @keyframes pulse-dot {
176
+ 0%,100% { opacity: 1; }
177
+ 50% { opacity: .4; }
178
+ }
179
+
180
+ /* ─── Model Selector Bar ─────────────────────────────── */
181
+ .model-bar {
182
+ padding: 16px 32px;
183
+ border-bottom: 1px solid var(--border);
184
+ background: rgba(11,18,32,.7);
185
+ backdrop-filter: blur(12px);
186
+ }
187
+ .model-bar-label {
188
+ font-size: 11px;
189
+ font-family: 'JetBrains Mono', monospace;
190
+ color: var(--text3);
191
+ letter-spacing: 1.5px;
192
+ text-transform: uppercase;
193
+ margin-bottom: 10px;
194
+ }
195
+ .model-tabs {
196
+ display: flex;
197
+ flex-wrap: wrap;
198
+ gap: 8px;
199
+ align-items: center;
200
+ }
201
+ .model-tab {
202
+ display: flex;
203
+ flex-direction: column;
204
+ padding: 8px 14px;
205
+ border: 1px solid var(--border);
206
+ border-radius: 10px;
207
+ background: var(--bg2);
208
+ cursor: pointer;
209
+ transition: all 0.2s ease;
210
+ position: relative;
211
+ overflow: hidden;
212
+ min-width: 110px;
213
+ }
214
+ .model-tab::before {
215
+ content: '';
216
+ position: absolute;
217
+ inset: 0;
218
+ background: linear-gradient(135deg, var(--accent), var(--accent2));
219
+ opacity: 0;
220
+ transition: opacity 0.2s;
221
+ }
222
+ .model-tab:hover {
223
+ border-color: var(--border2);
224
+ transform: translateY(-1px);
225
+ }
226
+ .model-tab.active {
227
+ border-color: var(--accent);
228
+ box-shadow: 0 0 0 1px var(--accent), 0 0 20px rgba(77,158,245,.15);
229
+ }
230
+ .model-tab.active::before { opacity: .08; }
231
+ .model-tab-name {
232
+ font-family: 'Bricolage Grotesque', sans-serif;
233
+ font-size: 13px;
234
+ font-weight: 600;
235
+ color: var(--text);
236
+ position: relative;
237
+ }
238
+ .model-tab-org {
239
+ font-size: 10px;
240
+ color: var(--text2);
241
+ font-family: 'JetBrains Mono', monospace;
242
+ position: relative;
243
+ margin-top: 1px;
244
+ }
245
+ .model-tab-vocab {
246
+ font-size: 10px;
247
+ color: var(--text3);
248
+ font-family: 'JetBrains Mono', monospace;
249
+ position: relative;
250
+ }
251
+ .model-org-dot {
252
+ width: 6px;
253
+ height: 6px;
254
+ border-radius: 50%;
255
+ display: inline-block;
256
+ margin-right: 4px;
257
+ position: relative;
258
+ top: -1px;
259
+ }
260
+
261
+ /* Custom model row */
262
+ .custom-model-row {
263
+ display: flex;
264
+ align-items: center;
265
+ gap: 10px;
266
+ margin-top: 12px;
267
+ }
268
+ .custom-model-row label {
269
+ font-size: 11px;
270
+ color: var(--text2);
271
+ font-family: 'JetBrains Mono', monospace;
272
+ white-space: nowrap;
273
+ }
274
+ .custom-input {
275
+ flex: 1;
276
+ max-width: 380px;
277
+ background: var(--bg2);
278
+ border: 1px solid var(--border);
279
+ border-radius: 8px;
280
+ color: var(--text);
281
+ font-family: 'JetBrains Mono', monospace;
282
+ font-size: 13px;
283
+ padding: 7px 12px;
284
+ outline: none;
285
+ transition: border-color 0.2s;
286
+ }
287
+ .custom-input:focus { border-color: var(--accent); }
288
+ .custom-input::placeholder { color: var(--text3); }
289
+ .btn {
290
+ padding: 7px 16px;
291
+ border-radius: 8px;
292
+ border: 1px solid var(--border2);
293
+ background: linear-gradient(135deg, rgba(77,158,245,.15), rgba(139,106,245,.15));
294
+ color: var(--accent);
295
+ font-family: 'DM Sans', sans-serif;
296
+ font-size: 13px;
297
+ font-weight: 500;
298
+ cursor: pointer;
299
+ transition: all 0.2s;
300
+ white-space: nowrap;
301
+ }
302
+ .btn:hover {
303
+ background: linear-gradient(135deg, rgba(77,158,245,.25), rgba(139,106,245,.25));
304
+ border-color: var(--accent);
305
+ }
306
+ .btn:active { transform: scale(.97); }
307
+
308
+ /* ─── Main Split ─────────────────────────────────────── */
309
+ main {
310
+ flex: 1;
311
+ display: grid;
312
+ grid-template-columns: 1fr 1fr;
313
+ gap: 0;
314
+ min-height: 0;
315
+ }
316
+
317
+ /* ─── Left Panel (Input) ─────────────────────────────── */
318
+ .input-panel {
319
+ border-right: 1px solid var(--border);
320
+ display: flex;
321
+ flex-direction: column;
322
+ padding: 0;
323
+ }
324
+ .panel-header {
325
+ padding: 16px 24px 12px;
326
+ border-bottom: 1px solid var(--border);
327
+ display: flex;
328
+ align-items: center;
329
+ justify-content: space-between;
330
+ }
331
+ .panel-title {
332
+ font-family: 'Bricolage Grotesque', sans-serif;
333
+ font-size: 14px;
334
+ font-weight: 600;
335
+ color: var(--text2);
336
+ letter-spacing: .3px;
337
+ display: flex;
338
+ align-items: center;
339
+ gap: 8px;
340
+ }
341
+ .panel-title-icon {
342
+ width: 20px;
343
+ height: 20px;
344
+ background: var(--bg4);
345
+ border: 1px solid var(--border);
346
+ border-radius: 5px;
347
+ display: flex;
348
+ align-items: center;
349
+ justify-content: center;
350
+ font-size: 11px;
351
+ }
352
+
353
+ .sample-btns {
354
+ display: flex;
355
+ gap: 6px;
356
+ }
357
+ .sample-btn {
358
+ font-size: 11px;
359
+ padding: 4px 10px;
360
+ border-radius: 6px;
361
+ border: 1px solid var(--border);
362
+ background: var(--bg2);
363
+ color: var(--text2);
364
+ cursor: pointer;
365
+ font-family: 'DM Sans', sans-serif;
366
+ transition: all .15s;
367
+ }
368
+ .sample-btn:hover {
369
+ border-color: var(--border2);
370
+ color: var(--text);
371
+ }
372
+
373
+ #input-area {
374
+ flex: 1;
375
+ width: 100%;
376
+ background: transparent;
377
+ border: none;
378
+ outline: none;
379
+ resize: none;
380
+ color: var(--text);
381
+ font-family: 'DM Sans', sans-serif;
382
+ font-size: 15px;
383
+ line-height: 1.7;
384
+ padding: 20px 24px;
385
+ min-height: 220px;
386
+ }
387
+ #input-area::placeholder { color: var(--text3); }
388
+
389
+ .char-counter {
390
+ padding: 8px 24px;
391
+ border-top: 1px solid var(--border);
392
+ font-size: 11px;
393
+ font-family: 'JetBrains Mono', monospace;
394
+ color: var(--text3);
395
+ text-align: right;
396
+ }
397
+
398
+ /* ─── Right Panel (Output) ───────────────────────────── */
399
+ .output-panel {
400
+ display: flex;
401
+ flex-direction: column;
402
+ overflow: hidden;
403
+ }
404
+
405
+ /* Stats row */
406
+ .stats-row {
407
+ display: grid;
408
+ grid-template-columns: repeat(4, 1fr);
409
+ border-bottom: 1px solid var(--border);
410
+ }
411
+ .stat-card {
412
+ padding: 16px 20px;
413
+ border-right: 1px solid var(--border);
414
+ position: relative;
415
+ overflow: hidden;
416
+ }
417
+ .stat-card:last-child { border-right: none; }
418
+ .stat-card::after {
419
+ content: '';
420
+ position: absolute;
421
+ bottom: 0;
422
+ left: 0;
423
+ right: 0;
424
+ height: 2px;
425
+ background: linear-gradient(90deg, transparent, var(--accent), transparent);
426
+ opacity: 0;
427
+ transition: opacity .3s;
428
+ }
429
+ .stat-card.highlight::after { opacity: 1; }
430
+ .stat-label {
431
+ font-size: 10px;
432
+ font-family: 'JetBrains Mono', monospace;
433
+ color: var(--text3);
434
+ text-transform: uppercase;
435
+ letter-spacing: 1px;
436
+ margin-bottom: 6px;
437
+ }
438
+ .stat-value {
439
+ font-family: 'Bricolage Grotesque', sans-serif;
440
+ font-size: 26px;
441
+ font-weight: 700;
442
+ color: var(--text);
443
+ line-height: 1;
444
+ transition: all .3s;
445
+ }
446
+ .stat-card:nth-child(1) .stat-value { color: var(--accent); }
447
+ .stat-card:nth-child(2) .stat-value { color: var(--green); }
448
+ .stat-card:nth-child(3) .stat-value { color: var(--amber); }
449
+ .stat-card:nth-child(4) .stat-value { color: var(--accent2); }
450
+ .stat-sub {
451
+ font-size: 10px;
452
+ color: var(--text3);
453
+ font-family: 'JetBrains Mono', monospace;
454
+ margin-top: 3px;
455
+ }
456
+
457
+ /* View toggle */
458
+ .view-toggle {
459
+ display: flex;
460
+ padding: 12px 20px;
461
+ border-bottom: 1px solid var(--border);
462
+ gap: 4px;
463
+ align-items: center;
464
+ justify-content: space-between;
465
+ }
466
+ .toggle-group {
467
+ display: flex;
468
+ gap: 4px;
469
+ background: var(--bg2);
470
+ border: 1px solid var(--border);
471
+ border-radius: 8px;
472
+ padding: 3px;
473
+ }
474
+ .toggle-btn {
475
+ padding: 5px 14px;
476
+ border-radius: 6px;
477
+ border: none;
478
+ background: transparent;
479
+ color: var(--text2);
480
+ font-family: 'DM Sans', sans-serif;
481
+ font-size: 12px;
482
+ font-weight: 500;
483
+ cursor: pointer;
484
+ transition: all .15s;
485
+ }
486
+ .toggle-btn.active {
487
+ background: var(--bg4);
488
+ color: var(--text);
489
+ box-shadow: 0 1px 4px rgba(0,0,0,.3);
490
+ }
491
+ .special-toggle {
492
+ display: flex;
493
+ align-items: center;
494
+ gap: 8px;
495
+ font-size: 12px;
496
+ color: var(--text2);
497
+ }
498
+ .toggle-switch {
499
+ width: 32px;
500
+ height: 18px;
501
+ background: var(--bg4);
502
+ border: 1px solid var(--border);
503
+ border-radius: 9px;
504
+ cursor: pointer;
505
+ position: relative;
506
+ transition: background .2s;
507
+ }
508
+ .toggle-switch::after {
509
+ content: '';
510
+ position: absolute;
511
+ width: 12px;
512
+ height: 12px;
513
+ border-radius: 50%;
514
+ background: var(--text3);
515
+ top: 2px;
516
+ left: 2px;
517
+ transition: all .2s;
518
+ }
519
+ .toggle-switch.on { background: rgba(77,158,245,.3); border-color: var(--accent); }
520
+ .toggle-switch.on::after { left: 16px; background: var(--accent); }
521
+
522
+ /* Token Display */
523
+ .token-display {
524
+ flex: 1;
525
+ overflow-y: auto;
526
+ padding: 20px;
527
+ scrollbar-width: thin;
528
+ scrollbar-color: var(--border) transparent;
529
+ }
530
+
531
+ .placeholder-msg {
532
+ display: flex;
533
+ flex-direction: column;
534
+ align-items: center;
535
+ justify-content: center;
536
+ height: 200px;
537
+ gap: 16px;
538
+ color: var(--text3);
539
+ }
540
+ .placeholder-icon {
541
+ font-size: 40px;
542
+ filter: grayscale(1) opacity(.3);
543
+ }
544
+ .placeholder-msg p {
545
+ font-family: 'JetBrains Mono', monospace;
546
+ font-size: 13px;
547
+ text-align: center;
548
+ }
549
+
550
/* ─── Token Visualization Views ───────────────────────── */
551
+
552
/* TEXT VIEW — inline colored token spans */
553
+ .token-text-view {
554
+ font-family: 'JetBrains Mono', monospace;
555
+ font-size: 14px;
556
+ line-height: 2.2;
557
+ word-break: break-all;
558
+ }
559
+ .tok {
560
+ display: inline;
561
+ border-radius: 4px;
562
+ padding: 1px 0;
563
+ cursor: default;
564
+ transition: filter .15s;
565
+ position: relative;
566
+ }
567
+ .tok:hover { filter: brightness(1.3); }
568
+ .tok-tooltip {
569
+ display: none;
570
+ position: absolute;
571
+ bottom: 110%;
572
+ left: 50%;
573
+ transform: translateX(-50%);
574
+ background: var(--bg4);
575
+ border: 1px solid var(--border2);
576
+ border-radius: 6px;
577
+ padding: 5px 8px;
578
+ font-size: 11px;
579
+ white-space: nowrap;
580
+ z-index: 50;
581
+ pointer-events: none;
582
+ box-shadow: 0 4px 20px rgba(0,0,0,.5);
583
+ }
584
+ .tok:hover .tok-tooltip { display: block; }
585
+ .tok-tooltip-id { color: var(--accent); font-weight: 700; }
586
+ .tok-tooltip-text { color: var(--text2); }
587
/* Visible stand-ins for whitespace tokens: a middle dot (U+00B7) for a space
   and a return arrow (U+21B5) for a newline. Written as CSS escapes so the
   glyphs survive any re-encoding of the file. */
.tok-space::before { content: '\00B7'; opacity: .3; }
.tok-newline::before { content: '\21B5'; opacity: .5; }
589
+
590
/* ID VIEW — grid of token cards */
591
+ .token-id-view {
592
+ display: flex;
593
+ flex-wrap: wrap;
594
+ gap: 6px;
595
+ }
596
+ .tok-id-card {
597
+ display: flex;
598
+ flex-direction: column;
599
+ align-items: center;
600
+ border-radius: 8px;
601
+ overflow: hidden;
602
+ border: 1px solid;
603
+ cursor: default;
604
+ transition: transform .15s, box-shadow .15s;
605
+ min-width: 52px;
606
+ }
607
+ .tok-id-card:hover {
608
+ transform: translateY(-2px);
609
+ box-shadow: 0 4px 16px rgba(0,0,0,.4);
610
+ }
611
+ .tok-id-top {
612
+ padding: 3px 6px;
613
+ font-family: 'JetBrains Mono', monospace;
614
+ font-size: 11px;
615
+ font-weight: 500;
616
+ width: 100%;
617
+ text-align: center;
618
+ border-bottom: 1px solid rgba(255,255,255,.08);
619
+ }
620
+ .tok-id-bottom {
621
+ padding: 2px 6px 3px;
622
+ font-family: 'JetBrains Mono', monospace;
623
+ font-size: 9px;
624
+ color: rgba(255,255,255,.4);
625
+ width: 100%;
626
+ text-align: center;
627
+ }
628
+
629
/* LIST VIEW — one row per token: index, text, id */
630
+ .token-split-view {
631
+ display: flex;
632
+ flex-direction: column;
633
+ gap: 3px;
634
+ }
635
+ .tok-split-row {
636
+ display: flex;
637
+ align-items: stretch;
638
+ border-radius: 6px;
639
+ overflow: hidden;
640
+ border: 1px solid;
641
+ font-family: 'JetBrains Mono', monospace;
642
+ font-size: 12px;
643
+ }
644
+ .tok-split-idx {
645
+ width: 38px;
646
+ text-align: center;
647
+ padding: 5px 4px;
648
+ font-size: 10px;
649
+ color: rgba(255,255,255,.3);
650
+ border-right: 1px solid rgba(255,255,255,.06);
651
+ display: flex;
652
+ align-items: center;
653
+ justify-content: center;
654
+ }
655
+ .tok-split-text {
656
+ flex: 1;
657
+ padding: 5px 8px;
658
+ font-size: 13px;
659
+ }
660
+ .tok-split-id {
661
+ padding: 5px 8px;
662
+ font-size: 11px;
663
+ color: rgba(255,255,255,.45);
664
+ border-left: 1px solid rgba(255,255,255,.06);
665
+ display: flex;
666
+ align-items: center;
667
+ }
668
+
669
/* ─── Loading Overlay ────────────────────────────────── */
670
+ #loading-overlay {
671
+ position: fixed;
672
+ inset: 0;
673
+ background: rgba(6,11,20,.92);
674
+ backdrop-filter: blur(8px);
675
+ z-index: 1000;
676
+ display: flex;
677
+ flex-direction: column;
678
+ align-items: center;
679
+ justify-content: center;
680
+ gap: 24px;
681
+ transition: opacity .4s;
682
+ }
683
+ #loading-overlay.hidden { opacity: 0; pointer-events: none; }
684
+
685
+ .loading-spinner {
686
+ width: 56px;
687
+ height: 56px;
688
+ position: relative;
689
+ }
690
+ .loading-spinner::before,
691
+ .loading-spinner::after {
692
+ content: '';
693
+ position: absolute;
694
+ border-radius: 50%;
695
+ border: 2px solid transparent;
696
+ }
697
+ .loading-spinner::before {
698
+ inset: 0;
699
+ border-top-color: var(--accent);
700
+ animation: spin 1s linear infinite;
701
+ }
702
+ .loading-spinner::after {
703
+ inset: 8px;
704
+ border-top-color: var(--accent2);
705
+ animation: spin .7s linear infinite reverse;
706
+ }
707
  @keyframes spin { to { transform: rotate(360deg); } }
708
+
709
+ .loading-text {
710
+ font-family: 'Bricolage Grotesque', sans-serif;
711
+ font-size: 20px;
712
+ font-weight: 600;
713
+ color: var(--text);
714
+ }
715
+ .loading-sub {
716
+ font-family: 'JetBrains Mono', monospace;
717
+ font-size: 12px;
718
+ color: var(--text2);
719
+ max-width: 360px;
720
+ text-align: center;
721
+ }
722
+ .loading-bar-wrap {
723
+ width: 300px;
724
+ height: 3px;
725
+ background: var(--bg3);
726
+ border-radius: 2px;
727
+ overflow: hidden;
728
+ }
729
+ .loading-bar {
730
+ height: 100%;
731
+ width: 0%;
732
+ background: linear-gradient(90deg, var(--accent), var(--accent2));
733
+ border-radius: 2px;
734
+ transition: width .3s;
735
+ }
736
+ .loading-file {
737
+ font-size: 11px;
738
+ font-family: 'JetBrains Mono', monospace;
739
+ color: var(--text3);
740
+ }
741
+
742
/* ─── Error Toast ────────────────────────────────────── */
743
+ #toast {
744
+ position: fixed;
745
+ bottom: 24px;
746
+ left: 50%;
747
+ transform: translateX(-50%) translateY(80px);
748
+ background: rgba(245,85,119,.15);
749
+ border: 1px solid rgba(245,85,119,.4);
750
+ color: var(--red);
751
+ padding: 10px 20px;
752
+ border-radius: 10px;
753
+ font-size: 13px;
754
+ font-family: 'JetBrains Mono', monospace;
755
+ z-index: 500;
756
+ transition: transform .3s;
757
+ max-width: 500px;
758
+ text-align: center;
759
+ }
760
+ #toast.show { transform: translateX(-50%) translateY(0); }
761
+
762
/* ─── Footer ─────────────────────────────────────────── */
763
+ footer {
764
+ padding: 12px 32px;
765
+ border-top: 1px solid var(--border);
766
+ display: flex;
767
+ align-items: center;
768
+ justify-content: space-between;
769
+ font-size: 11px;
770
+ color: var(--text3);
771
+ font-family: 'JetBrains Mono', monospace;
772
+ background: rgba(6,11,20,.8);
773
+ }
774
+ footer a {
775
+ color: var(--text2);
776
+ text-decoration: none;
777
+ transition: color .15s;
778
+ }
779
+ footer a:hover { color: var(--accent); }
780
+
781
/* ─── Scrollbar ──────────────────────────────────────── */
782
+ ::-webkit-scrollbar { width: 6px; }
783
+ ::-webkit-scrollbar-track { background: transparent; }
784
+ ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
785
+ ::-webkit-scrollbar-thumb:hover { background: var(--border2); }
786
+
787
/* ─── Model color indicator ──────────────────────────── */
788
+ .model-indicator {
789
+ display: flex;
790
+ align-items: center;
791
+ gap: 6px;
792
+ font-size: 11px;
793
+ font-family: 'JetBrains Mono', monospace;
794
+ color: var(--text2);
795
+ }
796
+ .model-indicator-dot {
797
+ width: 8px;
798
+ height: 8px;
799
+ border-radius: 50%;
800
+ }
801
+
802
/* ─── Responsive ─────────────────────────────────────── */
803
+ @media (max-width: 900px) {
804
+ header { padding: 0 16px; }
805
+ .model-bar { padding: 12px 16px; }
806
+ main { grid-template-columns: 1fr; }
807
+ .input-panel { border-right: none; border-bottom: 1px solid var(--border); }
808
+ .stats-row { grid-template-columns: repeat(2, 1fr); }
809
+ .stat-card:nth-child(2) { border-right: none; }
810
+ footer { flex-direction: column; gap: 4px; text-align: center; }
811
+ }
812
+
813
/* ─── Animations ─────────────────────────────────────── */
814
+ @keyframes fadeIn {
815
+ from { opacity: 0; transform: translateY(6px); }
816
+ to { opacity: 1; transform: translateY(0); }
817
+ }
818
+ .fade-in {
819
+ animation: fadeIn .25s ease forwards;
820
+ }
821
  </style>
822
  </head>
823
+ <body>
824
+
825
+ <!-- Background -->
826
+ <div class="bg-gradient"></div>
827
+ <div class="dot-grid"></div>
 
 
 
 
 
828
 
829
+ <div id="app">
830
+
831
<!-- Header: brand mark on the left, runtime badge on the right.
     The logo letter and the status dot are purely decorative, so they are
     hidden from assistive technology. -->
<header>
  <div class="logo">
    <div class="logo-hex" aria-hidden="true">T</div>
    <span class="logo-name">TokenLens</span>
    <span class="logo-tag">v1.0</span>
  </div>
  <div class="header-right">
    <div class="header-badge">
      <span class="dot" aria-hidden="true"></span>
      <span>runs in-browser · no server · no GPU</span>
    </div>
  </div>
</header>
845
+
846
<!-- Model selector bar: preset tabs (filled in by the script) plus a
     free-form Hugging Face model id field. -->
<div class="model-bar">
  <div class="model-bar-label">▸ select tokenizer</div>
  <div class="model-tabs" id="model-tabs">
    <!-- populated by JS -->
  </div>
  <div class="custom-model-row">
    <!-- `for` ties the label to the input so it is announced and clickable -->
    <label for="custom-model-input">HF model id:</label>
    <input class="custom-input" id="custom-model-input" type="text"
           placeholder="e.g. deepseek-ai/DeepSeek-V4-Pro or Xenova/gpt2">
    <button class="btn" id="custom-model-btn" type="button">Load ↗</button>
  </div>
</div>
859
 
860
+ <!-- Main -->
861
+ <main>
862
+
863
<!-- Left: Input. Sample buttons fill the textarea; the counter below it is
     updated by the script. -->
<div class="input-panel">
  <div class="panel-header">
    <div class="panel-title">
      <div class="panel-title-icon" aria-hidden="true">✎</div>
      Input Text
    </div>
    <div class="sample-btns">
      <button class="sample-btn" type="button" data-sample="poetry">Poetry</button>
      <button class="sample-btn" type="button" data-sample="code">Code</button>
      <button class="sample-btn" type="button" data-sample="multilingual">Multi-lingual</button>
      <button class="sample-btn" type="button" data-sample="numbers">Numbers</button>
      <button class="sample-btn" type="button" data-sample="clear">Clear</button>
    </div>
  </div>
  <!-- A placeholder is not a label, so the textarea gets an explicit accessible name -->
  <textarea id="input-area" aria-label="Input text"
            placeholder="Type or paste text here to see how the tokenizer splits it into tokens…&#10;&#10;Try some special characters, code snippets, emojis 🦊, or multi-lingual text (日本語, العربية) to see how different models handle them differently."></textarea>
  <div class="char-counter"><span id="char-count">0</span> characters</div>
</div>
883
+
884
<!-- Right: Output -->
<div class="output-panel">

  <!-- Stats: the four values are written by the script; an em dash marks
       "nothing to show yet". Card order matters because the stylesheet
       colours the values by :nth-child position. -->
  <div class="stats-row">
    <div class="stat-card" id="sc-tokens">
      <div class="stat-label">Tokens</div>
      <div class="stat-value" id="stat-tokens">—</div>
      <div class="stat-sub" id="stat-model-name">no model loaded</div>
    </div>
    <div class="stat-card" id="sc-chars">
      <div class="stat-label">Characters</div>
      <div class="stat-value" id="stat-chars">—</div>
      <div class="stat-sub">total input</div>
    </div>
    <div class="stat-card" id="sc-words">
      <div class="stat-label">Words</div>
      <div class="stat-value" id="stat-words">—</div>
      <div class="stat-sub">approx</div>
    </div>
    <div class="stat-card" id="sc-ratio">
      <div class="stat-label">Chars / Token</div>
      <div class="stat-value" id="stat-ratio">—</div>
      <div class="stat-sub">efficiency</div>
    </div>
  </div>

  <!-- View toggle: data-view is read by the script to pick the renderer -->
  <div class="view-toggle">
    <div class="toggle-group">
      <button class="toggle-btn active" type="button" data-view="text">Text View</button>
      <button class="toggle-btn" type="button" data-view="ids">ID Grid</button>
      <button class="toggle-btn" type="button" data-view="list">Token List</button>
    </div>
    <div class="model-indicator" id="model-indicator">
      <!-- inline colour is only the initial value; the script recolours the dot per model -->
      <div class="model-indicator-dot" id="model-dot" style="background:#3d5a80"></div>
      <span id="model-indicator-label">no model</span>
    </div>
  </div>

  <!-- Token display area -->
  <div class="token-display" id="token-display">
    <div class="placeholder-msg" id="placeholder">
      <div class="placeholder-icon" aria-hidden="true">⬡</div>
      <p>Select a model above and type something<br>to see tokenization in action</p>
    </div>
  </div>

</div><!-- /output-panel -->
933
  </main>
934
 
935
<!-- Footer. The external link opens in a new tab, so it carries
     rel="noopener noreferrer" to keep the opened page from reaching back
     into this one. -->
<footer>
  <span>TokenLens — Powered by <a href="https://github.com/xenova/transformers.js" target="_blank" rel="noopener noreferrer">Transformers.js</a> · Runs entirely in your browser</span>
  <span>Hover tokens to see IDs · Add models via custom input above</span>
</footer>
939
 
940
+ </div><!-- /app -->
941
+
942
<!-- Loading overlay: covers the page while tokenizer files download. Title,
     subtitle, bar width and file name are updated by the script.
     role="status" lets assistive technology announce the loading text. -->
<div id="loading-overlay" role="status">
  <div class="loading-spinner" aria-hidden="true"></div>
  <div class="loading-text" id="loading-title">Loading Tokenizer</div>
  <div class="loading-sub" id="loading-sub">Downloading tokenizer files from Hugging Face Hub…<br>This may take a moment on first load. Files are cached in your browser.</div>
  <div class="loading-bar-wrap">
    <div class="loading-bar" id="loading-bar"></div>
  </div>
  <div class="loading-file" id="loading-file"></div>
</div>
952
+
953
<!-- Toast: the script injects error messages here. role="alert" makes the
     message announced as soon as its text is set; the element is in the DOM
     up front so the live region is registered before any content arrives. -->
<div id="toast" role="alert"></div>
955
+
956
<!-- ─────────────────────────────────────────────────────────
     TokenLens Script
     ─────────────────────────────────────────────────────────
     Architecture:
     • Uses @xenova/transformers (Transformers.js v2) via CDN
     • Tokenizer files are downloaded from the HF Hub and cached by the browser
     • Extends easily: add entries to the MODELS registry
     • Supports BPE, WordPiece, SentencePiece, Unigram tokenizers
     ─────────────────────────────────────────────────────────── -->
965
+ <script type="module">
966
+
967
+ import { AutoTokenizer, env }
968
+ from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
969
+
970
// ── Config ────────────────────────────────────────────────
// Library-wide Transformers.js switches, set once before any tokenizer loads.
// Turn the library's browser cache on.
env.useBrowserCache = true;
// Turn local model lookup off.
env.allowLocalModels = false;
974
+
975
// ── Model Registry ─────────────────────────────────────────
// Preset tokenizers shown as tabs. To add one, append an entry:
//   id    — Hugging Face model id passed to AutoTokenizer.from_pretrained
//   name  — tab title
//   org   — organisation shown under the title
//   color — accent colour for the tab dot and the model indicator
//   vocab — vocabulary size label (display only)
//   type  — tokenizer family label (display only)
//   desc  — tooltip text for the tab
const MODELS = [
  {
    id: 'Xenova/gpt2',
    name: 'GPT-2',
    org: 'OpenAI',
    color: '#10a37f',
    vocab: '50k',
    type: 'BPE',
    desc: 'Classic GPT-2 BPE tokenizer'
  },
  {
    id: 'Xenova/gpt-4',
    name: 'GPT-4',
    org: 'OpenAI',
    color: '#10a37f',
    vocab: '100k',
    type: 'tiktoken cl100k',
    desc: 'Used by GPT-3.5 & GPT-4'
  },
  {
    id: 'Xenova/llama-tokenizer',
    name: 'LLaMA 2',
    org: 'Meta',
    color: '#0466de',
    vocab: '32k',
    type: 'SP-BPE',
    desc: 'SentencePiece BPE — LLaMA / LLaMA-2'
  },
  {
    id: 'Xenova/mistral-tokenizer-v1',
    name: 'Mistral',
    org: 'Mistral AI',
    color: '#ff7722',
    vocab: '32k',
    type: 'SP-BPE',
    desc: 'Mistral 7B v0.1 tokenizer'
  },
  {
    id: 'Xenova/bert-base-uncased',
    name: 'BERT',
    org: 'Google',
    color: '#4285f4',
    vocab: '30k',
    type: 'WordPiece',
    desc: 'BERT-base uncased WordPiece'
  },
  {
    id: 'Xenova/t5-base',
    name: 'T5',
    org: 'Google',
    color: '#34a853',
    vocab: '32k',
    type: 'Unigram',
    desc: 'T5 SentencePiece Unigram'
  },
  {
    id: 'Xenova/claude-tokenizer',
    name: 'Claude',
    org: 'Anthropic',
    color: '#cc785c',
    vocab: '~100k',
    type: 'BPE',
    desc: "Anthropic Claude's tokenizer"
  },
  {
    id: 'Xenova/roberta-base',
    name: 'RoBERTa',
    org: 'Meta',
    color: '#1a73e8',
    vocab: '50k',
    type: 'BPE',
    desc: 'RoBERTa byte-level BPE'
  },
];
1052
+
1053
// ── Token Color Palette ────────────────────────────────────
// Tokens cycle through these colours by position. Each entry carries the
// solid text colour plus translucent background/border variants of the same
// RGB value; the variants are derived from the hex string so the three can
// never drift apart. Components are left-padded to width 3 (e.g. " 77").
const PALETTE = [
  '#ff8080',
  '#ffb84d',
  '#ffe066',
  '#7aed91',
  '#4ddfc0',
  '#56c8f5',
  '#748ef8',
  '#c484f8',
  '#f57cd4',
  '#fa8072',
  '#8be08b',
  '#f0c040',
  '#60d4e0',
  '#e89060',
].map((hex) => {
  const rgb = [1, 3, 5]
    .map((at) => String(parseInt(hex.slice(at, at + 2), 16)).padStart(3))
    .join(',');
  return { text: hex, bg: `rgba(${rgb},.18)`, border: `rgba(${rgb},.35)` };
});
1070
+
1071
// ── Sample texts ───────────────────────────────────────────
// Keyed by the data-sample attribute of the sample buttons. `clear` is the
// empty string so the same click handler can also reset the textarea.
// Template-literal bodies start at column 0 on purpose: any leading
// whitespace would become part of the sample.
const SAMPLES = {
  poetry: `Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;

— Robert Frost, "The Road Not Taken"`,

  code: `async function fetchData(url, retries = 3) {
  for (let i = 0; i < retries; i++) {
    try {
      const res = await fetch(url);
      if (!res.ok) throw new Error(\`HTTP \${res.status}\`);
      return await res.json();
    } catch (e) {
      if (i === retries - 1) throw e;
      await new Promise(r => setTimeout(r, 1000 * 2 ** i));
    }
  }
}`,

  multilingual: `English: The quick brown fox jumps over the lazy dog.
日本語: 吾輩は猫である。名前はまだない。
中文: 春眠不觉晓,处处闻啼鸟。
العربية: اللغة العربية جميلة ومعبرة.
Ελληνικά: Η γνώση είναι δύναμη.
Emoji: 🌍 🦊 ⚡ 🎯 🧬 🤖 🦋`,

  numbers: `π ≈ 3.14159265358979323846
e ≈ 2.71828182845904523536
φ ≈ 1.61803398874989484820
1,000,000 × $42.99 = $42,990,000.00
2024-01-15T08:30:00.000Z
IPv4: 192.168.1.1 | IPv6: ::1`,

  clear: ''
};
1110
+
1111
// ── State ──────────────────────────────────────────────────
let activeTokenizer = null;   // tokenizer used by tokenize(); null until one has loaded
let activeModel = null;       // Hub id of the most recently requested/loaded model
// modelId → tokenizer. Created without a prototype so that a user-typed id
// such as "constructor" or "toString" can never resolve to an inherited
// Object property and be mistaken for a cached tokenizer.
let tokenizerCache = Object.create(null);
let currentView = 'text';     // 'text' | 'ids' | 'list' — which renderer to use
let showSpecial = false;      // forwarded as add_special_tokens when encoding
let debounceTimer = null;     // pending setTimeout handle for debounced input
1118
+
1119
// ── DOM References ─────────────────────────────────────────
// Every element the script touches, looked up once by id.
const byId = (id) => document.getElementById(id);

// Loading overlay
const $overlay = byId('loading-overlay');
const $loadTitle = byId('loading-title');
const $loadSub = byId('loading-sub');
const $loadBar = byId('loading-bar');
const $loadFile = byId('loading-file');

// Model picker and input
const $modelTabs = byId('model-tabs');
const $input = byId('input-area');
const $charCount = byId('char-count');

// Output area
const $display = byId('token-display');
const $placeholder = byId('placeholder');

// Stat cards
const $stTokens = byId('stat-tokens');
const $stChars = byId('stat-chars');
const $stWords = byId('stat-words');
const $stRatio = byId('stat-ratio');
const $stModelName = byId('stat-model-name');

// Model indicator, toast, custom-model controls
const $modelDot = byId('model-dot');
const $modelLabel = byId('model-indicator-label');
const $toast = byId('toast');
const $customInput = byId('custom-model-input');
const $customBtn = byId('custom-model-btn');
1140
+
1141
// ── Utilities ──────────────────────────────────────────────

/**
 * Reveal the loading overlay with a fresh title and subtitle.
 * The progress bar is reset to 0% and the current-file label is cleared.
 *
 * @param {string} title  Heading text.
 * @param {string} sub    Explanatory text under the heading (set as plain text).
 */
function showOverlay(title, sub) {
  $loadBar.style.width = '0%';
  $loadFile.textContent = '';
  $loadSub.textContent = sub;
  $loadTitle.textContent = title;
  $overlay.classList.remove('hidden');
}
1150
+
1151
/** Put the `hidden` class on the loading overlay (no-op if already there). */
function hideOverlay() {
  $overlay.classList.toggle('hidden', true);
}
1154
+
1155
// Handle of the timer that will hide the toast currently on screen, if any.
let toastTimer = null;

/**
 * Show a message in the toast and hide it again after `duration` ms.
 *
 * A toast raised while another is still visible replaces its text and
 * restarts the countdown; without cancelling the earlier timer, that timer
 * would hide the newer message too early.
 *
 * @param {string} msg               Text to display (set as plain text).
 * @param {number} [duration=5000]   Time on screen in milliseconds.
 */
function showToast(msg, duration = 5000) {
  $toast.textContent = msg;
  $toast.classList.add('show');

  clearTimeout(toastTimer);
  toastTimer = setTimeout(() => {
    toastTimer = null;
    $toast.classList.remove('show');
  }, duration);
}
1160
+
1161
/**
 * Refresh the four stat cards for the given input.
 *
 * @param {number} tokens  Number of tokens produced (0 when nothing was tokenized).
 * @param {string} text    The raw input text.
 *
 * Characters are counted with `text.length` (UTF-16 code units) and words by
 * splitting the trimmed text on whitespace runs. Any count of zero is shown
 * as an em dash, matching the initial markup.
 */
function setStats(tokens, text) {
  const EMPTY = '—';
  const orDash = (n) => (n > 0 ? n.toLocaleString() : EMPTY);

  const trimmed = text.trim();
  const chars = text.length;
  const words = trimmed ? trimmed.split(/\s+/).length : 0;

  $stTokens.textContent = orDash(tokens);
  $stChars.textContent = orDash(chars);
  $stWords.textContent = orDash(words);
  $stRatio.textContent = tokens > 0 && chars > 0 ? (chars / tokens).toFixed(2) : EMPTY;

  // Re-apply the highlight class on every card. Reading offsetWidth between
  // remove and add forces a reflow so the browser sees two separate states.
  for (const id of ['sc-tokens', 'sc-chars', 'sc-words', 'sc-ratio']) {
    const card = document.getElementById(id);
    card.classList.remove('highlight');
    void card.offsetWidth;
    card.classList.add('highlight');
  }
}
1179
+
1180
// ── Decode raw token string for display ───────────────────
/**
 * Turn a raw vocabulary token into the text it stands for.
 *
 * Handles, in this order:
 *   - the byte-level BPE space marker "Ġ" (U+0120) and the SentencePiece
 *     space marker "▁" (U+2581), both shown as a plain space;
 *   - the byte-level BPE newline marker "Ċ" (U+010A), shown as "\n";
 *   - literal carriage returns, which are dropped;
 *   - byte-fallback tokens such as "<0x0A>": ASCII bytes become the
 *     character itself, other bytes are shown as "[0xNN]" because a single
 *     byte of a multi-byte sequence has no character of its own.
 *
 * The markers are written as \u escapes so they cannot be damaged by a
 * re-encoding of this file.
 *
 * @param {string} raw  Token string as stored in the vocabulary.
 * @returns {string}    Display text; '' for empty or missing input.
 */
function decodeTokenString(raw) {
  if (!raw) return '';

  return String(raw)
    .replace(/[\u0120\u2581]/g, ' ')
    .replace(/\u010A/g, '\n')
    .replace(/\r/g, '')
    .replace(/<0x([0-9A-Fa-f]{2})>/g, (_, hex) => {
      const code = parseInt(hex, 16);
      return code < 128 ? String.fromCharCode(code) : `[0x${hex}]`;
    });
}
1199
+
1200
// ── Tokenize ───────────────────────────────────────────────
/**
 * Encode `text` with the active tokenizer, update the stats and render the
 * tokens in the current view.
 *
 * With no tokenizer loaded, or with blank text, the placeholder is shown and
 * the stats are reset instead. Failures are logged and reported in the toast;
 * they never propagate to the caller.
 *
 * @param {string} text  Raw input text.
 * @returns {Promise<void>}
 */
async function tokenize(text) {
  if (!activeTokenizer || !text.trim()) {
    $display.innerHTML = '';
    $display.appendChild($placeholder);
    $placeholder.style.display = 'flex';
    setStats(0, text);
    return;
  }

  try {
    $placeholder.style.display = 'none';

    // Special tokens are only added when showSpecial is on.
    const encoded = await activeTokenizer(text, {
      add_special_tokens: showSpecial,
      return_offsets_mapping: false,
    });

    // The tensor data is presumably a BigInt64Array (int64 ids). Converting
    // to plain numbers keeps BigInt values out of the rendering code; ids
    // that are already numbers pass through unchanged.
    const ids = Array.from(encoded.input_ids.data, Number);

    // Prefer the raw vocabulary strings; if the tokenizer does not expose
    // them this way, fall back to decoding each id on its own.
    let rawTokens;
    try {
      rawTokens = activeTokenizer.model.convert_ids_to_tokens(ids);
    } catch {
      rawTokens = await Promise.all(
        ids.map(id => activeTokenizer.decode([id], { skip_special_tokens: false }))
      );
    }

    // Pair: { id, raw, display }
    const tokens = ids.map((id, i) => {
      const raw = rawTokens[i] || '';
      return { id, raw, display: decodeTokenString(raw) };
    });

    setStats(tokens.length, text);
    renderView(tokens);

  } catch (err) {
    console.error('Tokenization error:', err);
    // A thrown non-Error value has no .message; show the value itself then.
    showToast('Tokenization error: ' + (err?.message ?? String(err)));
  }
}
1247
+
1248
// ── Render Views ───────────────────────────────────────────

/**
 * Hand the token list to the renderer matching `currentView`.
 * An unrecognised view name renders nothing.
 */
function renderView(tokens) {
  switch (currentView) {
    case 'text':
      renderTextView(tokens);
      break;
    case 'ids':
      renderIdView(tokens);
      break;
    case 'list':
      renderListView(tokens);
      break;
  }
}
1255
+
1256
/**
 * Append `text` to `el` so that whitespace stays visible in inline flow:
 * each newline becomes "↵" plus a line break, each tab becomes "→" plus
 * three non-breaking spaces, and a run made only of spaces becomes the same
 * number of non-breaking spaces. Everything else is added as plain text.
 */
function appendVisibleText(el, text) {
  for (const part of text.split(/(\n|\t)/)) {
    if (part === '') continue;
    if (part === '\n') {
      el.append('↵', document.createElement('br'));
    } else if (part === '\t') {
      el.append('→\u00A0\u00A0\u00A0');
    } else if (/^ +$/.test(part)) {
      el.append('\u00A0'.repeat(part.length));
    } else {
      el.append(part);
    }
  }
}

/**
 * Text view: the tokens laid out inline, each in its own coloured span with
 * a hover tooltip showing the token id and raw vocabulary string.
 *
 * Whitespace is handled per character rather than only for tokens that are
 * exactly one space/newline/tab, so tokens such as a double newline or
 * ".\n" keep their line breaks instead of collapsing.
 *
 * @param {Array<{id: number, raw: string, display: string}>} tokens
 */
function renderTextView(tokens) {
  const container = document.createElement('div');
  container.className = 'token-text-view fade-in';

  tokens.forEach((tok, i) => {
    const c = PALETTE[i % PALETTE.length];

    const span = document.createElement('span');
    span.className = 'tok';
    span.style.background = c.bg;
    span.style.color = c.text;
    span.style.borderBottom = `2px solid ${c.border}`;
    appendVisibleText(span, tok.display);

    // Tooltip, built from text nodes so the raw token needs no HTML escaping.
    const tip = document.createElement('div');
    tip.className = 'tok-tooltip';

    const idPart = document.createElement('span');
    idPart.className = 'tok-tooltip-id';
    idPart.textContent = `#${tok.id}`;

    const rawPart = document.createElement('span');
    rawPart.className = 'tok-tooltip-text';
    rawPart.textContent = tok.raw || '(empty)';

    tip.append(idPart, ' · ', rawPart);
    span.appendChild(tip);

    container.appendChild(span);
  });

  $display.innerHTML = '';
  $display.appendChild(container);
}
1300
+
1301
/**
 * ID grid view: one card per token with the id on top and a short text
 * label underneath. The raw vocabulary string is available as the card's
 * native tooltip.
 *
 * @param {Array<{id: number, raw: string, display: string}>} tokens
 */
function renderIdView(tokens) {
  const container = document.createElement('div');
  container.className = 'token-id-view fade-in';

  tokens.forEach((tok, i) => {
    const c = PALETTE[i % PALETTE.length];

    const card = document.createElement('div');
    card.className = 'tok-id-card';
    card.style.background = c.bg;
    card.style.borderColor = c.border;
    card.title = `Raw: ${tok.raw}`;

    const top = document.createElement('div');
    top.className = 'tok-id-top';
    top.style.color = c.text;
    top.textContent = tok.id;

    const bot = document.createElement('div');
    bot.className = 'tok-id-bottom';
    // Abbreviate to 8 code points. Array.from splits by code point, so an
    // emoji is never cut in half the way String.prototype.slice could.
    const label = Array.from(tok.display)
      .slice(0, 8)
      .join('')
      .replace(/\n/g, '↵')
      .replace(/\t/g, '→');
    bot.textContent = label || '…';

    card.appendChild(top);
    card.appendChild(bot);
    container.appendChild(card);
  });

  $display.innerHTML = '';
  $display.appendChild(container);
}
1332
+
1333
/**
 * List view: one row per token showing its position, its display text
 * (newlines as "↵", tabs as "→", "(empty)" when there is none) and its id.
 *
 * @param {Array<{id: number, raw: string, display: string}>} tokens
 */
function renderListView(tokens) {
  const container = document.createElement('div');
  container.className = 'token-split-view fade-in';

  tokens.forEach((tok, i) => {
    const c = PALETTE[i % PALETTE.length];

    const row = document.createElement('div');
    row.className = 'tok-split-row';
    row.style.background = c.bg;
    row.style.borderColor = c.border;

    const idx = document.createElement('div');
    idx.className = 'tok-split-idx';
    idx.textContent = i;

    const text = document.createElement('div');
    text.className = 'tok-split-text';
    text.style.color = c.text;
    text.textContent =
      tok.display.replace(/\n/g, '↵').replace(/\t/g, '→') || '(empty)';

    const id = document.createElement('div');
    id.className = 'tok-split-id';
    id.textContent = tok.id;

    row.appendChild(idx);
    row.appendChild(text);
    row.appendChild(id);
    container.appendChild(row);
  });

  $display.innerHTML = '';
  $display.appendChild(container);
}
1367
+
1368
// ── Load Tokenizer ─────────────────────────────────────────

// Counts loadModel() calls. Each call remembers its own number, so a slow
// download that finishes after the user has picked something else can tell
// it is stale and must not replace the newer choice.
let loadRequestSeq = 0;

/**
 * Make `modelId` the active tokenizer, downloading it on first use, then
 * re-tokenize the current input.
 *
 * Loaded tokenizers are kept in `tokenizerCache`. While a download runs the
 * loading overlay shows its progress. A failure is logged and reported in
 * the toast; the previously active tokenizer stays in place.
 *
 * @param {string} modelId  Hugging Face model id, e.g. "Xenova/gpt2".
 * @returns {Promise<void>}
 */
async function loadModel(modelId) {
  const requestId = ++loadRequestSeq;
  const isLatest = () => requestId === loadRequestSeq;

  // Own-property check: a user-typed id like "constructor" must not match
  // something inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(tokenizerCache, modelId)) {
    activeTokenizer = tokenizerCache[modelId];
    activeModel = modelId;
    updateModelIndicator(modelId);
    // Any download still in flight is now stale and will not hide the
    // overlay itself, so do it here.
    hideOverlay();
    await runTokenize();
    return;
  }

  const displayName = modelId.split('/').pop();
  showOverlay(
    `Loading ${displayName}`,
    `Fetching tokenizer.json and tokenizer_config.json from Hugging Face Hub.\nFiles are cached by your browser after the first download.`
  );

  let lastProgress = 0;

  try {
    const tokenizer = await AutoTokenizer.from_pretrained(modelId, {
      progress_callback: (info) => {
        // The overlay belongs to the newest request only.
        if (!isLatest()) return;

        // Transformers.js v2 appears to report byte progress with status
        // 'progress' and a 0–100 `progress` field; 'downloading' is still
        // accepted so the original check keeps working if it is ever sent.
        if (info.status === 'progress' || info.status === 'downloading') {
          let pct = lastProgress;
          if (typeof info.progress === 'number') {
            pct = Math.round(info.progress);
          } else if (info.total) {
            pct = Math.round((info.loaded / info.total) * 100);
          }
          pct = Math.min(100, Math.max(0, pct));
          $loadBar.style.width = pct + '%';
          $loadFile.textContent = info.file || '';
          lastProgress = pct;
        } else if (info.status === 'done') {
          $loadBar.style.width = '100%';
        }
      }
    });

    // Cache even a stale result: the download is paid for already.
    tokenizerCache[modelId] = tokenizer;
    if (!isLatest()) return;

    activeTokenizer = tokenizer;
    activeModel = modelId;

    updateModelIndicator(modelId);
    hideOverlay();
    await runTokenize();

  } catch (err) {
    console.error('Failed to load tokenizer:', err);
    // A stale request must not touch the overlay of the newer one.
    if (!isLatest()) return;
    hideOverlay();
    const reason = err?.message ?? String(err);
    showToast(`Failed to load "${modelId}": ${reason}. Check the model ID and try again.`, 8000);
  }
}
1416
+
1417
/**
 * Show which model is active: recolour the indicator dot, set its label to
 * the last path segment of the id, and describe the model under the token
 * count. Ids that are not in MODELS get a neutral colour and are described
 * by their full id.
 *
 * @param {string} modelId  Hugging Face model id.
 */
function updateModelIndicator(modelId) {
  const preset = MODELS.find((m) => m.id === modelId);
  const color = preset ? preset.color : '#7899c0';

  $modelDot.style.background = color;
  $modelDot.style.boxShadow = `0 0 6px ${color}`;
  $modelLabel.textContent = modelId.split('/').pop();
  $stModelName.textContent = preset
    ? `${preset.org} · ${preset.type} · ${preset.vocab} vocab`
    : modelId;
}
1426
+
1427
// ── Build Model Tabs ───────────────────────────────────────

/**
 * Rebuild the preset tab strip from MODELS.
 *
 * Each tab is a <div>, so it is given role="button", a tab stop, and
 * Enter/Space handling; without these it can only be operated with a
 * pointer. The inner markup is built from MODELS, which is a constant in
 * this file, never from user input.
 */
function buildTabs() {
  $modelTabs.innerHTML = '';

  MODELS.forEach((m) => {
    const tab = document.createElement('div');
    tab.className = 'model-tab';
    tab.dataset.id = m.id;
    tab.title = m.desc;
    tab.setAttribute('role', 'button');
    tab.tabIndex = 0;
    tab.innerHTML = `
      <div class="model-tab-name">${m.name}</div>
      <div class="model-tab-org">
        <span class="model-org-dot" style="background:${m.color}"></span>${m.org}
      </div>
      <div class="model-tab-vocab">${m.type} · ${m.vocab} vocab</div>
    `;

    tab.addEventListener('click', () => selectTab(m.id));
    tab.addEventListener('keydown', (e) => {
      if (e.key === 'Enter' || e.key === ' ') {
        e.preventDefault(); // keep Space from scrolling the page
        selectTab(m.id);
      }
    });

    $modelTabs.appendChild(tab);
  });
}
1447
+
1448
/**
 * Mark the tab whose data-id equals `modelId` as active (clearing all
 * others) and start loading that model. The load is not awaited.
 */
function selectTab(modelId) {
  for (const tab of document.querySelectorAll('.model-tab')) {
    const isChosen = tab.dataset.id === modelId;
    tab.classList.toggle('active', isChosen);
  }
  loadModel(modelId);
}
1454
+
1455
// ── View Toggle ────────────────────────────────────────────
// Clicking a view button makes it the only active one, records its
// data-view as the current view, and re-renders the current input.

for (const btn of document.querySelectorAll('.toggle-btn')) {
  btn.addEventListener('click', () => {
    for (const other of document.querySelectorAll('.toggle-btn')) {
      other.classList.remove('active');
    }
    btn.classList.add('active');
    currentView = btn.dataset.view;
    runTokenize();
  });
}
1465
 
1466
// ── Input Handling ─────────────────────────────────────────

/** Sync the character counter with the textarea and tokenize its content. */
async function runTokenize() {
  const { value } = $input;
  $charCount.textContent = value.length;
  await tokenize(value);
}

// The counter follows every keystroke; tokenization waits until typing has
// paused for 280 ms.
$input.addEventListener('input', () => {
  clearTimeout(debounceTimer);
  $charCount.textContent = $input.value.length;
  debounceTimer = setTimeout(runTokenize, 280);
});
1479
+
1480
// ── Sample Buttons ─────────────────────────────────────────
// Each button loads the SAMPLES entry named by its data-sample attribute
// (an unknown key yields an empty textarea), focuses the textarea and
// tokenizes at once.

for (const btn of document.querySelectorAll('.sample-btn')) {
  btn.addEventListener('click', () => {
    const sample = SAMPLES[btn.dataset.sample];
    $input.value = sample ?? '';
    $input.focus();
    runTokenize();
  });
}
1490
+
1491
// ── Custom Model ───────────────────────────────────────────

/**
 * Load the model id typed into the custom field.
 *
 * An empty field only raises a toast. Otherwise the tab highlight is synced
 * to the typed id: a preset whose id was typed by hand stays highlighted,
 * any other id leaves every tab unhighlighted.
 *
 * @returns {Promise<void>}
 */
async function loadCustomModel() {
  const id = $customInput.value.trim();
  if (!id) {
    showToast('Please enter a model ID');
    return;
  }

  document.querySelectorAll('.model-tab').forEach((t) => {
    t.classList.toggle('active', t.dataset.id === id);
  });

  activeModel = id;
  await loadModel(id);
}

// The Load button and Enter in the field both trigger the load.
$customBtn.addEventListener('click', loadCustomModel);
$customInput.addEventListener('keydown', e => {
  if (e.key === 'Enter') loadCustomModel();
});
1507
+
1508
// ── Init ───────────────────────────────────────────────────

// 1. Render the preset tabs.
buildTabs();

// 2. The overlay markup starts visible; hide it until a load begins.
$overlay.classList.toggle('hidden', true);

// 3. Start from an empty textarea.
$input.value = '';

// 4. Select (and start loading) the first preset.
const [firstPreset] = MODELS;
selectTab(firstPreset.id);
1519
+
1520
+ </script>
1521
+ </body>
1522
+ </html>