quickgrid committed on
Commit
cc8e1dc
·
verified ·
1 Parent(s): 6e13729

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +1430 -383
index.html CHANGED
@@ -1,464 +1,1511 @@
1
  <!DOCTYPE html>
2
- <html lang="en" class="light">
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>LLM Tokenizer Visualizer</title>
7
- <!-- Tailwind CSS -->
8
- <script src="https://cdn.tailwindcss.com"></script>
9
- <script>
10
- tailwind.config = {
11
- darkMode: 'class',
12
- theme: {
13
- extend: {
14
- fontFamily: {
15
- sans: ['Inter', 'sans-serif'],
16
- mono: ['Fira Code', 'monospace'],
17
- },
18
- }
19
- }
 
 
 
 
 
 
 
 
 
 
20
  }
21
- </script>
22
- <!-- Google Fonts -->
23
- <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet">
24
- <!-- Transformers.js -->
25
- <script type="module">
26
- import { AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.1';
27
- window.AutoTokenizer = AutoTokenizer;
28
- </script>
29
- <style type="text/tailwindcss">
30
- @layer utilities {
31
- .content-auto {
32
- content-visibility: auto;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
34
- .token-box {
35
- white-space: pre;
36
- word-break: break-all;
 
 
37
  }
38
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  </style>
 
40
  </head>
41
- <body class="bg-gray-50 dark:bg-gray-900 text-gray-900 dark:text-gray-100 min-h-screen transition-colors duration-200">
42
- <div class="max-w-5xl mx-auto px-4 py-8">
43
- <!-- Header -->
44
- <header class="flex justify-between items-center mb-8">
45
- <div>
46
- <h1 class="text-3xl font-bold bg-gradient-to-r from-blue-600 to-purple-600 bg-clip-text text-transparent">LLM Tokenizer Visualizer</h1>
47
- <p class="text-gray-600 dark:text-gray-400 mt-1">Client-side tokenization for any Hugging Face model, powered by Transformers.js</p>
 
 
 
 
 
 
 
 
 
 
 
 
48
  </div>
49
- <button id="themeToggle" class="p-2 rounded-full hover:bg-gray-200 dark:hover:bg-gray-800 transition-colors">
50
- <!-- Sun Icon (for dark mode) -->
51
- <svg id="sunIcon" class="w-6 h-6 hidden" fill="none" stroke="currentColor" viewBox="0 0 24 24">
52
- <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 3v1m0 16v1m9-9h-1M4 12H3m15.364 6.364l-.707-.707M6.343 6.343l-.707-.707m12.728 0l-.707.707M6.343 17.657l-.707.707M16 12a4 4 0 11-8 0 4 4 0 018 0z"></path>
53
- </svg>
54
- <!-- Moon Icon (for light mode) -->
55
- <svg id="moonIcon" class="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
56
- <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M20.354 15.354A9 9 0 018.646 3.646 9.003 9.003 0 0012 21a9.003 9.003 0 008.354-5.646z"></path>
57
- </svg>
58
- </button>
59
  </header>
60
 
61
- <!-- Model Config Section -->
62
- <section class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-6 mb-6">
63
- <h2 class="text-lg font-semibold mb-4">Model Configuration</h2>
64
- <div class="grid grid-cols-1 md:grid-cols-12 gap-4">
65
- <div class="md:col-span-8">
66
- <label class="block text-sm font-medium mb-2" for="modelId">Hugging Face Model ID</label>
67
- <div class="flex gap-2">
68
- <input
69
- type="text"
70
- id="modelId"
71
- value="deepseek-ai/DeepSeek-V4-Pro"
72
- class="flex-1 px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none transition-all"
73
- placeholder="e.g. meta-llama/Meta-Llama-3-8B-Instruct"
74
- >
75
- <button
76
- id="loadModelBtn"
77
- class="px-6 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-medium transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed"
78
- >
79
- <svg id="loadSpinner" class="animate-spin w-4 h-4 hidden" fill="none" viewBox="0 0 24 24">
80
- <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
81
- <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
82
- </svg>
83
- <span id="loadBtnText">Load Model</span>
84
- </button>
85
- </div>
86
- </div>
87
- <div class="md:col-span-4">
88
- <label class="block text-sm font-medium mb-2" for="hfToken">Optional HF Token (for gated models)</label>
89
- <input
90
- type="password"
91
- id="hfToken"
92
- class="w-full px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none transition-all"
93
- placeholder="hf_..."
94
- >
95
- </div>
96
  </div>
97
- <div class="mt-4">
98
- <label class="block text-sm font-medium mb-2">Popular Models</label>
99
- <select
100
- id="popularModels"
101
- class="w-full md:w-1/3 px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none transition-all"
102
- >
103
- <option value="">Select a popular model...</option>
104
- <option value="deepseek-ai/DeepSeek-V4-Pro">DeepSeek-V4-Pro</option>
105
- <option value="meta-llama/Meta-Llama-3-8B-Instruct">Llama 3 8B Instruct</option>
106
- <option value="mistralai/Mistral-7B-Instruct-v0.3">Mistral 7B v0.3</option>
107
- <option value="google/gemma-2-2b-it">Gemma 2 2B IT</option>
108
- <option value="Qwen/Qwen2.5-7B-Instruct">Qwen 2.5 7B Instruct</option>
109
- <option value="microsoft/Phi-3-mini-4k-instruct">Phi-3 Mini 4K Instruct</option>
110
- </select>
111
  </div>
112
- <div id="modelError" class="hidden mt-3 p-3 bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300 rounded-lg text-sm"></div>
113
- <div id="modelSuccess" class="hidden mt-3 p-3 bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300 rounded-lg text-sm"></div>
114
- </section>
115
-
116
- <!-- Text Input Section -->
117
- <section class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-6 mb-6">
118
- <div class="flex justify-between items-center mb-4">
119
- <h2 class="text-lg font-semibold">Input Text</h2>
120
- <button
121
- id="clearTextBtn"
122
- class="text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-100 transition-colors"
123
- >
124
- Clear
125
  </button>
126
  </div>
127
- <textarea
128
- id="textInput"
129
- rows="4"
130
- class="w-full px-4 py-3 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none transition-all font-mono text-sm resize-none"
131
- placeholder="Enter text to tokenize..."
132
- disabled
133
- >Hello, world! This is a test of the tokenizer. Let's see how many tokens this takes.
134
- 你好,世界!这是一个分词器测试。
135
- Special tokens: <|endoftext|> <s> </s></textarea>
136
- </section>
137
-
138
- <!-- Stats Section -->
139
- <section class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
140
- <div class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-4">
141
- <p class="text-sm text-gray-600 dark:text-gray-400">Total Tokens</p>
142
- <p id="totalTokens" class="text-2xl font-bold mt-1">0</p>
143
  </div>
144
- <div class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-4">
145
- <p class="text-sm text-gray-600 dark:text-gray-400">Total Characters</p>
146
- <p id="totalChars" class="text-2xl font-bold mt-1">0</p>
 
147
  </div>
148
- <div class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-4">
149
- <p class="text-sm text-gray-600 dark:text-gray-400">Tokens per Character</p>
150
- <p id="tokenRatio" class="text-2xl font-bold mt-1">0.00</p>
 
151
  </div>
152
- </section>
153
-
154
- <!-- Token Visualization Section -->
155
- <section class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-6 mb-6">
156
- <div class="flex justify-between items-center mb-4">
157
- <h2 class="text-lg font-semibold">Token Visualization</h2>
158
- <div class="flex gap-2">
159
- <button
160
- id="copyTokensBtn"
161
- class="px-4 py-2 bg-gray-100 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-600 rounded-lg text-sm font-medium transition-colors flex items-center gap-2 disabled:opacity-50"
162
- disabled
163
- >
164
- <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
165
- <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path>
166
- </svg>
167
- Copy Tokens
168
- </button>
169
- <button
170
- id="copyIdsBtn"
171
- class="px-4 py-2 bg-gray-100 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-600 rounded-lg text-sm font-medium transition-colors flex items-center gap-2 disabled:opacity-50"
172
- disabled
173
- >
174
- <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
175
- <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path>
176
- </svg>
177
- Copy IDs
178
- </button>
179
  </div>
180
  </div>
181
- <div
182
- id="tokenContainer"
183
- class="min-h-[120px] p-4 bg-gray-50 dark:bg-gray-700/50 rounded-lg overflow-x-auto flex flex-wrap gap-2 items-start content-start"
184
- >
185
- <p id="emptyState" class="text-gray-500 dark:text-gray-400 w-full text-center py-8">Load a model and enter text to see tokens</p>
 
 
 
 
 
 
 
 
186
  </div>
187
- <p class="text-xs text-gray-500 dark:text-gray-400 mt-2">Hover over tokens to see their ID. Special tokens are marked with a dashed border.</p>
188
- </section>
189
 
190
- <!-- Footer -->
191
- <footer class="text-center text-gray-600 dark:text-gray-400 text-sm">
192
- <p>Powered by <a href="https://huggingface.co/docs/transformers.js" target="_blank" class="text-blue-600 dark:text-blue-400 hover:underline">Transformers.js</a> | Runs entirely in your browser, no server required</p>
193
- </footer>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  </div>
195
 
196
- <script type="module">
197
- // Wait for Transformers.js to load
198
- await new Promise(resolve => {
199
- const checkInterval = setInterval(() => {
200
- if (window.AutoTokenizer) clearInterval(checkInterval);
201
- }, 100);
202
- });
 
 
 
 
 
 
 
 
 
 
 
203
 
204
- const { AutoTokenizer } = window;
205
-
206
- // DOM Elements
207
- const themeToggle = document.getElementById('themeToggle');
208
- const sunIcon = document.getElementById('sunIcon');
209
- const moonIcon = document.getElementById('moonIcon');
210
- const modelIdInput = document.getElementById('modelId');
211
- const hfTokenInput = document.getElementById('hfToken');
212
- const loadModelBtn = document.getElementById('loadModelBtn');
213
- const loadSpinner = document.getElementById('loadSpinner');
214
- const loadBtnText = document.getElementById('loadBtnText');
215
- const popularModelsSelect = document.getElementById('popularModels');
216
- const modelError = document.getElementById('modelError');
217
- const modelSuccess = document.getElementById('modelSuccess');
218
- const textInput = document.getElementById('textInput');
219
- const clearTextBtn = document.getElementById('clearTextBtn');
220
- const tokenContainer = document.getElementById('tokenContainer');
221
- const emptyState = document.getElementById('emptyState');
222
- const totalTokensEl = document.getElementById('totalTokens');
223
- const totalCharsEl = document.getElementById('totalChars');
224
- const tokenRatioEl = document.getElementById('tokenRatio');
225
- const copyTokensBtn = document.getElementById('copyTokensBtn');
226
- const copyIdsBtn = document.getElementById('copyIdsBtn');
227
-
228
- // State
229
- let tokenizer = null;
230
  let currentTokens = [];
231
- let currentTokenIds = [];
232
- let isDark = localStorage.getItem('theme') === 'dark' || (!localStorage.getItem('theme') && window.matchMedia('(prefers-color-scheme: dark)').matches);
233
-
234
- // Initialize theme
235
- function initTheme() {
236
- if (isDark) {
237
- document.documentElement.classList.add('dark');
238
- sunIcon.classList.remove('hidden');
239
- moonIcon.classList.add('hidden');
240
- } else {
241
- document.documentElement.classList.remove('dark');
242
- sunIcon.classList.add('hidden');
243
- moonIcon.classList.remove('hidden');
244
- }
245
  }
246
- initTheme();
247
 
248
- // Theme toggle
249
- themeToggle.addEventListener('click', () => {
250
- isDark = !isDark;
251
- localStorage.setItem('theme', isDark ? 'dark' : 'light');
252
- initTheme();
253
- // Re-render tokens to update colors
254
- if (currentTokens.length > 0) renderTokens();
255
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
- // Popular models select
258
- popularModelsSelect.addEventListener('change', (e) => {
259
- if (e.target.value) {
260
- modelIdInput.value = e.target.value;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  }
262
- });
 
 
 
 
263
 
264
- // Load model function
265
- async function loadModel() {
266
- const modelId = modelIdInput.value.trim();
267
  if (!modelId) {
268
- showError('Please enter a Hugging Face model ID');
 
 
 
 
 
 
 
269
  return;
270
  }
271
 
272
- // Show loading state
273
- loadModelBtn.disabled = true;
274
- loadSpinner.classList.remove('hidden');
275
- loadBtnText.textContent = 'Loading...';
276
- hideMessages();
277
- textInput.disabled = true;
278
 
279
  try {
280
- const token = hfTokenInput.value.trim() || undefined;
281
- tokenizer = await AutoTokenizer.from_pretrained(modelId, { token });
282
-
283
- // Success
284
- showSuccess(`Successfully loaded tokenizer for ${modelId}`);
285
- textInput.disabled = false;
286
- textInput.focus();
287
-
288
- // Auto-tokenize existing text
289
- if (textInput.value.trim()) {
290
- tokenizeText();
291
- }
 
 
 
 
 
 
292
  } catch (err) {
293
- console.error('Failed to load model:', err);
294
- showError(`Failed to load model: ${err.message}. Check the model ID and ensure it's public (or provide a HF token for gated models).`);
295
- tokenizer = null;
296
- textInput.disabled = true;
297
  } finally {
298
- // Reset button state
299
- loadModelBtn.disabled = false;
300
- loadSpinner.classList.add('hidden');
301
- loadBtnText.textContent = 'Load Model';
302
  }
303
  }
304
 
305
- // Tokenize text function
306
- function tokenizeText() {
307
- if (!tokenizer) return;
308
- const text = textInput.value;
309
-
310
- if (!text.trim()) {
311
- currentTokens = [];
312
- currentTokenIds = [];
313
- renderTokens();
314
- updateStats();
315
- copyTokensBtn.disabled = true;
316
- copyIdsBtn.disabled = true;
 
 
 
 
 
 
 
 
 
 
 
317
  return;
318
  }
319
 
 
 
 
 
 
 
320
  try {
321
- const output = tokenizer(text);
322
- currentTokens = output.tokens;
323
- currentTokenIds = output.input_ids;
324
- renderTokens();
325
- updateStats();
326
- copyTokensBtn.disabled = false;
327
- copyIdsBtn.disabled = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  } catch (err) {
329
- console.error('Tokenization failed:', err);
330
- showError(`Tokenization failed: ${err.message}`);
331
  }
332
  }
333
 
334
- // Render tokens
335
- function renderTokens() {
336
- tokenContainer.innerHTML = '';
337
-
338
- if (currentTokens.length === 0) {
339
- tokenContainer.appendChild(emptyState);
340
- emptyState.classList.remove('hidden');
 
 
 
 
 
 
341
  return;
342
  }
343
 
344
- emptyState.classList.add('hidden');
345
-
346
- currentTokens.forEach((token, index) => {
347
- const tokenId = currentTokenIds[index];
348
- // Generate consistent pastel color based on token ID
349
- const hue = (tokenId * 137) % 360; // 137 is prime for even distribution
350
- const lightness = isDark ? '80%' : '90%';
351
- const bgColor = `hsl(${hue}, 70%, ${lightness})`;
352
- const textColor = isDark ? '#1f2937' : '#1f2937';
353
-
354
- const tokenEl = document.createElement('span');
355
- tokenEl.className = 'token-box px-2 py-1 rounded-md text-sm font-mono cursor-pointer transition-all hover:scale-105 hover:shadow-md';
356
- tokenEl.style.backgroundColor = bgColor;
357
- tokenEl.style.color = textColor;
358
-
359
- // Replace whitespace with visible symbols
360
- const displayToken = token
361
- .replace(/\n/g, '↵')
362
- .replace(/\t/g, '→')
363
- .replace(/ /g, '·');
364
- tokenEl.textContent = displayToken;
365
-
366
- // Show token ID on hover
367
- tokenEl.title = `Token ID: ${tokenId}\nRaw: ${token.replace(/\n/g, '\\n').replace(/\t/g, '\\t')}`;
368
-
369
- // Highlight special tokens
370
- if (tokenizer.special_tokens_map && Object.values(tokenizer.special_tokens_map).flat().includes(token)) {
371
- tokenEl.classList.add('border', 'border-dashed', 'border-gray-400', 'dark:border-gray-500');
372
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
 
374
- tokenContainer.appendChild(tokenEl);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  });
 
 
 
376
  }
377
 
378
- // Update stats
379
- function updateStats() {
380
- const totalTokens = currentTokens.length;
381
- const totalChars = textInput.value.length;
382
- const ratio = totalChars > 0 ? (totalTokens / totalChars).toFixed(2) : '0.00';
383
 
384
- totalTokensEl.textContent = totalTokens.toLocaleString();
385
- totalCharsEl.textContent = totalChars.toLocaleString();
386
- tokenRatioEl.textContent = ratio;
 
 
 
 
 
 
 
 
 
 
 
387
  }
388
 
389
- // Copy functions
390
- async function copyToClipboard(text, btn) {
391
- try {
392
- await navigator.clipboard.writeText(text);
393
- const originalText = btn.innerHTML;
394
- btn.innerHTML = `
395
- <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
396
- <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7"></path>
397
- </svg>
398
- Copied!
399
- `;
400
- setTimeout(() => {
401
- btn.innerHTML = originalText;
402
- }, 2000);
403
- } catch (err) {
404
- showError('Failed to copy to clipboard');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  }
 
 
 
406
  }
407
 
408
- copyTokensBtn.addEventListener('click', () => {
409
- copyToClipboard(currentTokens.join('\n'), copyTokensBtn);
410
- });
 
 
 
 
 
 
411
 
412
- copyIdsBtn.addEventListener('click', () => {
413
- copyToClipboard(JSON.stringify(currentTokenIds, null, 2), copyIdsBtn);
414
- });
 
 
 
 
 
415
 
416
- // Clear text
417
- clearTextBtn.addEventListener('click', () => {
418
- textInput.value = '';
419
- tokenizeText();
420
- });
421
 
422
- // Helper functions
423
- function showError(message) {
424
- modelError.textContent = message;
425
- modelError.classList.remove('hidden');
426
- modelSuccess.classList.add('hidden');
427
  }
428
 
429
- function showSuccess(message) {
430
- modelSuccess.textContent = message;
431
- modelSuccess.classList.remove('hidden');
432
- modelError.classList.add('hidden');
433
  }
434
 
435
- function hideMessages() {
436
- modelError.classList.add('hidden');
437
- modelSuccess.classList.add('hidden');
 
 
 
 
 
 
 
 
 
 
 
438
  }
439
 
440
- // Debounce for text input
441
- function debounce(func, wait) {
442
- let timeout;
443
- return function(...args) {
444
- clearTimeout(timeout);
445
- timeout = setTimeout(() => func.apply(this, args), wait);
446
- };
447
  }
448
 
449
- const debouncedTokenize = debounce(tokenizeText, 300);
 
 
 
 
450
 
451
- // Event listeners
452
- loadModelBtn.addEventListener('click', loadModel);
453
- modelIdInput.addEventListener('keypress', (e) => {
454
- if (e.key === 'Enter') loadModel();
455
- });
456
- textInput.addEventListener('input', debouncedTokenize);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
 
458
- // Auto-load default model on page load
459
- window.addEventListener('load', () => {
460
- // Wait a bit for Transformers.js to fully initialize
461
- setTimeout(loadModel, 500);
462
  });
463
  </script>
464
  </body>
 
1
  <!DOCTYPE html>
2
+ <html lang="en">
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>TokenViz - Universal Tokenizer Visualizer</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.0"></script>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap" rel="stylesheet">
11
+ <style>
12
+ :root {
13
+ --bg-primary: #0a0a0f;
14
+ --bg-secondary: #12121a;
15
+ --bg-tertiary: #1a1a25;
16
+ --bg-card: #16161f;
17
+ --bg-hover: #1e1e2e;
18
+ --border-color: #2a2a3a;
19
+ --border-light: #3a3a4a;
20
+ --text-primary: #e8e8f0;
21
+ --text-secondary: #a0a0b8;
22
+ --text-muted: #6b6b80;
23
+ --accent: #6366f1;
24
+ --accent-hover: #818cf8;
25
+ --accent-glow: rgba(99, 102, 241, 0.15);
26
+ --success: #22c55e;
27
+ --warning: #f59e0b;
28
+ --error: #ef4444;
29
+ --token-colors: #ef4444, #f97316, #f59e0b, #84cc16, #22c55e, #14b8a6, #06b6d4, #3b82f6, #6366f1, #8b5cf6, #a855f7, #d946ef, #ec4899, #f43f5e;
30
  }
31
+
32
+ * {
33
+ margin: 0;
34
+ padding: 0;
35
+ box-sizing: border-box;
36
+ }
37
+
38
+ body {
39
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
40
+ background: var(--bg-primary);
41
+ color: var(--text-primary);
42
+ min-height: 100vh;
43
+ overflow-x: hidden;
44
+ }
45
+
46
+ /* Animated background mesh */
47
+ .bg-mesh {
48
+ position: fixed;
49
+ top: 0;
50
+ left: 0;
51
+ width: 100%;
52
+ height: 100%;
53
+ z-index: 0;
54
+ pointer-events: none;
55
+ opacity: 0.4;
56
+ }
57
+
58
+ .bg-mesh::before {
59
+ content: '';
60
+ position: absolute;
61
+ top: -50%;
62
+ left: -50%;
63
+ width: 200%;
64
+ height: 200%;
65
+ background:
66
+ radial-gradient(circle at 20% 80%, rgba(99, 102, 241, 0.08) 0%, transparent 50%),
67
+ radial-gradient(circle at 80% 20%, rgba(139, 92, 246, 0.06) 0%, transparent 50%),
68
+ radial-gradient(circle at 50% 50%, rgba(236, 72, 153, 0.04) 0%, transparent 50%);
69
+ animation: meshFloat 20s ease-in-out infinite;
70
+ }
71
+
72
+ @keyframes meshFloat {
73
+ 0%, 100% { transform: translate(0, 0) rotate(0deg); }
74
+ 33% { transform: translate(30px, -30px) rotate(1deg); }
75
+ 66% { transform: translate(-20px, 20px) rotate(-1deg); }
76
+ }
77
+
78
+ .container {
79
+ position: relative;
80
+ z-index: 1;
81
+ max-width: 1400px;
82
+ margin: 0 auto;
83
+ padding: 2rem;
84
+ }
85
+
86
+ /* Header */
87
+ header {
88
+ text-align: center;
89
+ margin-bottom: 3rem;
90
+ padding-top: 1rem;
91
+ }
92
+
93
+ .logo {
94
+ display: inline-flex;
95
+ align-items: center;
96
+ gap: 0.75rem;
97
+ margin-bottom: 1rem;
98
+ }
99
+
100
+ .logo-icon {
101
+ width: 40px;
102
+ height: 40px;
103
+ background: linear-gradient(135deg, var(--accent), #8b5cf6);
104
+ border-radius: 12px;
105
+ display: flex;
106
+ align-items: center;
107
+ justify-content: center;
108
+ font-size: 1.25rem;
109
+ box-shadow: 0 4px 20px rgba(99, 102, 241, 0.3);
110
+ }
111
+
112
+ .logo h1 {
113
+ font-size: 1.75rem;
114
+ font-weight: 700;
115
+ background: linear-gradient(135deg, var(--text-primary), var(--accent));
116
+ -webkit-background-clip: text;
117
+ -webkit-text-fill-color: transparent;
118
+ background-clip: text;
119
+ }
120
+
121
+ .subtitle {
122
+ color: var(--text-secondary);
123
+ font-size: 0.95rem;
124
+ max-width: 600px;
125
+ margin: 0 auto;
126
+ line-height: 1.6;
127
+ }
128
+
129
+ /* Model Selector */
130
+ .model-section {
131
+ background: var(--bg-card);
132
+ border: 1px solid var(--border-color);
133
+ border-radius: 16px;
134
+ padding: 1.5rem;
135
+ margin-bottom: 1.5rem;
136
+ backdrop-filter: blur(10px);
137
+ }
138
+
139
+ .section-title {
140
+ font-size: 0.875rem;
141
+ font-weight: 600;
142
+ color: var(--text-secondary);
143
+ text-transform: uppercase;
144
+ letter-spacing: 0.05em;
145
+ margin-bottom: 1rem;
146
+ display: flex;
147
+ align-items: center;
148
+ gap: 0.5rem;
149
+ }
150
+
151
+ .model-grid {
152
+ display: grid;
153
+ grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
154
+ gap: 0.75rem;
155
+ }
156
+
157
+ .model-card {
158
+ background: var(--bg-secondary);
159
+ border: 1px solid var(--border-color);
160
+ border-radius: 12px;
161
+ padding: 1rem;
162
+ cursor: pointer;
163
+ transition: all 0.3s ease;
164
+ position: relative;
165
+ overflow: hidden;
166
+ }
167
+
168
+ .model-card::before {
169
+ content: '';
170
+ position: absolute;
171
+ top: 0;
172
+ left: 0;
173
+ width: 100%;
174
+ height: 2px;
175
+ background: linear-gradient(90deg, var(--accent), #8b5cf6);
176
+ transform: scaleX(0);
177
+ transform-origin: left;
178
+ transition: transform 0.3s ease;
179
+ }
180
+
181
+ .model-card:hover {
182
+ border-color: var(--border-light);
183
+ transform: translateY(-2px);
184
+ box-shadow: 0 8px 30px rgba(0, 0, 0, 0.3);
185
+ }
186
+
187
+ .model-card:hover::before {
188
+ transform: scaleX(1);
189
+ }
190
+
191
+ .model-card.active {
192
+ border-color: var(--accent);
193
+ background: var(--accent-glow);
194
+ box-shadow: 0 0 30px var(--accent-glow);
195
+ }
196
+
197
+ .model-card.active::before {
198
+ transform: scaleX(1);
199
+ }
200
+
201
+ .model-name {
202
+ font-weight: 600;
203
+ font-size: 0.9rem;
204
+ color: var(--text-primary);
205
+ margin-bottom: 0.25rem;
206
+ }
207
+
208
+ .model-org {
209
+ font-size: 0.75rem;
210
+ color: var(--text-muted);
211
+ }
212
+
213
+ .model-badge {
214
+ display: inline-block;
215
+ font-size: 0.65rem;
216
+ padding: 0.15rem 0.4rem;
217
+ border-radius: 4px;
218
+ background: rgba(99, 102, 241, 0.15);
219
+ color: var(--accent);
220
+ margin-top: 0.5rem;
221
+ font-weight: 500;
222
+ }
223
+
224
+ /* Custom Model Input */
225
+ .custom-model {
226
+ margin-top: 1rem;
227
+ display: flex;
228
+ gap: 0.75rem;
229
+ align-items: stretch;
230
+ }
231
+
232
+ .custom-model input {
233
+ flex: 1;
234
+ background: var(--bg-primary);
235
+ border: 1px solid var(--border-color);
236
+ border-radius: 10px;
237
+ padding: 0.75rem 1rem;
238
+ color: var(--text-primary);
239
+ font-family: 'JetBrains Mono', monospace;
240
+ font-size: 0.85rem;
241
+ outline: none;
242
+ transition: all 0.3s ease;
243
+ }
244
+
245
+ .custom-model input:focus {
246
+ border-color: var(--accent);
247
+ box-shadow: 0 0 0 3px var(--accent-glow);
248
+ }
249
+
250
+ .custom-model input::placeholder {
251
+ color: var(--text-muted);
252
+ }
253
+
254
+ .btn {
255
+ background: linear-gradient(135deg, var(--accent), #8b5cf6);
256
+ color: white;
257
+ border: none;
258
+ border-radius: 10px;
259
+ padding: 0.75rem 1.5rem;
260
+ font-weight: 600;
261
+ font-size: 0.875rem;
262
+ cursor: pointer;
263
+ transition: all 0.3s ease;
264
+ display: inline-flex;
265
+ align-items: center;
266
+ gap: 0.5rem;
267
+ white-space: nowrap;
268
+ }
269
+
270
+ .btn:hover {
271
+ transform: translateY(-1px);
272
+ box-shadow: 0 4px 20px rgba(99, 102, 241, 0.4);
273
+ }
274
+
275
+ .btn:active {
276
+ transform: translateY(0);
277
+ }
278
+
279
+ .btn:disabled {
280
+ opacity: 0.5;
281
+ cursor: not-allowed;
282
+ transform: none;
283
+ }
284
+
285
+ .btn-secondary {
286
+ background: var(--bg-tertiary);
287
+ border: 1px solid var(--border-color);
288
+ color: var(--text-secondary);
289
+ }
290
+
291
+ .btn-secondary:hover {
292
+ background: var(--bg-hover);
293
+ border-color: var(--border-light);
294
+ box-shadow: none;
295
+ }
296
+
297
+ /* Input Section */
298
+ .input-section {
299
+ background: var(--bg-card);
300
+ border: 1px solid var(--border-color);
301
+ border-radius: 16px;
302
+ padding: 1.5rem;
303
+ margin-bottom: 1.5rem;
304
+ }
305
+
306
+ .input-header {
307
+ display: flex;
308
+ justify-content: space-between;
309
+ align-items: center;
310
+ margin-bottom: 1rem;
311
+ }
312
+
313
+ .input-stats {
314
+ display: flex;
315
+ gap: 1.5rem;
316
+ font-size: 0.8rem;
317
+ color: var(--text-muted);
318
+ }
319
+
320
+ .stat-item {
321
+ display: flex;
322
+ align-items: center;
323
+ gap: 0.35rem;
324
+ }
325
+
326
+ .stat-value {
327
+ color: var(--accent);
328
+ font-weight: 600;
329
+ }
330
+
331
+ textarea {
332
+ width: 100%;
333
+ background: var(--bg-primary);
334
+ border: 1px solid var(--border-color);
335
+ border-radius: 12px;
336
+ padding: 1rem;
337
+ color: var(--text-primary);
338
+ font-family: 'Inter', sans-serif;
339
+ font-size: 1rem;
340
+ line-height: 1.6;
341
+ resize: vertical;
342
+ min-height: 120px;
343
+ outline: none;
344
+ transition: all 0.3s ease;
345
+ }
346
+
347
+ textarea:focus {
348
+ border-color: var(--accent);
349
+ box-shadow: 0 0 0 3px var(--accent-glow);
350
+ }
351
+
352
+ textarea::placeholder {
353
+ color: var(--text-muted);
354
+ }
355
+
356
+ .input-actions {
357
+ display: flex;
358
+ gap: 0.75rem;
359
+ margin-top: 1rem;
360
+ flex-wrap: wrap;
361
+ }
362
+
363
+ /* Visualization Section */
364
+ .viz-section {
365
+ background: var(--bg-card);
366
+ border: 1px solid var(--border-color);
367
+ border-radius: 16px;
368
+ padding: 1.5rem;
369
+ margin-bottom: 1.5rem;
370
+ min-height: 200px;
371
+ }
372
+
373
+ .viz-tabs {
374
+ display: flex;
375
+ gap: 0.5rem;
376
+ margin-bottom: 1.5rem;
377
+ border-bottom: 1px solid var(--border-color);
378
+ padding-bottom: 0.75rem;
379
+ }
380
+
381
+ .viz-tab {
382
+ background: none;
383
+ border: none;
384
+ color: var(--text-muted);
385
+ font-size: 0.875rem;
386
+ font-weight: 500;
387
+ padding: 0.5rem 1rem;
388
+ cursor: pointer;
389
+ border-radius: 8px;
390
+ transition: all 0.3s ease;
391
+ position: relative;
392
+ }
393
+
394
+ .viz-tab:hover {
395
+ color: var(--text-secondary);
396
+ background: var(--bg-hover);
397
+ }
398
+
399
+ .viz-tab.active {
400
+ color: var(--accent);
401
+ background: var(--accent-glow);
402
+ }
403
+
404
+ /* Token Display */
405
+ .tokens-container {
406
+ display: flex;
407
+ flex-wrap: wrap;
408
+ gap: 0.5rem;
409
+ align-items: flex-start;
410
+ font-family: 'JetBrains Mono', monospace;
411
+ line-height: 2;
412
+ }
413
+
414
+ .token {
415
+ display: inline-flex;
416
+ flex-direction: column;
417
+ align-items: center;
418
+ position: relative;
419
+ cursor: pointer;
420
+ transition: transform 0.2s ease;
421
+ }
422
+
423
+ .token:hover {
424
+ transform: translateY(-2px);
425
+ z-index: 10;
426
+ }
427
+
428
+ .token-box {
429
+ padding: 0.35rem 0.6rem;
430
+ border-radius: 8px;
431
+ font-size: 0.85rem;
432
+ font-weight: 500;
433
+ border: 1px solid transparent;
434
+ transition: all 0.2s ease;
435
+ position: relative;
436
+ min-width: 2rem;
437
+ text-align: center;
438
+ }
439
+
440
+ .token:hover .token-box {
441
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
442
+ }
443
+
444
+ .token-id {
445
+ font-size: 0.65rem;
446
+ color: var(--text-muted);
447
+ margin-top: 0.2rem;
448
+ font-family: 'JetBrains Mono', monospace;
449
+ }
450
+
451
+ .token-tooltip {
452
+ position: absolute;
453
+ bottom: calc(100% + 8px);
454
+ left: 50%;
455
+ transform: translateX(-50%) scale(0.9);
456
+ background: var(--bg-tertiary);
457
+ border: 1px solid var(--border-light);
458
+ border-radius: 10px;
459
+ padding: 0.75rem;
460
+ font-size: 0.8rem;
461
+ white-space: nowrap;
462
+ opacity: 0;
463
+ pointer-events: none;
464
+ transition: all 0.2s ease;
465
+ z-index: 100;
466
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
467
+ }
468
+
469
+ .token:hover .token-tooltip {
470
+ opacity: 1;
471
+ transform: translateX(-50%) scale(1);
472
+ }
473
+
474
+ .token-tooltip::after {
475
+ content: '';
476
+ position: absolute;
477
+ top: 100%;
478
+ left: 50%;
479
+ transform: translateX(-50%);
480
+ border: 6px solid transparent;
481
+ border-top-color: var(--border-light);
482
+ }
483
+
484
+ .tooltip-row {
485
+ display: flex;
486
+ gap: 0.5rem;
487
+ align-items: center;
488
+ }
489
+
490
+ .tooltip-label {
491
+ color: var(--text-muted);
492
+ font-size: 0.75rem;
493
+ }
494
+
495
+ .tooltip-value {
496
+ color: var(--text-primary);
497
+ font-weight: 500;
498
+ }
499
+
500
+ /* Byte-level view */
501
+ .byte-view {
502
+ font-family: 'JetBrains Mono', monospace;
503
+ font-size: 0.85rem;
504
+ line-height: 1.8;
505
+ }
506
+
507
+ .byte-row {
508
+ display: flex;
509
+ gap: 0.25rem;
510
+ margin-bottom: 0.25rem;
511
+ align-items: center;
512
+ }
513
+
514
+ .byte-char {
515
+ width: 2rem;
516
+ text-align: center;
517
+ color: var(--text-secondary);
518
+ }
519
+
520
+ .byte-hex {
521
+ width: 2.5rem;
522
+ text-align: center;
523
+ color: var(--accent);
524
+ font-size: 0.8rem;
525
+ }
526
+
527
+ .byte-token {
528
+ padding: 0.15rem 0.4rem;
529
+ border-radius: 4px;
530
+ font-size: 0.8rem;
531
+ margin-left: 0.5rem;
532
+ }
533
+
534
+ /* ID List View */
535
+ .id-list {
536
+ display: flex;
537
+ flex-wrap: wrap;
538
+ gap: 0.5rem;
539
+ font-family: 'JetBrains Mono', monospace;
540
+ }
541
+
542
+ .id-chip {
543
+ background: var(--bg-secondary);
544
+ border: 1px solid var(--border-color);
545
+ border-radius: 8px;
546
+ padding: 0.4rem 0.75rem;
547
+ font-size: 0.85rem;
548
+ color: var(--text-secondary);
549
+ transition: all 0.2s ease;
550
+ }
551
+
552
+ .id-chip:hover {
553
+ border-color: var(--accent);
554
+ color: var(--accent);
555
+ }
556
+
557
+ /* Comparison View */
558
+ .comparison-grid {
559
+ display: grid;
560
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
561
+ gap: 1rem;
562
+ }
563
+
564
+ .comparison-card {
565
+ background: var(--bg-secondary);
566
+ border: 1px solid var(--border-color);
567
+ border-radius: 12px;
568
+ padding: 1rem;
569
+ }
570
+
571
+ .comparison-header {
572
+ display: flex;
573
+ justify-content: space-between;
574
+ align-items: center;
575
+ margin-bottom: 0.75rem;
576
+ padding-bottom: 0.75rem;
577
+ border-bottom: 1px solid var(--border-color);
578
+ }
579
+
580
+ .comparison-name {
581
+ font-weight: 600;
582
+ font-size: 0.9rem;
583
+ }
584
+
585
+ .comparison-count {
586
+ font-size: 0.8rem;
587
+ color: var(--text-muted);
588
+ background: var(--bg-primary);
589
+ padding: 0.2rem 0.5rem;
590
+ border-radius: 6px;
591
+ }
592
+
593
+ /* Stats Grid */
594
+ .stats-grid {
595
+ display: grid;
596
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
597
+ gap: 1rem;
598
+ margin-bottom: 1.5rem;
599
+ }
600
+
601
+ .stat-card {
602
+ background: var(--bg-card);
603
+ border: 1px solid var(--border-color);
604
+ border-radius: 12px;
605
+ padding: 1.25rem;
606
+ transition: all 0.3s ease;
607
+ }
608
+
609
+ .stat-card:hover {
610
+ border-color: var(--border-light);
611
+ transform: translateY(-2px);
612
+ }
613
+
614
+ .stat-icon {
615
+ width: 36px;
616
+ height: 36px;
617
+ border-radius: 10px;
618
+ display: flex;
619
+ align-items: center;
620
+ justify-content: center;
621
+ font-size: 1.1rem;
622
+ margin-bottom: 0.75rem;
623
+ }
624
+
625
+ .stat-card:nth-child(1) .stat-icon { background: rgba(99, 102, 241, 0.15); }
626
+ .stat-card:nth-child(2) .stat-icon { background: rgba(34, 197, 94, 0.15); }
627
+ .stat-card:nth-child(3) .stat-icon { background: rgba(245, 158, 11, 0.15); }
628
+ .stat-card:nth-child(4) .stat-icon { background: rgba(236, 72, 153, 0.15); }
629
+
630
+ .stat-label {
631
+ font-size: 0.8rem;
632
+ color: var(--text-muted);
633
+ margin-bottom: 0.25rem;
634
+ }
635
+
636
+ .stat-number {
637
+ font-size: 1.5rem;
638
+ font-weight: 700;
639
+ color: var(--text-primary);
640
+ }
641
+
642
+ /* Loading */
643
+ .loading-overlay {
644
+ position: fixed;
645
+ top: 0;
646
+ left: 0;
647
+ width: 100%;
648
+ height: 100%;
649
+ background: rgba(10, 10, 15, 0.9);
650
+ backdrop-filter: blur(8px);
651
+ display: flex;
652
+ flex-direction: column;
653
+ align-items: center;
654
+ justify-content: center;
655
+ z-index: 1000;
656
+ transition: opacity 0.3s ease;
657
+ }
658
+
659
+ .loading-overlay.hidden {
660
+ opacity: 0;
661
+ pointer-events: none;
662
+ }
663
+
664
+ .spinner {
665
+ width: 48px;
666
+ height: 48px;
667
+ border: 3px solid var(--border-color);
668
+ border-top-color: var(--accent);
669
+ border-radius: 50%;
670
+ animation: spin 0.8s linear infinite;
671
+ }
672
+
673
+ @keyframes spin {
674
+ to { transform: rotate(360deg); }
675
+ }
676
+
677
+ .loading-text {
678
+ margin-top: 1rem;
679
+ color: var(--text-secondary);
680
+ font-size: 0.9rem;
681
+ }
682
+
683
+ .loading-subtext {
684
+ margin-top: 0.5rem;
685
+ color: var(--text-muted);
686
+ font-size: 0.8rem;
687
+ }
688
+
689
+ /* Error toast */
690
+ .toast {
691
+ position: fixed;
692
+ bottom: 2rem;
693
+ right: 2rem;
694
+ background: var(--bg-tertiary);
695
+ border: 1px solid var(--error);
696
+ border-radius: 12px;
697
+ padding: 1rem 1.5rem;
698
+ display: flex;
699
+ align-items: center;
700
+ gap: 0.75rem;
701
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
702
+ z-index: 1001;
703
+ transform: translateY(100px);
704
+ opacity: 0;
705
+ transition: all 0.3s ease;
706
+ }
707
+
708
+ .toast.show {
709
+ transform: translateY(0);
710
+ opacity: 1;
711
+ }
712
+
713
+ .toast-icon {
714
+ color: var(--error);
715
+ font-size: 1.25rem;
716
+ }
717
+
718
+ .toast-message {
719
+ color: var(--text-primary);
720
+ font-size: 0.9rem;
721
+ }
722
+
723
+ /* Responsive */
724
+ @media (max-width: 768px) {
725
+ .container {
726
+ padding: 1rem;
727
+ }
728
+
729
+ .model-grid {
730
+ grid-template-columns: 1fr;
731
+ }
732
+
733
+ .stats-grid {
734
+ grid-template-columns: repeat(2, 1fr);
735
+ }
736
+
737
+ .custom-model {
738
+ flex-direction: column;
739
  }
740
+
741
+ .input-header {
742
+ flex-direction: column;
743
+ gap: 0.75rem;
744
+ align-items: flex-start;
745
  }
746
  }
747
+
748
+ /* Scrollbar */
749
+ ::-webkit-scrollbar {
750
+ width: 8px;
751
+ height: 8px;
752
+ }
753
+
754
+ ::-webkit-scrollbar-track {
755
+ background: var(--bg-primary);
756
+ }
757
+
758
+ ::-webkit-scrollbar-thumb {
759
+ background: var(--border-color);
760
+ border-radius: 4px;
761
+ }
762
+
763
+ ::-webkit-scrollbar-thumb:hover {
764
+ background: var(--border-light);
765
+ }
766
+
767
+ /* Empty state */
768
+ .empty-state {
769
+ text-align: center;
770
+ padding: 3rem 1rem;
771
+ color: var(--text-muted);
772
+ }
773
+
774
+ .empty-state-icon {
775
+ font-size: 3rem;
776
+ margin-bottom: 1rem;
777
+ opacity: 0.5;
778
+ }
779
+
780
+ .empty-state-text {
781
+ font-size: 1rem;
782
+ margin-bottom: 0.5rem;
783
+ }
784
+
785
+ .empty-state-hint {
786
+ font-size: 0.85rem;
787
+ opacity: 0.7;
788
+ }
789
+
790
+ /* Token color classes */
791
+ .tc-0 { background: rgba(239, 68, 68, 0.15); border-color: rgba(239, 68, 68, 0.3); color: #fca5a5; }
792
+ .tc-1 { background: rgba(249, 115, 22, 0.15); border-color: rgba(249, 115, 22, 0.3); color: #fdba74; }
793
+ .tc-2 { background: rgba(245, 158, 11, 0.15); border-color: rgba(245, 158, 11, 0.3); color: #fcd34d; }
794
+ .tc-3 { background: rgba(132, 204, 22, 0.15); border-color: rgba(132, 204, 22, 0.3); color: #bef264; }
795
+ .tc-4 { background: rgba(34, 197, 94, 0.15); border-color: rgba(34, 197, 94, 0.3); color: #86efac; }
796
+ .tc-5 { background: rgba(20, 184, 166, 0.15); border-color: rgba(20, 184, 166, 0.3); color: #5eead4; }
797
+ .tc-6 { background: rgba(6, 182, 212, 0.15); border-color: rgba(6, 182, 212, 0.3); color: #67e8f9; }
798
+ .tc-7 { background: rgba(59, 130, 246, 0.15); border-color: rgba(59, 130, 246, 0.3); color: #93c5fd; }
799
+ .tc-8 { background: rgba(99, 102, 241, 0.15); border-color: rgba(99, 102, 241, 0.3); color: #a5b4fc; }
800
+ .tc-9 { background: rgba(139, 92, 246, 0.15); border-color: rgba(139, 92, 246, 0.3); color: #c4b5fd; }
801
+ .tc-10 { background: rgba(168, 85, 247, 0.15); border-color: rgba(168, 85, 247, 0.3); color: #d8b4fe; }
802
+ .tc-11 { background: rgba(217, 70, 239, 0.15); border-color: rgba(217, 70, 239, 0.3); color: #e9d5ff; }
803
+ .tc-12 { background: rgba(236, 72, 153, 0.15); border-color: rgba(236, 72, 153, 0.3); color: #f9a8d4; }
804
+ .tc-13 { background: rgba(244, 63, 94, 0.15); border-color: rgba(244, 63, 94, 0.3); color: #fda4af; }
805
+
806
+ .special-token .token-box {
807
+ background: rgba(99, 102, 241, 0.2) !important;
808
+ border-color: var(--accent) !important;
809
+ color: var(--accent-hover) !important;
810
+ font-style: italic;
811
+ }
812
+
813
+ /* Info panel */
814
+ .info-panel {
815
+ background: var(--bg-secondary);
816
+ border-radius: 10px;
817
+ padding: 1rem;
818
+ margin-top: 1rem;
819
+ font-size: 0.85rem;
820
+ color: var(--text-secondary);
821
+ border: 1px solid var(--border-color);
822
+ }
823
+
824
+ .info-panel code {
825
+ background: var(--bg-primary);
826
+ padding: 0.15rem 0.35rem;
827
+ border-radius: 4px;
828
+ font-family: 'JetBrains Mono', monospace;
829
+ font-size: 0.8rem;
830
+ color: var(--accent);
831
+ }
832
+
833
+ /* Copy button */
834
+ .copy-btn {
835
+ background: none;
836
+ border: 1px solid var(--border-color);
837
+ color: var(--text-muted);
838
+ border-radius: 6px;
839
+ padding: 0.4rem 0.75rem;
840
+ font-size: 0.8rem;
841
+ cursor: pointer;
842
+ transition: all 0.2s ease;
843
+ display: inline-flex;
844
+ align-items: center;
845
+ gap: 0.35rem;
846
+ }
847
+
848
+ .copy-btn:hover {
849
+ border-color: var(--accent);
850
+ color: var(--accent);
851
+ }
852
+
853
+ .copy-btn.copied {
854
+ border-color: var(--success);
855
+ color: var(--success);
856
+ }
857
  </style>
858
+ <base target="_blank">
859
  </head>
860
+ <body>
861
+ <div class="bg-mesh"></div>
862
+
863
+ <div class="loading-overlay" id="loadingOverlay">
864
+ <div class="spinner"></div>
865
+ <div class="loading-text">Loading tokenizer...</div>
866
+ <div class="loading-subtext" id="loadingSubtext">This may take a moment for large vocabularies</div>
867
+ </div>
868
+
869
+ <div class="toast" id="toast">
870
+ <span class="toast-icon">⚠️</span>
871
+ <span class="toast-message" id="toastMessage">Error message</span>
872
+ </div>
873
+
874
+ <div class="container">
875
+ <header>
876
+ <div class="logo">
877
+ <div class="logo-icon">πŸ”€</div>
878
+ <h1>TokenViz</h1>
879
  </div>
880
+ <p class="subtitle">
881
+ Universal tokenizer visualization for any HuggingFace model.
882
+ See exactly how LLMs break down text into tokens, IDs, and bytes — all in your browser with zero GPU required.
883
+ </p>
 
 
 
 
 
 
884
  </header>
885
 
886
+ <!-- Model Selection -->
887
+ <div class="model-section">
888
+ <div class="section-title">
889
+ <span>πŸ€–</span> Select Model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
  </div>
891
+ <div class="model-grid" id="modelGrid">
892
+ <!-- Models will be populated by JS -->
 
 
 
 
 
 
 
 
 
 
 
 
893
  </div>
894
+ <div class="custom-model">
895
+ <input type="text" id="customModelInput" placeholder="custom-org/model-name (e.g., meta-llama/Llama-3.1-8B)" />
896
+ <button class="btn" id="loadCustomBtn" onclick="loadCustomModel()">
897
+ <span>πŸ“₯</span> Load
 
 
 
 
 
 
 
 
 
898
  </button>
899
  </div>
900
+ </div>
901
+
902
+ <!-- Stats -->
903
+ <div class="stats-grid" id="statsGrid" style="display: none;">
904
+ <div class="stat-card">
905
+ <div class="stat-icon">πŸ“Š</div>
906
+ <div class="stat-label">Vocabulary Size</div>
907
+ <div class="stat-number" id="vocabSize">-</div>
 
 
 
 
 
 
 
 
908
  </div>
909
+ <div class="stat-card">
910
+ <div class="stat-icon">πŸ”’</div>
911
+ <div class="stat-label">Token Count</div>
912
+ <div class="stat-number" id="tokenCount">-</div>
913
  </div>
914
+ <div class="stat-card">
915
+ <div class="stat-icon">πŸ“</div>
916
+ <div class="stat-label">Char / Token Ratio</div>
917
+ <div class="stat-number" id="ratio">-</div>
918
  </div>
919
+ <div class="stat-card">
920
+ <div class="stat-icon">⚑</div>
921
+ <div class="stat-label">Model Type</div>
922
+ <div class="stat-number" id="modelType">-</div>
923
+ </div>
924
+ </div>
925
+
926
+ <!-- Input -->
927
+ <div class="input-section">
928
+ <div class="input-header">
929
+ <div class="section-title" style="margin: 0;">
930
+ <span>✏️</span> Input Text
931
+ </div>
932
+ <div class="input-stats">
933
+ <div class="stat-item">
934
+ <span>Chars:</span>
935
+ <span class="stat-value" id="charCount">0</span>
936
+ </div>
937
+ <div class="stat-item">
938
+ <span>Words:</span>
939
+ <span class="stat-value" id="wordCount">0</span>
940
+ </div>
 
 
 
 
 
941
  </div>
942
  </div>
943
+ <textarea id="inputText" placeholder="Type or paste text here to see how the tokenizer breaks it down...
944
+
945
+ Try: 'Hello world! 🌍 The quick brown fox jumps over 13 lazy dogs.'" oninput="handleInput()"></textarea>
946
+ <div class="input-actions">
947
+ <button class="btn btn-secondary" onclick="loadExample('simple')">Simple</button>
948
+ <button class="btn btn-secondary" onclick="loadExample('code')">Code</button>
949
+ <button class="btn btn-secondary" onclick="loadExample('multilingual')">Multilingual</button>
950
+ <button class="btn btn-secondary" onclick="loadExample('math')">Math</button>
951
+ <button class="btn btn-secondary" onclick="loadExample('emoji')">Emoji</button>
952
+ <button class="btn btn-secondary" onclick="clearText()">Clear</button>
953
+ <button class="btn" onclick="copyTokenIds()" id="copyBtn" style="margin-left: auto;">
954
+ <span>πŸ“‹</span> Copy IDs
955
+ </button>
956
  </div>
957
+ </div>
 
958
 
959
+ <!-- Visualization -->
960
+ <div class="viz-section">
961
+ <div class="viz-tabs">
962
+ <button class="viz-tab active" onclick="switchTab('tokens')">πŸ”€ Tokens</button>
963
+ <button class="viz-tab" onclick="switchTab('bytes')">πŸ’Ύ Bytes</button>
964
+ <button class="viz-tab" onclick="switchTab('ids')">πŸ”’ IDs</button>
965
+ <button class="viz-tab" onclick="switchTab('compare')">βš–οΈ Compare</button>
966
+ </div>
967
+
968
+ <div id="vizContent">
969
+ <div class="empty-state">
970
+ <div class="empty-state-icon">πŸ”</div>
971
+ <div class="empty-state-text">Enter text above to visualize tokenization</div>
972
+ <div class="empty-state-hint">Select a model and start typing to see the magic happen</div>
973
+ </div>
974
+ </div>
975
+ </div>
976
+
977
+ <!-- Info -->
978
+ <div class="info-panel">
979
+ <strong>πŸ’‘ How it works:</strong> This app uses <code>@huggingface/transformers</code> (v3.5.0) to load tokenizer files directly from the HuggingFace Hub in your browser.
980
+ It downloads <code>tokenizer.json</code> and <code>tokenizer_config.json</code> and runs tokenization entirely client-side with WebAssembly — no GPU or server required.
981
+ Works with BPE, WordPiece, Unigram, and SentencePiece tokenizers from any model.
982
+ </div>
983
  </div>
984
 
985
+ <script>
986
+ // ============================================
987
+ // CONFIGURATION & STATE
988
+ // ============================================
989
// Preset tokenizers offered in the model grid.
//
// Each `id` is handed verbatim to AutoTokenizer.from_pretrained, so it must be
// a Hugging Face Hub repo id of the form `namespace/name` (exactly one slash).
// The previous three-segment ids (`Xenova/<org>/<name>`) are not valid repo
// ids; they now point at the upstream `<org>/<name>` repositories.
// NOTE(review): some upstream repos (e.g. meta-llama, google/gemma) are
// presumably gated and may need an access token - confirm they load in the
// browser, or swap in a public tokenizer mirror.
//
// `name`, `org` and `type` are display-only labels for the model card.
const PRESET_MODELS = [
    { id: 'Xenova/gpt2', name: 'GPT-2', org: 'OpenAI', type: 'BPE' },
    { id: 'Xenova/bert-base-uncased', name: 'BERT Base', org: 'Google', type: 'WordPiece' },
    { id: 'meta-llama/Llama-3.1-8B', name: 'Llama 3.1', org: 'Meta', type: 'BPE' },
    { id: 'mistralai/Mistral-7B-v0.1', name: 'Mistral 7B', org: 'Mistral AI', type: 'BPE' },
    { id: 'Xenova/t5-small', name: 'T5 Small', org: 'Google', type: 'SentencePiece' },
    { id: 'deepseek-ai/DeepSeek-V3', name: 'DeepSeek V3', org: 'DeepSeek', type: 'BPE' },
    { id: 'Qwen/Qwen2.5-7B-Instruct', name: 'Qwen 2.5', org: 'Alibaba', type: 'BPE' },
    { id: 'microsoft/Phi-3-mini-4k-instruct', name: 'Phi-3 Mini', org: 'Microsoft', type: 'BPE' },
    { id: 'HuggingFaceTB/SmolLM2-360M-Instruct', name: 'SmolLM2', org: 'HuggingFace', type: 'BPE' },
    { id: 'google/gemma-2-2b-it', name: 'Gemma 2', org: 'Google', type: 'BPE' },
    { id: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', name: 'Nous Hermes', org: 'Nous', type: 'BPE' },
    { id: 'stabilityai/stablelm-2-1_6b', name: 'StableLM 2', org: 'Stability AI', type: 'BPE' },
];
1003
 
1004
+ let currentModel = null;
1005
+ let currentTokenizer = null;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
  let currentTokens = [];
1007
+ let activeTab = 'tokens';
1008
+ let compareMode = false;
1009
+ let compareTokenizers = {};
1010
+
1011
+ // ============================================
1012
+ // INITIALIZATION
1013
+ // ============================================
1014
// Page bootstrap: draw the preset model cards, then kick off loading of the
// first preset. The load is asynchronous and is intentionally not awaited.
function init() {
    renderModelGrid();
    const [firstPreset] = PRESET_MODELS;
    selectModel(firstPreset.id);
}
 
1019
 
1020
// Rebuilds the model grid (#modelGrid) with one card per PRESET_MODELS entry.
//
// Cards are created as DOM nodes rather than an HTML string: PRESET_MODELS can
// contain ids typed by the user (see loadCustomModel), and interpolating them
// into markup and into an inline onclick="selectModel('...')" string would let
// a quote or angle bracket break the markup or the handler.
function renderModelGrid() {
    const grid = document.getElementById('modelGrid');

    const cards = PRESET_MODELS.map(model => {
        const card = document.createElement('div');
        card.className = currentModel === model.id ? 'model-card active' : 'model-card';
        // selectModel() finds cards through data-model to toggle "active".
        card.dataset.model = model.id;
        card.addEventListener('click', () => selectModel(model.id));

        const addChild = (tag, className, text) => {
            const el = document.createElement(tag);
            el.className = className;
            el.textContent = text;
            card.appendChild(el);
        };
        addChild('div', 'model-name', model.name);
        addChild('div', 'model-org', model.org);
        addChild('span', 'model-badge', model.type);

        return card;
    });

    grid.textContent = '';
    grid.append(...cards);
}
1032
+
1033
+ // ============================================
1034
+ // MODEL LOADING
1035
+ // ============================================
1036
// Loads the tokenizer for `modelId` and makes it the active one.
//
// No-op when that model is already active with a tokenizer loaded. Otherwise
// shows the loading overlay, fetches the tokenizer, updates the card
// highlight and stats, re-tokenizes any text already in the input, and
// reports the outcome through a toast. Errors are logged and toasted, never
// rethrown; the overlay is hidden in every case.
async function selectModel(modelId) {
    const alreadyActive = currentModel === modelId && currentTokenizer;
    if (alreadyActive) {
        return;
    }

    showLoading(true, `Loading ${modelId}...`);

    try {
        const { AutoTokenizer } = window.transformers;

        // Fetched from the Hub (remote), pinned to the main revision.
        const loaded = await AutoTokenizer.from_pretrained(modelId, {
            revision: 'main',
        });

        currentModel = modelId;
        currentTokenizer = loaded;

        // Highlight only the card that belongs to the selected model.
        for (const card of document.querySelectorAll('.model-card')) {
            const isSelected = card.dataset.model === modelId;
            card.classList.toggle('active', isSelected);
        }

        updateModelStats();

        // Refresh the visualization when there is already text to show.
        const existingText = document.getElementById('inputText').value;
        if (existingText) {
            await tokenize(existingText);
        }

        showToast(`Loaded ${getModelName(modelId)} successfully`, 'success');
    } catch (err) {
        console.error('Failed to load tokenizer:', err);
        showToast(`Failed to load ${modelId}: ${err.message}`, 'error');
    } finally {
        showLoading(false);
    }
}
1075
+
1076
// Loads the tokenizer whose repo id is typed into #customModelInput.
//
// Flow: validate the input, reuse a matching preset if there is one, otherwise
// load the tokenizer once to prove the id is usable, register it as a new
// preset card, and hand over to selectModel(). Failures are reported with a
// toast; the loading overlay is always hidden afterwards.
async function loadCustomModel() {
    const input = document.getElementById('customModelInput');
    const modelId = input.value.trim();

    if (!modelId) {
        showToast('Please enter a model ID', 'error');
        return;
    }

    // Reuse a preset when the input is its full id, or its id without the
    // leading namespace. The suffix must start at a "/" boundary: a bare
    // endsWith(modelId) would let "2" match "Xenova/gpt2".
    const exists = PRESET_MODELS.find(
        m => m.id === modelId || m.id.endsWith(`/${modelId}`)
    );
    if (exists) {
        await selectModel(exists.id);
        return;
    }

    showLoading(true, `Loading ${modelId}...`);

    try {
        const { AutoTokenizer } = window.transformers;
        // Result is discarded: this call only verifies that the id loads
        // before a card is added for it.
        await AutoTokenizer.from_pretrained(modelId);

        // "namespace/name" -> org = namespace; an id without a slash has no
        // namespace, so it is labelled "Custom".
        const segments = modelId.split('/');
        const newModel = {
            id: modelId,
            name: segments[segments.length - 1],
            org: segments.length > 1 ? segments[0] : 'Custom',
            type: 'Unknown'
        };
        PRESET_MODELS.push(newModel);
        renderModelGrid();

        await selectModel(modelId);
        input.value = '';

        showToast(`Loaded custom model ${modelId}`, 'success');
    } catch (err) {
        showToast(`Failed to load ${modelId}: ${err.message}`, 'error');
    } finally {
        showLoading(false);
    }
}
1119
 
1120
// Fills the "Vocabulary Size" and "Model Type" stat cards from the active
// tokenizer and reveals the stats grid. Does nothing without a tokenizer.
function updateModelStats() {
    const tokenizer = currentTokenizer;
    if (!tokenizer) {
        return;
    }

    // Prefer the number of vocab entries, then vocab_size, then "?".
    const entryCount = Object.keys(tokenizer.vocab || {}).length;
    const sizeValue = entryCount || tokenizer.vocab_size || '?';
    const typeLabel = tokenizer.model_type || 'Unknown';

    const byId = id => document.getElementById(id);
    byId('vocabSize').textContent = sizeValue.toLocaleString();
    byId('modelType').textContent = typeLabel;
    byId('statsGrid').style.display = 'grid';
}
1130
+
1131
+ // ============================================
1132
+ // TOKENIZATION
1133
+ // ============================================
1134
// Input handler for #inputText: refreshes the character and word counters,
// then tokenizes the text with the active tokenizer. If no tokenizer is
// loaded yet, an error toast is shown instead.
async function handleInput() {
    const { value: text } = document.getElementById('inputText');

    // Words are whitespace-separated runs; blank input counts as zero words.
    const trimmed = text.trim();
    const words = trimmed ? trimmed.split(/\s+/).length : 0;

    document.getElementById('charCount').textContent = text.length;
    document.getElementById('wordCount').textContent = words;

    if (currentTokenizer) {
        await tokenize(text);
        return;
    }

    showToast('Please select a model first', 'error');
}
1148
+
1149
// Tokenizes `text` with the active tokenizer, stores the result in
// `currentTokens` as { id, text, isSpecial, index } objects, updates the
// token-count and char/token-ratio stats, and re-renders the active view.
//
// Empty text resets the tokens and stats instead of returning early, so that
// clearing the input also clears the previous visualization. Does nothing
// when no tokenizer is loaded. Errors are logged and shown as a toast.
async function tokenize(text) {
    if (!currentTokenizer) return;

    if (!text) {
        currentTokens = [];
        document.getElementById('tokenCount').textContent = '-';
        document.getElementById('ratio').textContent = '-';
        renderVisualization();
        return;
    }

    try {
        const encoding = currentTokenizer.encode(text, { return_offsets_mapping: true });
        // encode() may return the ids directly or wrapped as { input_ids }.
        const tokenIds = encoding.input_ids || encoding;

        const specialTokens = currentTokenizer.special_tokens || [];
        // id -> token string, built lazily and at most once per call; only
        // needed when decoding a single id throws.
        let reverseVocab = null;

        const tokens = [];
        for (let i = 0; i < tokenIds.length; i++) {
            const id = tokenIds[i];
            let tokenText = '';

            try {
                tokenText = currentTokenizer.decode([id], { skip_special_tokens: false });
            } catch (e) {
                if (!reverseVocab) {
                    const vocab = currentTokenizer.vocab || {};
                    reverseVocab = Object.fromEntries(
                        Object.entries(vocab).map(([k, v]) => [v, k])
                    );
                }
                tokenText = reverseVocab[id] || `[${id}]`;
            }

            // A token is special if its text contains a declared special
            // token or its id is one of the well-known special ids.
            const isSpecial = specialTokens.some(st => tokenText.includes(st)) ||
                id === currentTokenizer.bos_token_id ||
                id === currentTokenizer.eos_token_id ||
                id === currentTokenizer.pad_token_id ||
                id === currentTokenizer.unk_token_id;

            tokens.push({
                id: id,
                text: tokenText,
                isSpecial: isSpecial,
                index: i
            });
        }

        currentTokens = tokens;

        document.getElementById('tokenCount').textContent = tokens.length;
        // Guard on the token count: dividing by zero tokens would display
        // "Infinity".
        document.getElementById('ratio').textContent = tokens.length > 0
            ? (text.length / tokens.length).toFixed(2)
            : '-';

        renderVisualization();
    } catch (err) {
        console.error('Tokenization error:', err);
        showToast(`Tokenization failed: ${err.message}`, 'error');
    }
}
1206
 
1207
+ // ============================================
1208
+ // VISUALIZATION RENDERING
1209
+ // ============================================
1210
// Renders the view for `activeTab` into #vizContent, or the empty-state
// placeholder when there are no tokens to show.
function renderVisualization() {
    const container = document.getElementById('vizContent');

    if (!currentTokens.length) {
        // NOTE(review): the icon literal was encoding-damaged in the source;
        // U+1F50D (magnifying glass) is assumed - confirm against the static
        // empty-state markup.
        container.innerHTML = `
            <div class="empty-state">
                <div class="empty-state-icon">\u{1F50D}</div>
                <div class="empty-state-text">Enter text above to visualize tokenization</div>
            </div>
        `;
        return;
    }

    switch (activeTab) {
        case 'tokens':
            renderTokensView(container);
            break;
        case 'bytes':
            renderBytesView(container);
            break;
        case 'ids':
            renderIdsView(container);
            break;
        case 'compare':
            // renderCompareView is async; attach a handler so a failure is
            // logged instead of becoming an unhandled promise rejection.
            renderCompareView(container).catch(err => {
                console.error('Comparison rendering failed:', err);
            });
            break;
    }
}
1238
+
1239
// Renders `currentTokens` as coloured chips, each with its id underneath and
// a hover tooltip showing id, text, index and (when applicable) the
// special-token marker.
function renderTokensView(container) {
    const MIDDLE_DOT = '\u00B7';   // visible stand-in for a space
    const RETURN_MARK = '\u21B5';  // visible stand-in for a newline

    const html = currentTokens.map((token, idx) => {
        const colorClass = `tc-${idx % 14}`; // 14 colour classes, tc-0..tc-13
        const specialClass = token.isSpecial ? 'special-token' : '';
        // Make whitespace visible: a raw newline collapses in HTML and would
        // leave the chip looking empty.
        const displayText = escapeHtml(token.text)
            .replace(/ /g, MIDDLE_DOT)
            .replace(/\n/g, RETURN_MARK);

        return `
            <div class="token ${specialClass}">
                <div class="token-tooltip">
                    <div class="tooltip-row">
                        <span class="tooltip-label">ID:</span>
                        <span class="tooltip-value">${token.id}</span>
                    </div>
                    <div class="tooltip-row">
                        <span class="tooltip-label">Text:</span>
                        <span class="tooltip-value">"${escapeHtml(token.text)}"</span>
                    </div>
                    <div class="tooltip-row">
                        <span class="tooltip-label">Index:</span>
                        <span class="tooltip-value">${token.index}</span>
                    </div>
                    ${token.isSpecial ? '<div class="tooltip-row"><span class="tooltip-label">Type:</span><span class="tooltip-value" style="color: var(--accent)">Special Token</span></div>' : ''}
                </div>
                <div class="token-box ${colorClass}">${displayText || MIDDLE_DOT}</div>
                <div class="token-id">${token.id}</div>
            </div>
        `;
    }).join('');

    container.innerHTML = `<div class="tokens-container">${html}</div>`;
}
1270
+
1271
// Renders one panel per token listing the UTF-8 bytes of the token's text,
// one row per byte: printable character, hex value, decimal value, and the
// byte's position within the token.
//
// The bytes come from re-encoding each token's decoded text (token.text) as
// UTF-8. NOTE(review): this presumably differs from the tokenizer's raw bytes
// when a token covers only part of a multi-byte character - confirm.
function renderBytesView(container) {
    const encoder = new TextEncoder();
    const MIDDLE_DOT = '\u00B7'; // shown for bytes outside printable ASCII

    const panels = currentTokens.map((token, tIdx) => {
        const tokenBytes = encoder.encode(token.text);
        const colorClass = `tc-${tIdx % 14}`;

        const rows = Array.from(tokenBytes, (byte, i) => {
            const char = byte >= 32 && byte < 127 ? String.fromCharCode(byte) : MIDDLE_DOT;
            return `
                <div class="byte-row">
                    <span class="byte-char">${escapeHtml(char)}</span>
                    <span class="byte-hex">0x${byte.toString(16).padStart(2, '0')}</span>
                    <span class="byte-dec" style="width: 3rem; text-align: center; color: var(--text-muted); font-size: 0.8rem;">${byte}</span>
                    <span class="byte-token ${colorClass}" style="font-size: 0.75rem; padding: 0.1rem 0.3rem;">Byte ${i}</span>
                </div>
            `;
        }).join('');

        return `<div style="margin-bottom: 0.5rem; padding: 0.5rem; background: var(--bg-secondary); border-radius: 8px;">`
            + `<div style="font-size: 0.8rem; color: var(--text-muted); margin-bottom: 0.25rem;">Token ${tIdx}: "${escapeHtml(token.text)}" (ID: ${token.id})</div>`
            + rows
            + '</div>';
    }).join('');

    container.innerHTML = `<div class="byte-view">${panels}</div>`;
}
1305
 
1306
// Renders the token ids as chips plus a bracketed array, with two copy
// buttons: "Copy Array" copies "[1, 2, 3]" and "Copy JSON" copies the
// JSON.stringify form.
//
// The buttons use click listeners rather than inline onclick strings. The old
// "Copy JSON" handler interpolated the array as a JS literal, so the
// clipboard received the array's string coercion ("1,2,3") instead of JSON.
function renderIdsView(container) {
    const ids = currentTokens.map(t => t.id);
    const arrayText = `[${ids.join(', ')}]`;
    const jsonText = JSON.stringify(ids);
    const CLIPBOARD_ICON = '\u{1F4CB}';

    const chips = ids.map((id, idx) => `
        <div class="id-chip" title="Index: ${idx}">${id}</div>
    `).join('');

    container.innerHTML = `
        <div style="margin-bottom: 1rem;">
            <button type="button" class="copy-btn" data-copy="array" style="font-family: inherit;">
                <span>${CLIPBOARD_ICON}</span> Copy Array
            </button>
            <button type="button" class="copy-btn" data-copy="json" style="font-family: inherit; margin-left: 0.5rem;">
                <span>${CLIPBOARD_ICON}</span> Copy JSON
            </button>
        </div>
        <div class="id-list">${chips}</div>
        <div style="margin-top: 1rem; padding: 1rem; background: var(--bg-secondary); border-radius: 8px; font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; color: var(--text-secondary); overflow-x: auto;">
            ${arrayText}
        </div>
    `;

    container.querySelector('[data-copy="array"]')
        .addEventListener('click', () => copyToClipboard(arrayText));
    container.querySelector('[data-copy="json"]')
        .addEventListener('click', () => copyToClipboard(jsonText));
}
1327
 
1328
// Renders a side-by-side comparison: the active model's tokens first, then
// the same input tokenized by up to four of the first presets (excluding the
// active model). Comparison tokenizers are loaded on demand and cached in
// `compareTokenizers`; a preset that fails to load gets a "Failed to load"
// card.
//
// Loading is asynchronous, so the result is written only if the compare tab,
// the input text and the active model are still what they were when
// rendering started. Otherwise a slow download could overwrite a newer view.
async function renderCompareView(container) {
    const MIDDLE_DOT = '\u00B7';
    const compareModels = PRESET_MODELS.slice(0, 4).filter(m => m.id !== currentModel);
    const text = document.getElementById('inputText').value;
    const modelAtStart = currentModel;

    // Shared chip markup for a list of token strings.
    const renderChips = tokenTexts => tokenTexts.map((t, i) => `
        <div class="token">
            <div class="token-box tc-${i % 14}" style="padding: 0.2rem 0.4rem; font-size: 0.75rem;">${escapeHtml(t).replace(/ /g, MIDDLE_DOT) || MIDDLE_DOT}</div>
        </div>
    `).join('');

    // Card for the active model, built from the tokens already computed.
    let html = '<div class="comparison-grid">';
    html += `
        <div class="comparison-card" style="border-color: var(--accent);">
            <div class="comparison-header">
                <span class="comparison-name" style="color: var(--accent);">\u2B50 ${escapeHtml(getModelName(currentModel))}</span>
                <span class="comparison-count">${currentTokens.length} tokens</span>
            </div>
            <div class="tokens-container" style="font-size: 0.75rem;">
                ${renderChips(currentTokens.map(t => t.text))}
            </div>
        </div>
    `;

    // Show a placeholder while tokenizers still have to be downloaded.
    if (compareModels.some(m => !compareTokenizers[m.id])) {
        container.innerHTML = `
            <div class="empty-state">
                <div class="empty-state-text">Loading comparison tokenizers...</div>
            </div>
        `;
    }

    for (const model of compareModels) {
        try {
            if (!compareTokenizers[model.id]) {
                const { AutoTokenizer } = window.transformers;
                compareTokenizers[model.id] = await AutoTokenizer.from_pretrained(model.id);
            }

            const tok = compareTokenizers[model.id];
            const encoding = tok.encode(text);
            const ids = encoding.input_ids || encoding;

            const otherTokens = [];
            for (const id of ids) {
                let txt = '';
                try {
                    txt = tok.decode([id], { skip_special_tokens: false });
                } catch (e) {
                    txt = `[${id}]`;
                }
                otherTokens.push(txt);
            }

            html += `
                <div class="comparison-card">
                    <div class="comparison-header">
                        <span class="comparison-name">${escapeHtml(model.name)}</span>
                        <span class="comparison-count">${otherTokens.length} tokens</span>
                    </div>
                    <div class="tokens-container" style="font-size: 0.75rem;">
                        ${renderChips(otherTokens)}
                    </div>
                </div>
            `;
        } catch (e) {
            html += `
                <div class="comparison-card">
                    <div class="comparison-header">
                        <span class="comparison-name">${escapeHtml(model.name)}</span>
                    </div>
                    <div style="color: var(--text-muted); font-size: 0.85rem;">Failed to load</div>
                </div>
            `;
        }
    }

    html += '</div>';

    // Drop the result if the user moved on while tokenizers were loading.
    const stillCurrent = activeTab === 'compare'
        && currentModel === modelAtStart
        && document.getElementById('inputText').value === text;
    if (!stillCurrent) return;

    container.innerHTML = html;
}
1405
 
1406
+ // ============================================
1407
+ // UI HELPERS
1408
+ // ============================================
1409
// Activates the visualization tab named `tab` ('tokens', 'bytes', 'ids' or
// 'compare'), highlights its button, and re-renders the visualization.
//
// The button is resolved from the triggering click event when there is one,
// and otherwise by matching the tab name in the button's inline onclick
// attribute. The function therefore also works when called programmatically,
// where the implicit global `event` is undefined.
function switchTab(tab) {
    activeTab = tab;

    const tabButtons = Array.from(document.querySelectorAll('.viz-tab'));

    const trigger = typeof window !== 'undefined' ? window.event : undefined;
    let selected = trigger && trigger.target && typeof trigger.target.closest === 'function'
        ? trigger.target.closest('.viz-tab')
        : null;

    if (!selected) {
        selected = tabButtons.find(
            btn => (btn.getAttribute('onclick') || '').includes(`'${tab}'`)
        ) || null;
    }

    tabButtons.forEach(btn => btn.classList.toggle('active', btn === selected));
    renderVisualization();
}
1415
 
1416
/**
 * Load one of the built-in sample texts into the `inputText` field and
 * call `handleInput()`.
 *
 * @param {string} type - Sample key: `simple`, `code`, `multilingual`,
 *     `math` or `emoji`. Unknown keys are ignored.
 */
function loadExample(type) {
    // NOTE(review): the non-ASCII literals below were restored from a
    // mis-encoded copy; a few emoji whose bytes were lost were inferred from
    // their neighbours - confirm against the authoritative file.
    const examples = {
        simple: "Hello world! This is a simple example of how tokenization works.",
        code: "function fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}",
        multilingual: "Hello 世界 🌍! Bonjour le monde! ¡Hola mundo! こんにちは世界!",
        math: "The equation $E = mc^2$ shows that energy equals mass times the speed of light squared. ∫(x² + 3x)dx",
        emoji: "🎉🎊 Party time! 🥳🎂🎈🎁🎄🎃🦃🎅🤶🧑‍🎄🎆🎇✨🎀🎋🎍🎎🎏🎐🎑🧧🎀🎁🎗️🎟️🎫🎖️🏆🏅🥇🥈🥉"
    };

    // Assigning `undefined` to .value would put the literal text "undefined"
    // into the field, so bail out on keys that are not real samples.
    if (!Object.prototype.hasOwnProperty.call(examples, type)) return;

    document.getElementById('inputText').value = examples[type];
    handleInput();
}
 
 
1428
 
1429
/**
 * Reset the `inputText` field to an empty string, then call
 * `handleInput()`.
 */
function clearText() {
    const inputBox = document.getElementById('inputText');
    inputBox.value = '';
    handleInput();
}
1433
 
1434
/**
 * Copy the ids of `currentTokens` to the clipboard, formatted as a
 * bracketed, comma-separated list (e.g. `[1, 2, 3]`).
 * Does nothing when there are no current tokens.
 */
function copyTokenIds() {
    if (currentTokens.length) {
        const idList = currentTokens.map(({ id }) => id).join(', ');
        copyToClipboard('[' + idList + ']');
    }
}
1439
 
1440
/**
 * Copy `text` to the system clipboard and report the outcome via a toast.
 *
 * Tries the asynchronous Clipboard API first; if that is unavailable or
 * rejects, falls back to selecting an off-screen textarea and issuing the
 * legacy `copy` command.
 *
 * @param {string} text - Text to place on the clipboard.
 * @returns {Promise<void>} Resolves once the attempt has finished; the
 *     result is surfaced through `showToast`, not the return value.
 */
async function copyToClipboard(text) {
    let copied = false;

    try {
        await navigator.clipboard.writeText(text);
        copied = true;
    } catch (err) {
        // Fallback
        const textarea = document.createElement('textarea');
        textarea.value = text;
        // Keep the helper element out of sight and out of the layout so the
        // page does not jump or flash while it is attached.
        textarea.setAttribute('readonly', '');
        textarea.style.position = 'fixed';
        textarea.style.top = '0';
        textarea.style.left = '0';
        textarea.style.opacity = '0';
        document.body.appendChild(textarea);
        try {
            textarea.select();
            // execCommand signals failure through its return value.
            copied = document.execCommand('copy');
        } catch (fallbackErr) {
            copied = false;
        } finally {
            // Always detach the helper, even if the copy command threw.
            document.body.removeChild(textarea);
        }
    }

    if (copied) {
        showToast('Copied to clipboard!', 'success');
    } else {
        showToast('Failed to copy to clipboard', 'error');
    }
}
1455
 
1456
/**
 * Resolve a display name for a model id.
 *
 * @param {string} modelId - Model identifier to look up.
 * @returns {string} The `name` of the matching `PRESET_MODELS` entry, or,
 *     when there is no match, the part of `modelId` after its last `/`.
 */
function getModelName(modelId) {
    const match = PRESET_MODELS.find(({ id }) => id === modelId);
    if (match) {
        return match.name;
    }
    const segments = modelId.split('/');
    return segments[segments.length - 1];
}
1460
 
1461
/**
 * Escape a value for safe interpolation into HTML markup.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with character references, so the
 * result is safe in both element content and quoted attribute values.
 * Implemented with plain string replacement so no DOM node is created per
 * call.
 *
 * @param {*} text - Value to escape; non-strings are converted with
 *     `String()`. `null` and `undefined` produce an empty string.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
    if (text === null || text === undefined) return '';

    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return String(text).replace(/[&<>"']/g, ch => entities[ch]);
}
1466
 
1467
+ // ============================================
1468
+ // LOADING & TOAST
1469
+ // ============================================
1470
/**
 * Show or hide the `loadingOverlay` element.
 *
 * @param {boolean} show - Truthy to reveal the overlay, falsy to hide it.
 * @param {string} [text=''] - Message written to the overlay's
 *     `.loading-text` element when showing; an empty value falls back to
 *     'Loading...'. Ignored when hiding.
 */
function showLoading(show, text = '') {
    const overlay = document.getElementById('loadingOverlay');

    if (show) {
        overlay.querySelector('.loading-text').textContent = text || 'Loading...';
        // Visibility is driven by the `hidden` class.
        overlay.classList.remove('hidden');
    } else {
        overlay.classList.add('hidden');
    }
}
1481
+
1482
/**
 * Display a transient toast notification for three seconds.
 *
 * @param {string} message - Text written to the `toastMessage` element.
 * @param {string} [type='error'] - `'success'` selects the success icon and
 *     border colour; any other value selects the warning icon and error
 *     border colour.
 */
function showToast(message, type = 'error') {
    const toast = document.getElementById('toast');
    const msgEl = document.getElementById('toastMessage');
    const iconEl = toast.querySelector('.toast-icon');

    msgEl.textContent = message;

    if (type === 'success') {
        iconEl.textContent = '✅';
        toast.style.borderColor = 'var(--success)';
    } else {
        iconEl.textContent = '⚠️';
        toast.style.borderColor = 'var(--error)';
    }

    toast.classList.add('show');

    // Cancel the hide timer left by a previous toast; otherwise it would fire
    // mid-way through this one and hide it early.
    clearTimeout(showToast.hideTimer);
    showToast.hideTimer = setTimeout(() => toast.classList.remove('show'), 3000);
}
1500
+
1501
+ // ============================================
1502
+ // START
1503
+ // ============================================
1504
// Run init() once the document has been parsed.
document.addEventListener('DOMContentLoaded', init);

// Handle Enter key in custom model input.
// `keydown` replaces the deprecated `keypress` event; the isComposing check
// keeps an Enter that merely confirms an IME composition from loading a model.
document.getElementById('customModelInput')?.addEventListener('keydown', (e) => {
    if (e.key === 'Enter' && !e.isComposing) loadCustomModel();
});
1510
  </script>
1511
  </body>