quickgrid committed on
Commit
b26223b
·
verified ·
1 Parent(s): 6ff3f85
Files changed (1) hide show
  1. index.html +21 -133
index.html CHANGED
@@ -108,6 +108,7 @@
108
  position: sticky;
109
  top: 0;
110
  z-index: 100;
 
111
  }
112
 
113
  .logo {
@@ -116,6 +117,7 @@
116
  gap: 10px;
117
  text-decoration: none;
118
  color: var(--text);
 
119
  }
120
  .logo-hex {
121
  width: 34px;
@@ -151,65 +153,25 @@
151
  letter-spacing: .5px;
152
  }
153
 
154
- .header-right {
155
- display: flex;
156
- align-items: center;
157
- gap: 16px;
158
- }
159
- .header-badge {
160
- display: flex;
161
- align-items: center;
162
- gap: 6px;
163
- font-size: 12px;
164
- color: var(--text2);
165
- font-family: 'JetBrains Mono', monospace;
166
- }
167
- .header-badge .dot {
168
- width: 7px;
169
- height: 7px;
170
- border-radius: 50%;
171
- background: var(--green);
172
- box-shadow: 0 0 8px var(--green);
173
- animation: pulse-dot 2s ease-in-out infinite;
174
- }
175
- @keyframes pulse-dot {
176
- 0%,100% { opacity: 1; }
177
- 50% { opacity: .4; }
178
- }
179
-
180
- /* ─── Model Selector Bar ─────────────────────────────── */
181
- .model-bar {
182
- padding: 16px 32px;
183
- border-bottom: 1px solid var(--border);
184
- background: rgba(11,18,32,.7);
185
- backdrop-filter: blur(12px);
186
- }
187
- .model-bar-label {
188
- font-size: 11px;
189
- font-family: 'JetBrains Mono', monospace;
190
- color: var(--text3);
191
- letter-spacing: 1.5px;
192
- text-transform: uppercase;
193
- margin-bottom: 10px;
194
- }
195
  .model-tabs {
196
  display: flex;
197
- flex-wrap: wrap;
198
- gap: 8px;
199
  align-items: center;
 
200
  }
201
  .model-tab {
202
  display: flex;
203
  flex-direction: column;
204
- padding: 8px 14px;
205
  border: 1px solid var(--border);
206
- border-radius: 10px;
207
  background: var(--bg2);
208
  cursor: pointer;
209
  transition: all 0.2s ease;
210
  position: relative;
211
  overflow: hidden;
212
- min-width: 110px;
213
  }
214
  .model-tab::before {
215
  content: '';
@@ -258,12 +220,12 @@
258
  top: -1px;
259
  }
260
 
261
- /* Custom model row */
262
  .custom-model-row {
263
  display: flex;
264
  align-items: center;
265
- gap: 10px;
266
- margin-top: 12px;
267
  }
268
  .custom-model-row label {
269
  font-size: 11px;
@@ -272,22 +234,22 @@
272
  white-space: nowrap;
273
  }
274
  .custom-input {
275
- flex: 1;
276
- max-width: 380px;
277
  background: var(--bg2);
278
  border: 1px solid var(--border);
279
  border-radius: 8px;
280
  color: var(--text);
281
  font-family: 'JetBrains Mono', monospace;
282
  font-size: 13px;
283
- padding: 7px 12px;
284
  outline: none;
285
  transition: border-color 0.2s;
286
  }
287
  .custom-input:focus { border-color: var(--accent); }
288
  .custom-input::placeholder { color: var(--text3); }
289
  .btn {
290
- padding: 7px 16px;
291
  border-radius: 8px;
292
  border: 1px solid var(--border2);
293
  background: linear-gradient(135deg, rgba(77,158,245,.15), rgba(139,106,245,.15));
@@ -802,7 +764,6 @@
802
  /* ─── Responsive ─────────────────────────────────────── */
803
  @media (max-width: 900px) {
804
  header { padding: 0 16px; }
805
- .model-bar { padding: 12px 16px; }
806
  main { grid-template-columns: 1fr; }
807
  .input-panel { border-right: none; border-bottom: 1px solid var(--border); }
808
  .stats-row { grid-template-columns: repeat(2, 1fr); }
@@ -833,29 +794,20 @@
833
  <div class="logo">
834
  <div class="logo-hex">T</div>
835
  <span class="logo-name">TokenLens</span>
836
- <span class="logo-tag">v1.0</span>
837
  </div>
838
- <div class="header-right">
839
- <div class="header-badge">
840
- <span class="dot"></span>
841
- <span>runs in-browser Β· no server Β· no GPU</span>
842
- </div>
843
- </div>
844
- </header>
845
-
846
- <!-- Model Selector Bar -->
847
- <div class="model-bar">
848
- <div class="model-bar-label">β–Έ select tokenizer</div>
849
  <div class="model-tabs" id="model-tabs">
850
  <!-- populated by JS -->
851
  </div>
 
852
  <div class="custom-model-row">
853
  <label>HF model id:</label>
854
  <input class="custom-input" id="custom-model-input" type="text"
855
- placeholder="e.g. deepseek-ai/DeepSeek-V4-Pro or Xenova/gpt2" />
856
  <button class="btn" id="custom-model-btn">Load β†—</button>
857
  </div>
858
- </div>
859
 
860
  <!-- Main -->
861
  <main>
@@ -973,18 +925,7 @@ env.allowLocalModels = false;
973
  env.useBrowserCache = true;
974
 
975
  // ── Model Registry ─────────────────────────────────────────
976
- // Add any HuggingFace model ID here — tokenizer.json + tokenizer_config.json
977
- // are the only files downloaded (no weights, no GPU needed).
978
  const MODELS = [
979
- {
980
- id: 'Xenova/gpt2',
981
- name: 'GPT-2',
982
- org: 'OpenAI',
983
- color: '#10a37f',
984
- vocab: '50k',
985
- type: 'BPE',
986
- desc: 'Classic GPT-2 BPE tokenizer'
987
- },
988
  {
989
  id: 'Xenova/gpt-4',
990
  name: 'GPT-4',
@@ -994,15 +935,6 @@ const MODELS = [
994
  type: 'tiktoken cl100k',
995
  desc: 'Used by GPT-3.5 & GPT-4'
996
  },
997
- {
998
- id: 'Xenova/llama-tokenizer',
999
- name: 'LLaMA 2',
1000
- org: 'Meta',
1001
- color: '#0466de',
1002
- vocab: '32k',
1003
- type: 'SP-BPE',
1004
- desc: 'SentencePiece BPE β€” LLaMA / LLaMA-2'
1005
- },
1006
  {
1007
  id: 'Xenova/mistral-tokenizer-v1',
1008
  name: 'Mistral',
@@ -1012,24 +944,6 @@ const MODELS = [
1012
  type: 'SP-BPE',
1013
  desc: 'Mistral 7B v0.1 tokenizer'
1014
  },
1015
- {
1016
- id: 'Xenova/bert-base-uncased',
1017
- name: 'BERT',
1018
- org: 'Google',
1019
- color: '#4285f4',
1020
- vocab: '30k',
1021
- type: 'WordPiece',
1022
- desc: 'BERT-base uncased WordPiece'
1023
- },
1024
- {
1025
- id: 'Xenova/t5-base',
1026
- name: 'T5',
1027
- org: 'Google',
1028
- color: '#34a853',
1029
- vocab: '32k',
1030
- type: 'Unigram',
1031
- desc: 'T5 SentencePiece Unigram'
1032
- },
1033
  {
1034
  id: 'Xenova/claude-tokenizer',
1035
  name: 'Claude',
@@ -1039,15 +953,6 @@ const MODELS = [
1039
  type: 'BPE',
1040
  desc: "Anthropic Claude's tokenizer"
1041
  },
1042
- {
1043
- id: 'Xenova/roberta-base',
1044
- name: 'RoBERTa',
1045
- org: 'Meta',
1046
- color: '#1a73e8',
1047
- vocab: '50k',
1048
- type: 'BPE',
1049
- desc: 'RoBERTa byte-level BPE'
1050
- },
1051
  ];
1052
 
1053
  // ── Token Color Palette ────────────────────────────────────
@@ -1178,18 +1083,12 @@ function setStats(tokens, text) {
1178
  }
1179
 
1180
  // ── Decode raw token string for display ───────────────────
1181
- // Handles BPE Ġ prefix, SentencePiece ▁ prefix, byte tokens, etc.
1182
  function decodeTokenString(raw) {
1183
  if (!raw) return '';
1184
- // BPE space prefix
1185
  let s = raw.replace(/^Δ /, ' ').replace(/Δ /g, ' ');
1186
- // SentencePiece space prefix
1187
  s = s.replace(/^▁/, ' ').replace(/▁/g, ' ');
1188
- // Newline representation
1189
  s = s.replace(/Ċ/g, '\n');
1190
- // Carriage return
1191
  s = s.replace(/\r/g, '');
1192
- // Byte tokens like <0xAB>
1193
  s = s.replace(/<0x([0-9A-Fa-f]{2})>/g, (_, hex) => {
1194
  const code = parseInt(hex, 16);
1195
  return code < 128 ? String.fromCharCode(code) : `[0x${hex}]`;
@@ -1210,7 +1109,6 @@ async function tokenize(text) {
1210
  try {
1211
  $placeholder.style.display = 'none';
1212
 
1213
- // Run tokenizer — only tokenize, no special tokens by default
1214
  const encoded = await activeTokenizer(text, {
1215
  add_special_tokens: showSpecial,
1216
  return_offsets_mapping: false,
@@ -1218,18 +1116,15 @@ async function tokenize(text) {
1218
 
1219
  const ids = Array.from(encoded.input_ids.data);
1220
 
1221
- // Get raw token strings
1222
  let rawTokens;
1223
  try {
1224
  rawTokens = activeTokenizer.model.convert_ids_to_tokens(ids);
1225
  } catch {
1226
- // Fallback: decode each token individually
1227
  rawTokens = await Promise.all(
1228
  ids.map(id => activeTokenizer.decode([id], { skip_special_tokens: false }))
1229
  );
1230
  }
1231
 
1232
- // Pair: { id, raw, display }
1233
  const tokens = ids.map((id, i) => ({
1234
  id,
1235
  raw: rawTokens[i] || '',
@@ -1265,7 +1160,6 @@ function renderTextView(tokens) {
1265
  span.style.color = c.text;
1266
  span.style.borderBottom = `2px solid ${c.border}`;
1267
 
1268
- // Display text — handle spaces and newlines visually
1269
  const disp = tok.display;
1270
  if (disp === ' ') {
1271
  span.innerHTML = '&nbsp;';
@@ -1277,7 +1171,6 @@ function renderTextView(tokens) {
1277
  span.textContent = disp;
1278
  }
1279
 
1280
- // Tooltip
1281
  const tip = document.createElement('div');
1282
  tip.className = 'tok-tooltip';
1283
 
@@ -1317,7 +1210,6 @@ function renderIdView(tokens) {
1317
 
1318
  const bot = document.createElement('div');
1319
  bot.className = 'tok-id-bottom';
1320
- // Show abbreviated display text
1321
  const label = tok.display.slice(0, 8).replace(/\n/g,'↡').replace(/\t/g,'β†’');
1322
  bot.textContent = label || '…';
1323
 
@@ -1494,7 +1386,6 @@ async function loadCustomModel() {
1494
  const id = $customInput.value.trim();
1495
  if (!id) { showToast('Please enter a model ID'); return; }
1496
 
1497
- // Deselect tabs
1498
  document.querySelectorAll('.model-tab').forEach(t => t.classList.remove('active'));
1499
  activeModel = id;
1500
  await loadModel(id);
@@ -1508,15 +1399,12 @@ $customInput.addEventListener('keydown', e => {
1508
  // ── Init ───────────────────────────────────────────────────
1509
 
1510
  buildTabs();
1511
- // Hide overlay on start (no model yet)
1512
  $overlay.classList.add('hidden');
1513
 
1514
- // Default placeholder text
1515
  $input.value = '';
1516
 
1517
- // Auto-select first model
1518
  selectTab(MODELS[0].id);
1519
 
1520
  </script>
1521
  </body>
1522
- </html>
 
108
  position: sticky;
109
  top: 0;
110
  z-index: 100;
111
+ gap: 16px;
112
  }
113
 
114
  .logo {
 
117
  gap: 10px;
118
  text-decoration: none;
119
  color: var(--text);
120
+ flex-shrink: 0;
121
  }
122
  .logo-hex {
123
  width: 34px;
 
153
  letter-spacing: .5px;
154
  }
155
 
156
+ /* Model tabs in header */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  .model-tabs {
158
  display: flex;
159
+ gap: 6px;
 
160
  align-items: center;
161
+ flex-shrink: 0;
162
  }
163
  .model-tab {
164
  display: flex;
165
  flex-direction: column;
166
+ padding: 6px 10px;
167
  border: 1px solid var(--border);
168
+ border-radius: 8px;
169
  background: var(--bg2);
170
  cursor: pointer;
171
  transition: all 0.2s ease;
172
  position: relative;
173
  overflow: hidden;
174
+ min-width: 90px;
175
  }
176
  .model-tab::before {
177
  content: '';
 
220
  top: -1px;
221
  }
222
 
223
+ /* Custom model row in header */
224
  .custom-model-row {
225
  display: flex;
226
  align-items: center;
227
+ gap: 8px;
228
+ flex-shrink: 0;
229
  }
230
  .custom-model-row label {
231
  font-size: 11px;
 
234
  white-space: nowrap;
235
  }
236
  .custom-input {
237
+ width: 180px;
238
+ max-width: 220px;
239
  background: var(--bg2);
240
  border: 1px solid var(--border);
241
  border-radius: 8px;
242
  color: var(--text);
243
  font-family: 'JetBrains Mono', monospace;
244
  font-size: 13px;
245
+ padding: 6px 10px;
246
  outline: none;
247
  transition: border-color 0.2s;
248
  }
249
  .custom-input:focus { border-color: var(--accent); }
250
  .custom-input::placeholder { color: var(--text3); }
251
  .btn {
252
+ padding: 6px 14px;
253
  border-radius: 8px;
254
  border: 1px solid var(--border2);
255
  background: linear-gradient(135deg, rgba(77,158,245,.15), rgba(139,106,245,.15));
 
764
  /* ─── Responsive ─────────────────────────────────────── */
765
  @media (max-width: 900px) {
766
  header { padding: 0 16px; }
 
767
  main { grid-template-columns: 1fr; }
768
  .input-panel { border-right: none; border-bottom: 1px solid var(--border); }
769
  .stats-row { grid-template-columns: repeat(2, 1fr); }
 
794
  <div class="logo">
795
  <div class="logo-hex">T</div>
796
  <span class="logo-name">TokenLens</span>
797
+ <span class="logo-tag">v1.1</span>
798
  </div>
799
+
 
 
 
 
 
 
 
 
 
 
800
  <div class="model-tabs" id="model-tabs">
801
  <!-- populated by JS -->
802
  </div>
803
+
804
  <div class="custom-model-row">
805
  <label>HF model id:</label>
806
  <input class="custom-input" id="custom-model-input" type="text"
807
+ placeholder="e.g. Xenova/gpt2" />
808
  <button class="btn" id="custom-model-btn">Load β†—</button>
809
  </div>
810
+ </header>
811
 
812
  <!-- Main -->
813
  <main>
 
925
  env.useBrowserCache = true;
926
 
927
  // ── Model Registry ─────────────────────────────────────────
 
 
928
  const MODELS = [
 
 
 
 
 
 
 
 
 
929
  {
930
  id: 'Xenova/gpt-4',
931
  name: 'GPT-4',
 
935
  type: 'tiktoken cl100k',
936
  desc: 'Used by GPT-3.5 & GPT-4'
937
  },
 
 
 
 
 
 
 
 
 
938
  {
939
  id: 'Xenova/mistral-tokenizer-v1',
940
  name: 'Mistral',
 
944
  type: 'SP-BPE',
945
  desc: 'Mistral 7B v0.1 tokenizer'
946
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947
  {
948
  id: 'Xenova/claude-tokenizer',
949
  name: 'Claude',
 
953
  type: 'BPE',
954
  desc: "Anthropic Claude's tokenizer"
955
  },
 
 
 
 
 
 
 
 
 
956
  ];
957
 
958
  // ── Token Color Palette ────────────────────────────────────
 
1083
  }
1084
 
1085
  // ── Decode raw token string for display ───────────────────
 
1086
  function decodeTokenString(raw) {
1087
  if (!raw) return '';
 
1088
  let s = raw.replace(/^Δ /, ' ').replace(/Δ /g, ' ');
 
1089
  s = s.replace(/^▁/, ' ').replace(/▁/g, ' ');
 
1090
  s = s.replace(/Ċ/g, '\n');
 
1091
  s = s.replace(/\r/g, '');
 
1092
  s = s.replace(/<0x([0-9A-Fa-f]{2})>/g, (_, hex) => {
1093
  const code = parseInt(hex, 16);
1094
  return code < 128 ? String.fromCharCode(code) : `[0x${hex}]`;
 
1109
  try {
1110
  $placeholder.style.display = 'none';
1111
 
 
1112
  const encoded = await activeTokenizer(text, {
1113
  add_special_tokens: showSpecial,
1114
  return_offsets_mapping: false,
 
1116
 
1117
  const ids = Array.from(encoded.input_ids.data);
1118
 
 
1119
  let rawTokens;
1120
  try {
1121
  rawTokens = activeTokenizer.model.convert_ids_to_tokens(ids);
1122
  } catch {
 
1123
  rawTokens = await Promise.all(
1124
  ids.map(id => activeTokenizer.decode([id], { skip_special_tokens: false }))
1125
  );
1126
  }
1127
 
 
1128
  const tokens = ids.map((id, i) => ({
1129
  id,
1130
  raw: rawTokens[i] || '',
 
1160
  span.style.color = c.text;
1161
  span.style.borderBottom = `2px solid ${c.border}`;
1162
 
 
1163
  const disp = tok.display;
1164
  if (disp === ' ') {
1165
  span.innerHTML = '&nbsp;';
 
1171
  span.textContent = disp;
1172
  }
1173
 
 
1174
  const tip = document.createElement('div');
1175
  tip.className = 'tok-tooltip';
1176
 
 
1210
 
1211
  const bot = document.createElement('div');
1212
  bot.className = 'tok-id-bottom';
 
1213
  const label = tok.display.slice(0, 8).replace(/\n/g,'↡').replace(/\t/g,'β†’');
1214
  bot.textContent = label || '…';
1215
 
 
1386
  const id = $customInput.value.trim();
1387
  if (!id) { showToast('Please enter a model ID'); return; }
1388
 
 
1389
  document.querySelectorAll('.model-tab').forEach(t => t.classList.remove('active'));
1390
  activeModel = id;
1391
  await loadModel(id);
 
1399
  // ── Init ───────────────────────────────────────────────────
1400
 
1401
  buildTabs();
 
1402
  $overlay.classList.add('hidden');
1403
 
 
1404
  $input.value = '';
1405
 
 
1406
  selectTab(MODELS[0].id);
1407
 
1408
  </script>
1409
  </body>
1410
+ </html>