Jonna Marie Matthiesen Claude Opus 4.6 committed on
Commit
7671b7c
·
1 Parent(s): bd14011

Add accuracy tables, RTX 3500 Ada latency, and fix 270M variant grouping

Browse files

- Add accuracy CSVs for Llama-3.2, Gemma-3, and Qwen3 (baseline vs FlashHead)
- Add RTX 3500 Ada TPS-only latency data for all families with README measurements
- Add Gemma-3-270M RTX 3500 Ada data (first latency data for this variant)
- Register rtx_3500_ada device in config with experiment setup
- Fix deriveBaseFamily regex to match M (million) suffixes so Gemma-3-270M
groups under Gemma-3 in the sidebar

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

app.js CHANGED
@@ -113,7 +113,7 @@ let MODEL_FAMILIES = {};
113
  // Derive base family from config key by parsing model name patterns.
114
  // Size suffixes like -2B, -0.8B identify specific size variants.
115
// Derive the base family from a config key by parsing model-name patterns.
// Size suffixes such as -2B, -0.8B, or -270M identify specific size variants;
// stripping them yields the family used for sidebar grouping.
function deriveBaseFamily(key) {
  // Accept both B (billion) and M (million) suffixes so variants like
  // "Gemma-3-270M" group under "Gemma-3" instead of forming their own family.
  const match = key.match(/^(.+?)-(\d+(?:\.\d+)?[BM])$/i);
  if (match) return match[1];
  return key;
}
 
113
  // Derive base family from config key by parsing model name patterns.
114
  // Size suffixes like -2B, -0.8B identify specific size variants.
115
// Derive the base family from a config key by trimming a trailing size
// suffix (e.g. -2B, -0.8B, -270M, case-insensitive). Keys without such a
// suffix are returned unchanged.
function deriveBaseFamily(key) {
  const sizeVariant = /^(.+?)-(\d+(?:\.\d+)?[BM])$/i;
  const parsed = sizeVariant.exec(key);
  return parsed ? parsed[1] : key;
}
config.json CHANGED
@@ -28,7 +28,8 @@
28
  "orin_nano": "Jetson Orin Nano Super",
29
  "orin_nano_super": "Jetson Orin Nano Super",
30
  "agx_orin": "Jetson AGX Orin",
31
- "agx_thor": "Jetson AGX Thor"
 
32
  }
33
  }
34
  ],
@@ -227,28 +228,34 @@
227
  },
228
  "Llama-3.2": {
229
  "data_file": "data/Llama-3.2.csv",
 
230
  "experiment_setup": {
231
  "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
232
  "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
233
- "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
 
234
  },
235
  "default_device": "agx_orin"
236
  },
237
  "Gemma-3": {
238
  "data_file": "data/Gemma-3.csv",
 
239
  "experiment_setup": {
240
  "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
241
  "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
242
- "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
 
243
  },
244
  "default_device": "agx_orin"
245
  },
246
  "Qwen3": {
247
  "data_file": "data/Qwen3.csv",
 
248
  "experiment_setup": {
249
  "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
250
  "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
251
- "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
 
252
  },
253
  "default_device": "agx_orin"
254
  }
 
28
  "orin_nano": "Jetson Orin Nano Super",
29
  "orin_nano_super": "Jetson Orin Nano Super",
30
  "agx_orin": "Jetson AGX Orin",
31
+ "agx_thor": "Jetson AGX Thor",
32
+ "rtx_3500_ada": "RTX 3500 Ada"
33
  }
34
  }
35
  ],
 
228
  },
229
  "Llama-3.2": {
230
  "data_file": "data/Llama-3.2.csv",
231
+ "accuracy_file": "data/acc-Llama-3.2.csv",
232
  "experiment_setup": {
233
  "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
234
  "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
235
+ "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
236
+ "rtx_3500_ada": "Measurement setup: vLLM 0.10.2, batch_size=1, 32 input tokens, 128 output tokens generated, 10 warm-up runs, averaged over 100 runs."
237
  },
238
  "default_device": "agx_orin"
239
  },
240
  "Gemma-3": {
241
  "data_file": "data/Gemma-3.csv",
242
+ "accuracy_file": "data/acc-Gemma-3.csv",
243
  "experiment_setup": {
244
  "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
245
  "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
246
+ "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
247
+ "rtx_3500_ada": "Measurement setup: vLLM 0.10.2, batch_size=1, 32 input tokens, 128 output tokens generated, 10 warm-up runs, averaged over 100 runs."
248
  },
249
  "default_device": "agx_orin"
250
  },
251
  "Qwen3": {
252
  "data_file": "data/Qwen3.csv",
253
+ "accuracy_file": "data/acc-Qwen3.csv",
254
  "experiment_setup": {
255
  "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
256
  "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
257
+ "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
258
+ "rtx_3500_ada": "Measurement setup: vLLM 0.10.2, batch_size=1, 32 input tokens, 128 output tokens generated, 10 warm-up runs, averaged over 100 runs."
259
  },
260
  "default_device": "agx_orin"
261
  }
data/Gemma-3.csv CHANGED
@@ -3,3 +3,11 @@ Gemma-3-1B,google/gemma-3-1b-it,text,1,agx_thor,N/A,N/A,N/A,3.2242,79.40,12.59,0
3
  Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,agx_thor,N/A,N/A,N/A,3.0829,83.04,12.04,0.00
4
  Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.6719,153.12,6.53,0.00
5
  Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.8132,141.18,7.08,0.00
 
 
 
 
 
 
 
 
 
3
  Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,agx_thor,N/A,N/A,N/A,3.0829,83.04,12.04,0.00
4
  Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.6719,153.12,6.53,0.00
5
  Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.8132,141.18,7.08,0.00
6
+ Gemma-3-1B,google/gemma-3-1b-it,text,1,rtx_3500_ada,N/A,N/A,N/A,,148.00,,
7
+ Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,178.00,,
8
+ Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,243.00,,
9
+ Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,336.00,,
10
+ Gemma-3-270M,google/gemma-3-270m-it,text,1,rtx_3500_ada,N/A,N/A,N/A,,397.00,,
11
+ Gemma-3-270M,embedl/gemma-3-270m-it-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,526.00,,
12
+ Gemma-3-270M,embedl/gemma-3-270m-it-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,420.00,,
13
+ Gemma-3-270M,embedl/gemma-3-270m-it-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,568.00,,
data/Llama-3.2.csv CHANGED
@@ -19,3 +19,11 @@ Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,orin_nano_super,N/A,N/A,N/A
19
  Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,orin_nano_super,N/A,N/A,N/A,5.8939,43.43,23.02,0.00
20
  Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,2.2979,111.41,8.98,0.00
21
  Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,3.2571,78.60,12.72,0.00
 
 
 
 
 
 
 
 
 
19
  Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,orin_nano_super,N/A,N/A,N/A,5.8939,43.43,23.02,0.00
20
  Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,2.2979,111.41,8.98,0.00
21
  Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,3.2571,78.60,12.72,0.00
22
+ Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,rtx_3500_ada,N/A,N/A,N/A,,130.00,,
23
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,163.00,,
24
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,278.00,,
25
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,485.00,,
26
+ Llama-3.2-3B,meta-llama/Llama-3.2-3B-Instruct,text,1,rtx_3500_ada,N/A,N/A,N/A,,54.00,,
27
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,58.00,,
28
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,141.00,,
29
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,177.00,,
data/Qwen3.csv CHANGED
@@ -11,3 +11,7 @@ Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.9020,
11
  Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_orin,N/A,N/A,N/A,2.5357,100.96,9.91,0.00
12
  Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_orin,N/A,N/A,N/A,2.1783,117.52,8.51,0.00
13
  Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,1.8865,135.70,7.37,0.00
 
 
 
 
 
11
  Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_orin,N/A,N/A,N/A,2.5357,100.96,9.91,0.00
12
  Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_orin,N/A,N/A,N/A,2.1783,117.52,8.51,0.00
13
  Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,1.8865,135.70,7.37,0.00
14
+ Qwen3-1.7B,Qwen/Qwen3-1.7B,text,1,rtx_3500_ada,N/A,N/A,N/A,,100.00,,
15
+ Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,114.00,,
16
+ Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,206.00,,
17
+ Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,271.00,,
data/acc-Gemma-3.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Model,MMLU-Pro,IFEval,BBH,TruthfulQA,GSM8K
2
+ google/gemma-3-1b-it,0.15,0.55,0.38,0.31,0.42
3
+ embedl/gemma-3-1b-it-FlashHead,0.15,0.49,0.38,0.31,0.39
data/acc-Llama-3.2.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,MMLU-Pro,HellaSwag,IFEval,BoolQ,BBH,TruthfulQA,GSM8K
2
+ meta-llama/Llama-3.2-1B-Instruct,0.18,0.59,0.45,0.69,0.38,0.36,0.46
3
+ embedl/Llama-3.2-1B-Instruct-FlashHead,0.18,0.59,0.45,0.69,0.38,0.36,0.46
4
+ meta-llama/Llama-3.2-3B-Instruct,0.31,,0.57,,0.57,0.57,0.77
5
+ embedl/Llama-3.2-3B-Instruct-FlashHead,0.31,,0.56,,0.57,0.58,0.77
data/acc-Qwen3.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Model,MMLU-Pro,IFEval,BBH,TruthfulQA,GSM8K
2
+ Qwen/Qwen3-1.7B,0.38,0.24,0.45,0.47,0.13
3
+ embedl/Qwen3-1.7B-FlashHead,0.38,0.25,0.45,0.47,0.12