Jonna Marie Matthiesen Claude Opus 4.6 committed on
Commit ·
7671b7c
1
Parent(s): bd14011
Add accuracy tables, RTX 3500 Ada latency, and fix 270M variant grouping
Browse files- Add accuracy CSVs for Llama-3.2, Gemma-3, and Qwen3 (baseline vs FlashHead)
- Add RTX 3500 Ada TPS-only latency data for all families with README measurements
- Add Gemma-3-270M RTX 3500 Ada data (first latency data for this variant)
- Register rtx_3500_ada device in config with experiment setup
- Fix deriveBaseFamily regex to match M (million) suffixes so Gemma-3-270M
groups under Gemma-3 in the sidebar
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- app.js +1 -1
- config.json +11 -4
- data/Gemma-3.csv +8 -0
- data/Llama-3.2.csv +8 -0
- data/Qwen3.csv +4 -0
- data/acc-Gemma-3.csv +3 -0
- data/acc-Llama-3.2.csv +5 -0
- data/acc-Qwen3.csv +3 -0
app.js
CHANGED
|
@@ -113,7 +113,7 @@ let MODEL_FAMILIES = {};
|
|
| 113 |
// Derive base family from config key by parsing model name patterns.
|
| 114 |
// Size suffixes like -2B, -0.8B identify specific size variants.
|
| 115 |
function deriveBaseFamily(key) {
|
| 116 |
-
const match = key.match(/^(.+?)-(\d+(?:\.\d+)?B)$/i);
|
| 117 |
if (match) return match[1];
|
| 118 |
return key;
|
| 119 |
}
|
|
|
|
| 113 |
// Derive base family from config key by parsing model name patterns.
|
| 114 |
// Size suffixes like -2B, -0.8B identify specific size variants.
|
| 115 |
function deriveBaseFamily(key) {
|
| 116 |
+
const match = key.match(/^(.+?)-(\d+(?:\.\d+)?[BM])$/i);
|
| 117 |
if (match) return match[1];
|
| 118 |
return key;
|
| 119 |
}
|
config.json
CHANGED
|
@@ -28,7 +28,8 @@
|
|
| 28 |
"orin_nano": "Jetson Orin Nano Super",
|
| 29 |
"orin_nano_super": "Jetson Orin Nano Super",
|
| 30 |
"agx_orin": "Jetson AGX Orin",
|
| 31 |
-
"agx_thor": "Jetson AGX Thor"
|
|
|
|
| 32 |
}
|
| 33 |
}
|
| 34 |
],
|
|
@@ -227,28 +228,34 @@
|
|
| 227 |
},
|
| 228 |
"Llama-3.2": {
|
| 229 |
"data_file": "data/Llama-3.2.csv",
|
|
|
|
| 230 |
"experiment_setup": {
|
| 231 |
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 232 |
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 233 |
-
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
|
|
|
|
| 234 |
},
|
| 235 |
"default_device": "agx_orin"
|
| 236 |
},
|
| 237 |
"Gemma-3": {
|
| 238 |
"data_file": "data/Gemma-3.csv",
|
|
|
|
| 239 |
"experiment_setup": {
|
| 240 |
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 241 |
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 242 |
-
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
|
|
|
|
| 243 |
},
|
| 244 |
"default_device": "agx_orin"
|
| 245 |
},
|
| 246 |
"Qwen3": {
|
| 247 |
"data_file": "data/Qwen3.csv",
|
|
|
|
| 248 |
"experiment_setup": {
|
| 249 |
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 250 |
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 251 |
-
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
|
|
|
|
| 252 |
},
|
| 253 |
"default_device": "agx_orin"
|
| 254 |
}
|
|
|
|
| 28 |
"orin_nano": "Jetson Orin Nano Super",
|
| 29 |
"orin_nano_super": "Jetson Orin Nano Super",
|
| 30 |
"agx_orin": "Jetson AGX Orin",
|
| 31 |
+
"agx_thor": "Jetson AGX Thor",
|
| 32 |
+
"rtx_3500_ada": "RTX 3500 Ada"
|
| 33 |
}
|
| 34 |
}
|
| 35 |
],
|
|
|
|
| 228 |
},
|
| 229 |
"Llama-3.2": {
|
| 230 |
"data_file": "data/Llama-3.2.csv",
|
| 231 |
+
"accuracy_file": "data/acc-Llama-3.2.csv",
|
| 232 |
"experiment_setup": {
|
| 233 |
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 234 |
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 235 |
+
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 236 |
+
"rtx_3500_ada": "Measurement setup: vLLM 0.10.2, batch_size=1, 32 input tokens, 128 output tokens generated, 10 warm-up runs, averaged over 100 runs."
|
| 237 |
},
|
| 238 |
"default_device": "agx_orin"
|
| 239 |
},
|
| 240 |
"Gemma-3": {
|
| 241 |
"data_file": "data/Gemma-3.csv",
|
| 242 |
+
"accuracy_file": "data/acc-Gemma-3.csv",
|
| 243 |
"experiment_setup": {
|
| 244 |
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 245 |
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 246 |
+
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 247 |
+
"rtx_3500_ada": "Measurement setup: vLLM 0.10.2, batch_size=1, 32 input tokens, 128 output tokens generated, 10 warm-up runs, averaged over 100 runs."
|
| 248 |
},
|
| 249 |
"default_device": "agx_orin"
|
| 250 |
},
|
| 251 |
"Qwen3": {
|
| 252 |
"data_file": "data/Qwen3.csv",
|
| 253 |
+
"accuracy_file": "data/acc-Qwen3.csv",
|
| 254 |
"experiment_setup": {
|
| 255 |
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 256 |
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 257 |
+
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 258 |
+
"rtx_3500_ada": "Measurement setup: vLLM 0.10.2, batch_size=1, 32 input tokens, 128 output tokens generated, 10 warm-up runs, averaged over 100 runs."
|
| 259 |
},
|
| 260 |
"default_device": "agx_orin"
|
| 261 |
}
|
data/Gemma-3.csv
CHANGED
|
@@ -3,3 +3,11 @@ Gemma-3-1B,google/gemma-3-1b-it,text,1,agx_thor,N/A,N/A,N/A,3.2242,79.40,12.59,0
|
|
| 3 |
Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,agx_thor,N/A,N/A,N/A,3.0829,83.04,12.04,0.00
|
| 4 |
Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.6719,153.12,6.53,0.00
|
| 5 |
Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.8132,141.18,7.08,0.00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,agx_thor,N/A,N/A,N/A,3.0829,83.04,12.04,0.00
|
| 4 |
Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.6719,153.12,6.53,0.00
|
| 5 |
Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.8132,141.18,7.08,0.00
|
| 6 |
+
Gemma-3-1B,google/gemma-3-1b-it,text,1,rtx_3500_ada,N/A,N/A,N/A,,148.00,,
|
| 7 |
+
Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,178.00,,
|
| 8 |
+
Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,243.00,,
|
| 9 |
+
Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,336.00,,
|
| 10 |
+
Gemma-3-270M,google/gemma-3-270m-it,text,1,rtx_3500_ada,N/A,N/A,N/A,,397.00,,
|
| 11 |
+
Gemma-3-270M,embedl/gemma-3-270m-it-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,526.00,,
|
| 12 |
+
Gemma-3-270M,embedl/gemma-3-270m-it-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,420.00,,
|
| 13 |
+
Gemma-3-270M,embedl/gemma-3-270m-it-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,568.00,,
|
data/Llama-3.2.csv
CHANGED
|
@@ -19,3 +19,11 @@ Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,orin_nano_super,N/A,N/A,N/A
|
|
| 19 |
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,orin_nano_super,N/A,N/A,N/A,5.8939,43.43,23.02,0.00
|
| 20 |
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,2.2979,111.41,8.98,0.00
|
| 21 |
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,3.2571,78.60,12.72,0.00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,orin_nano_super,N/A,N/A,N/A,5.8939,43.43,23.02,0.00
|
| 20 |
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,2.2979,111.41,8.98,0.00
|
| 21 |
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,3.2571,78.60,12.72,0.00
|
| 22 |
+
Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,rtx_3500_ada,N/A,N/A,N/A,,130.00,,
|
| 23 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,163.00,,
|
| 24 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,278.00,,
|
| 25 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,485.00,,
|
| 26 |
+
Llama-3.2-3B,meta-llama/Llama-3.2-3B-Instruct,text,1,rtx_3500_ada,N/A,N/A,N/A,,54.00,,
|
| 27 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,58.00,,
|
| 28 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,141.00,,
|
| 29 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,177.00,,
|
data/Qwen3.csv
CHANGED
|
@@ -11,3 +11,7 @@ Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.9020,
|
|
| 11 |
Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_orin,N/A,N/A,N/A,2.5357,100.96,9.91,0.00
|
| 12 |
Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_orin,N/A,N/A,N/A,2.1783,117.52,8.51,0.00
|
| 13 |
Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,1.8865,135.70,7.37,0.00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_orin,N/A,N/A,N/A,2.5357,100.96,9.91,0.00
|
| 12 |
Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_orin,N/A,N/A,N/A,2.1783,117.52,8.51,0.00
|
| 13 |
Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,1.8865,135.70,7.37,0.00
|
| 14 |
+
Qwen3-1.7B,Qwen/Qwen3-1.7B,text,1,rtx_3500_ada,N/A,N/A,N/A,,100.00,,
|
| 15 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead,text,1,rtx_3500_ada,N/A,N/A,N/A,,114.00,,
|
| 16 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,206.00,,
|
| 17 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,rtx_3500_ada,N/A,N/A,N/A,,271.00,,
|
data/acc-Gemma-3.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Model,MMLU-Pro,IFEval,BBH,TruthfulQA,GSM8K
|
| 2 |
+
google/gemma-3-1b-it,0.15,0.55,0.38,0.31,0.42
|
| 3 |
+
embedl/gemma-3-1b-it-FlashHead,0.15,0.49,0.38,0.31,0.39
|
data/acc-Llama-3.2.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Model,MMLU-Pro,HellaSwag,IFEval,BoolQ,BBH,TruthfulQA,GSM8K
|
| 2 |
+
meta-llama/Llama-3.2-1B-Instruct,0.18,0.59,0.45,0.69,0.38,0.36,0.46
|
| 3 |
+
embedl/Llama-3.2-1B-Instruct-FlashHead,0.18,0.59,0.45,0.69,0.38,0.36,0.46
|
| 4 |
+
meta-llama/Llama-3.2-3B-Instruct,0.31,,0.57,,0.57,0.57,0.77
|
| 5 |
+
embedl/Llama-3.2-3B-Instruct-FlashHead,0.31,,0.56,,0.57,0.58,0.77
|
data/acc-Qwen3.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Model,MMLU-Pro,IFEval,BBH,TruthfulQA,GSM8K
|
| 2 |
+
Qwen/Qwen3-1.7B,0.38,0.24,0.45,0.47,0.13
|
| 3 |
+
embedl/Qwen3-1.7B-FlashHead,0.38,0.25,0.45,0.47,0.12
|