Jonna Marie Matthiesen Claude Opus 4.6 (1M context) committed on
Commit
b96722d
·
1 Parent(s): 7faf78c

Add Llama-3.2, Gemma-3, and Qwen3 benchmarks and improve chart rendering

Browse files

Add benchmark data for three new model families (Llama-3.2, Gemma-3,
Qwen3) on NVIDIA edge devices using vLLM 0.19.0. Improve chart/table
UX by sorting bars by model size, hiding all-zero metrics, prefixing
labels when multiple families are shown, and supporting configurable
base display names.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (5) hide show
  1. app.js +84 -36
  2. config.json +33 -0
  3. data/Gemma-3.csv +5 -0
  4. data/Llama-3.2.csv +21 -0
  5. data/Qwen3.csv +13 -0
app.js CHANGED
@@ -154,6 +154,11 @@ function detectFamilies() {
154
  if (!families[fk]) families[fk] = { base: fk, models: [] };
155
  if (!families[fk].models.includes(model)) families[fk].models.push(model);
156
  });
 
 
 
 
 
157
  } else {
158
  const externalNames = ALL_MODELS.filter(isExternalModel).map(m => m.split("/").pop());
159
  externalNames.sort((a, b) => b.length - a.length);
@@ -224,22 +229,20 @@ function assignModelColors() {
224
  MODEL_SHORT[model] = suffix || (isExternalModel(model) ? "Original" : name);
225
  });
226
  });
227
- // Resolve duplicate short labels: use the model's short name instead
228
- const labelCounts = {};
229
- for (const m of ALL_MODELS) {
230
- const lbl = MODEL_SHORT[m];
231
- if (!labelCounts[lbl]) labelCounts[lbl] = [];
232
- labelCounts[lbl].push(m);
233
- }
234
- for (const [lbl, models] of Object.entries(labelCounts)) {
235
- if (models.length > 1) {
236
- models.forEach(m => { MODEL_SHORT[m] = m.split("/").pop(); });
237
- }
238
- }
239
  }
240
 
241
  // ─── Helpers ──────────────────────────────────────────────────────────────────
242
 
 
 
 
 
 
 
 
 
243
  function isOOMRow(row) {
244
  return config.metrics.every(m => row[m.column] === null);
245
  }
@@ -505,10 +508,14 @@ function buildChart(filtered) {
505
 
506
  chartHeader.appendChild(headerLeft);
507
 
508
- if (config.metrics.length > 1) {
 
 
 
 
509
  const metricEl = metricGroup.querySelector(".btn-group");
510
  renderBtnGroup(metricEl,
511
- config.metrics.map(m => ({ value: m.column, label: m.short || m.column })),
512
  filters.metric
513
  );
514
  chartHeader.appendChild(metricGroup);
@@ -537,7 +544,31 @@ function buildChart(filtered) {
537
  const picked = models.map(m => allRows.find(r => r[MODEL_COL] === m)).filter(Boolean);
538
  if (!picked.length) return;
539
 
540
- const labels = picked.map(r => MODEL_SHORT[r[MODEL_COL]]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  const data = picked.map(r => r[metricCol] === null ? 0 : r[metricCol]);
542
  const bgColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].bg);
543
  const borderColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].border);
@@ -622,11 +653,16 @@ function buildTables(filtered, chartsShown) {
622
  );
623
  });
624
 
 
 
 
 
 
625
  // Build column list: Model + visible display cols + metrics
626
  const colDefs = [
627
  { key: MODEL_COL, label: "MODEL", isModel: true },
628
  ...visibleDisplay.map(dc => ({ key: dc.column, label: dc.label, description: dc.description || "" })),
629
- ...config.metrics.map(m => ({ key: m.column, label: m.short || m.column, isMetric: true, description: m.description || "" })),
630
  ];
631
 
632
  // Resolve table_sort: family-specific overrides global
@@ -635,35 +671,47 @@ function buildTables(filtered, chartsShown) {
635
  const tableGroupBy = familyCfg.table_group_by || config.table_group_by || "";
636
  const tableGroupCols = Array.isArray(tableGroupBy) ? tableGroupBy : (tableGroupBy ? [tableGroupBy] : []);
637
 
 
 
 
 
 
638
  groupVals.forEach(gv => {
639
  const rows = filtered.filter(r => String(r[GROUP_BY]) === String(gv));
640
  if (!rows.length) return;
 
 
641
  rows.sort((a, b) => {
642
- for (const rule of sortRules) {
643
- const col = rule.column;
644
- const mul = rule.direction === "desc" ? -1 : 1;
645
- if (rule.external_first && col === MODEL_COL) {
646
- const aExt = isExternalModel(a[col]) ? 0 : 1;
647
- const bExt = isExternalModel(b[col]) ? 0 : 1;
648
- if (aExt !== bExt) return (aExt - bExt) * mul;
649
- }
650
  const av = a[col], bv = b[col];
651
- if (av === bv || (av == null && bv == null)) continue;
652
- if (av == null) return 1;
653
- if (bv == null) return -1;
654
- if (typeof av === "number" && typeof bv === "number") {
655
- if (av !== bv) return (av - bv) * mul;
656
- } else {
657
  const aNum = parseFloat(String(av));
658
  const bNum = parseFloat(String(bv));
659
  if (!isNaN(aNum) && !isNaN(bNum)) {
660
- if (aNum !== bNum) return (aNum - bNum) * mul;
 
 
 
661
  }
662
- const cmp = String(av).localeCompare(String(bv));
663
- if (cmp !== 0) return cmp * mul;
664
  }
665
  }
666
- return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
667
  });
668
 
669
  // Track row group for break detection
@@ -897,10 +945,10 @@ async function switchBaseFamily(baseFamilyKey) {
897
  DATA = allRows;
898
  ALL_MODELS = [...new Set(DATA.map(r => r[MODEL_COL]))];
899
  MODEL_FAMILIES = detectFamilies();
900
- // Rebuild display variants from detected model_family values
901
  bf.variants = Object.keys(MODEL_FAMILIES).filter(v =>
902
  deriveBaseFamily(v) === baseFamilyKey
903
- );
904
  assignModelColors();
905
  renderSidebar();
906
  updateDependentFilters(true);
 
154
  if (!families[fk]) families[fk] = { base: fk, models: [] };
155
  if (!families[fk].models.includes(model)) families[fk].models.push(model);
156
  });
157
+ // Use base_name from config if provided, otherwise fall back to family key
158
+ const baseNames = config.base_names || {};
159
+ for (const fk of Object.keys(families)) {
160
+ if (baseNames[fk]) families[fk].base = baseNames[fk];
161
+ }
162
  } else {
163
  const externalNames = ALL_MODELS.filter(isExternalModel).map(m => m.split("/").pop());
164
  externalNames.sort((a, b) => b.length - a.length);
 
229
  MODEL_SHORT[model] = suffix || (isExternalModel(model) ? "Original" : name);
230
  });
231
  });
232
+ // NOTE: duplicate short labels are resolved at chart render time
233
+ // so that the disambiguation depends on which models are actually visible.
 
 
 
 
 
 
 
 
 
 
234
  }
235
 
236
  // ─── Helpers ──────────────────────────────────────────────────────────────────
237
 
238
/**
 * Extract a model's size, in billions of parameters, from a name such as
 * "Llama-3.2-3B" or "Gemma-3-270M".
 *
 * The first number that is directly followed by a "B" or "M" unit (either
 * case) and a word boundary is used; "M" values are converted to billions.
 *
 * @param {*} s - Model or family name. `null`, `undefined` and non-string
 *   values are tolerated and treated as having no size.
 * @returns {number} Size in billions, or 0 when no size token is found.
 */
function parseModelSize(s) {
  // Callers pass row fields that may be missing; never throw inside a sort comparator.
  if (s == null) return 0;
  const match = String(s).match(/(\d+(?:\.\d+)?)\s*([BM])\b/i);
  if (!match) return 0;
  const value = parseFloat(match[1]);
  // Normalise millions to billions so sizes are comparable across units.
  return match[2].toUpperCase() === "M" ? value / 1000 : value;
}
245
+
246
  function isOOMRow(row) {
247
  return config.metrics.every(m => row[m.column] === null);
248
  }
 
508
 
509
  chartHeader.appendChild(headerLeft);
510
 
511
+ // Only show metric buttons for metrics that have non-zero data
512
+ const chartVisibleMetrics = config.metrics.filter(m =>
513
+ gRows.some(r => r[m.column] !== null && r[m.column] !== 0)
514
+ );
515
+ if (chartVisibleMetrics.length > 1) {
516
  const metricEl = metricGroup.querySelector(".btn-group");
517
  renderBtnGroup(metricEl,
518
+ chartVisibleMetrics.map(m => ({ value: m.column, label: m.short || m.column })),
519
  filters.metric
520
  );
521
  chartHeader.appendChild(metricGroup);
 
544
  const picked = models.map(m => allRows.find(r => r[MODEL_COL] === m)).filter(Boolean);
545
  if (!picked.length) return;
546
 
547
+ // Sort bars: model size → Original first → metric value
548
+ const hib = metricCfg.higher_is_better !== false;
549
+ picked.sort((a, b) => {
550
+ const sizeA = parseModelSize(a[FAMILY_COL] || a[MODEL_COL]);
551
+ const sizeB = parseModelSize(b[FAMILY_COL] || b[MODEL_COL]);
552
+ if (sizeA !== sizeB) return sizeA - sizeB;
553
+ const extA = isExternalModel(a[MODEL_COL]) ? 0 : 1;
554
+ const extB = isExternalModel(b[MODEL_COL]) ? 0 : 1;
555
+ if (extA !== extB) return extA - extB;
556
+ const va = a[metricCol] ?? 0;
557
+ const vb = b[metricCol] ?? 0;
558
+ return hib ? va - vb : vb - va;
559
+ });
560
+
561
+ // Build labels; prefix all with model_family when multiple families are shown
562
+ const rawLabels = picked.map(r => MODEL_SHORT[r[MODEL_COL]]);
563
+ const families = new Set(picked.map(r => r[FAMILY_COL]));
564
+ const needPrefix = families.size > 1;
565
+ const labels = rawLabels.map((lbl, i) => {
566
+ if (needPrefix) {
567
+ const fk = picked[i][FAMILY_COL] || "";
568
+ return lbl ? `${fk} ${lbl}` : fk;
569
+ }
570
+ return lbl;
571
+ });
572
  const data = picked.map(r => r[metricCol] === null ? 0 : r[metricCol]);
573
  const bgColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].bg);
574
  const borderColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].border);
 
653
  );
654
  });
655
 
656
+ // Hide metric columns where every value in the filtered data is zero or null
657
+ const visibleMetrics = config.metrics.filter(m =>
658
+ filtered.some(r => r[m.column] !== null && r[m.column] !== 0)
659
+ );
660
+
661
  // Build column list: Model + visible display cols + metrics
662
  const colDefs = [
663
  { key: MODEL_COL, label: "MODEL", isModel: true },
664
  ...visibleDisplay.map(dc => ({ key: dc.column, label: dc.label, description: dc.description || "" })),
665
+ ...visibleMetrics.map(m => ({ key: m.column, label: m.short || m.column, isMetric: true, description: m.description || "" })),
666
  ];
667
 
668
  // Resolve table_sort: family-specific overrides global
 
671
  const tableGroupBy = familyCfg.table_group_by || config.table_group_by || "";
672
  const tableGroupCols = Array.isArray(tableGroupBy) ? tableGroupBy : (tableGroupBy ? [tableGroupBy] : []);
673
 
674
+ // Determine active metric for sort (align with bar chart order)
675
+ const activeMetricCol = filters.metric;
676
+ const activeMetricCfg = config.metrics.find(m => m.column === activeMetricCol) || {};
677
+ const metricHib = activeMetricCfg.higher_is_better !== false;
678
+
679
  groupVals.forEach(gv => {
680
  const rows = filtered.filter(r => String(r[GROUP_BY]) === String(gv));
681
  if (!rows.length) return;
682
+ // Sort: group-by columns first (to keep groups together), OOM last,
683
+ // then model size → Original first → metric value within each group.
684
  rows.sort((a, b) => {
685
+ // 1. Keep table_group_by groups together
686
+ for (const col of tableGroupCols) {
 
 
 
 
 
 
687
  const av = a[col], bv = b[col];
688
+ if (av !== bv) {
689
+ if (av == null) return 1;
690
+ if (bv == null) return -1;
 
 
 
691
  const aNum = parseFloat(String(av));
692
  const bNum = parseFloat(String(bv));
693
  if (!isNaN(aNum) && !isNaN(bNum)) {
694
+ if (aNum !== bNum) return aNum - bNum;
695
+ } else {
696
+ const cmp = String(av).localeCompare(String(bv));
697
+ if (cmp !== 0) return cmp;
698
  }
 
 
699
  }
700
  }
701
+ // 2. OOM rows sink to the bottom of each group
702
+ const oomA = isOOMRow(a) ? 1 : 0;
703
+ const oomB = isOOMRow(b) ? 1 : 0;
704
+ if (oomA !== oomB) return oomA - oomB;
705
+ // 3. Model size → Original first → metric value
706
+ const sizeA = parseModelSize(a[FAMILY_COL] || a[MODEL_COL]);
707
+ const sizeB = parseModelSize(b[FAMILY_COL] || b[MODEL_COL]);
708
+ if (sizeA !== sizeB) return sizeA - sizeB;
709
+ const extA = isExternalModel(a[MODEL_COL]) ? 0 : 1;
710
+ const extB = isExternalModel(b[MODEL_COL]) ? 0 : 1;
711
+ if (extA !== extB) return extA - extB;
712
+ const va = a[activeMetricCol] ?? 0;
713
+ const vb = b[activeMetricCol] ?? 0;
714
+ return metricHib ? va - vb : vb - va;
715
  });
716
 
717
  // Track row group for break detection
 
945
  DATA = allRows;
946
  ALL_MODELS = [...new Set(DATA.map(r => r[MODEL_COL]))];
947
  MODEL_FAMILIES = detectFamilies();
948
+ // Rebuild display variants from detected model_family values, sorted by size
949
  bf.variants = Object.keys(MODEL_FAMILIES).filter(v =>
950
  deriveBaseFamily(v) === baseFamilyKey
951
+ ).sort((a, b) => parseModelSize(a) - parseModelSize(b));
952
  assignModelColors();
953
  renderSidebar();
954
  updateDependentFilters(true);
config.json CHANGED
@@ -5,6 +5,12 @@
5
  "model_family_column": "model_family",
6
  "model_link_prefix": "https://huggingface.co/",
7
  "optimized_org": "embedl",
 
 
 
 
 
 
8
  "filters": [
9
  {
10
  "column": "type",
@@ -218,6 +224,33 @@
218
  "orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.16.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
219
  },
220
  "default_device": "agx_orin"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  }
222
  },
223
  "accuracy_title": "Accuracy"
 
5
  "model_family_column": "model_family",
6
  "model_link_prefix": "https://huggingface.co/",
7
  "optimized_org": "embedl",
8
+ "base_names": {
9
+ "Llama-3.2-1B": "Llama-3.2-1B-Instruct",
10
+ "Llama-3.2-3B": "Llama-3.2-3B-Instruct",
11
+ "Gemma-3-1B": "gemma-3-1b-it",
12
+ "Gemma-3-270M": "gemma-3-270m-it"
13
+ },
14
  "filters": [
15
  {
16
  "column": "type",
 
224
  "orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.16.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
225
  },
226
  "default_device": "agx_orin"
227
+ },
228
+ "Llama-3.2": {
229
+ "data_file": "data/Llama-3.2.csv",
230
+ "experiment_setup": {
231
+ "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
232
+ "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
233
+ "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
234
+ },
235
+ "default_device": "agx_orin"
236
+ },
237
+ "Gemma-3": {
238
+ "data_file": "data/Gemma-3.csv",
239
+ "experiment_setup": {
240
+ "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
241
+ "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
242
+ "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
243
+ },
244
+ "default_device": "agx_orin"
245
+ },
246
+ "Qwen3": {
247
+ "data_file": "data/Qwen3.csv",
248
+ "experiment_setup": {
249
+ "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
250
+ "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
251
+ "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
252
+ },
253
+ "default_device": "agx_orin"
254
  }
255
  },
256
  "accuracy_title": "Accuracy"
data/Gemma-3.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
2
+ Gemma-3-1B,google/gemma-3-1b-it,text,1,agx_thor,N/A,N/A,N/A,3.2242,79.40,12.59,0.00
3
+ Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,agx_thor,N/A,N/A,N/A,3.0829,83.04,12.04,0.00
4
+ Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.6719,153.12,6.53,0.00
5
+ Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.8132,141.18,7.08,0.00
data/Llama-3.2.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
2
+ Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,agx_thor,N/A,N/A,N/A,3.3117,77.30,12.94,0.00
3
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,agx_thor,N/A,N/A,N/A,2.6227,97.61,10.24,0.00
4
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,0.8990,284.76,3.51,0.00
5
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.5758,162.46,6.16,0.00
6
+ Llama-3.2-3B,meta-llama/Llama-3.2-3B-Instruct,text,1,agx_thor,N/A,N/A,N/A,7.4843,34.21,29.24,0.00
7
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead,text,1,agx_thor,N/A,N/A,N/A,6.3909,40.06,24.96,0.00
8
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,2.2935,111.62,8.96,0.00
9
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-W4A16,text,1,agx_thor,N/A,N/A,N/A,3.4786,73.59,13.59,0.00
10
+ Llama-3.2-3B,meta-llama/Llama-3.2-3B-Instruct,text,1,agx_orin,N/A,N/A,N/A,9.9847,25.64,39.00,0.00
11
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead,text,1,agx_orin,N/A,N/A,N/A,9.1964,27.84,35.92,0.00
12
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,3.3957,75.39,13.26,0.00
13
+ Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-W4A16,text,1,agx_orin,N/A,N/A,N/A,4.1822,61.21,16.34,0.00
14
+ Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,agx_orin,N/A,N/A,N/A,3.9372,65.02,15.38,0.00
15
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,agx_orin,N/A,N/A,N/A,3.4045,75.19,13.30,0.00
16
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.3120,195.12,5.13,0.00
17
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.8475,138.57,7.22,0.00
18
+ Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,orin_nano_super,N/A,N/A,N/A,6.8469,37.39,26.75,0.00
19
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,orin_nano_super,N/A,N/A,N/A,5.8939,43.43,23.02,0.00
20
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,2.2979,111.41,8.98,0.00
21
+ Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,3.2571,78.60,12.72,0.00
data/Qwen3.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
2
+ Qwen3-1.7B,Qwen/Qwen3-1.7B,text,1,agx_thor,N/A,N/A,N/A,5.1915,49.31,20.28,0.00
3
+ Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead,text,1,agx_thor,N/A,N/A,N/A,4.1107,62.28,16.06,0.00
4
+ Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.3543,189.02,5.29,0.00
5
+ Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_thor,N/A,N/A,N/A,2.0541,124.63,8.02,0.00
6
+ Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_thor,N/A,N/A,N/A,1.8237,140.38,7.12,0.00
7
+ Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_thor,N/A,N/A,N/A,1.4494,176.63,5.66,0.00
8
+ Qwen3-1.7B,Qwen/Qwen3-1.7B,text,1,agx_orin,N/A,N/A,N/A,5.5862,45.83,21.82,0.00
9
+ Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,4.9568,51.65,19.36,0.00
10
+ Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.9020,134.59,7.43,0.00
11
+ Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_orin,N/A,N/A,N/A,2.5357,100.96,9.91,0.00
12
+ Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_orin,N/A,N/A,N/A,2.1783,117.52,8.51,0.00
13
+ Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,1.8865,135.70,7.37,0.00