Jonna Marie Matthiesen Claude Opus 4.6 (1M context) committed on
Commit ·
b96722d
1
Parent(s): 7faf78c
Add Llama-3.2, Gemma-3, and Qwen3 benchmarks and improve chart rendering
Browse files
Add benchmark data for three new model families (Llama-3.2, Gemma-3,
Qwen3) on NVIDIA edge devices using vLLM 0.19.0. Improve chart/table
UX by sorting bars by model size, hiding all-zero metrics, prefixing
labels when multiple families are shown, and supporting configurable
base display names.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
- app.js +84 -36
- config.json +33 -0
- data/Gemma-3.csv +5 -0
- data/Llama-3.2.csv +21 -0
- data/Qwen3.csv +13 -0
app.js
CHANGED
|
@@ -154,6 +154,11 @@ function detectFamilies() {
|
|
| 154 |
if (!families[fk]) families[fk] = { base: fk, models: [] };
|
| 155 |
if (!families[fk].models.includes(model)) families[fk].models.push(model);
|
| 156 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
} else {
|
| 158 |
const externalNames = ALL_MODELS.filter(isExternalModel).map(m => m.split("/").pop());
|
| 159 |
externalNames.sort((a, b) => b.length - a.length);
|
|
@@ -224,22 +229,20 @@ function assignModelColors() {
|
|
| 224 |
MODEL_SHORT[model] = suffix || (isExternalModel(model) ? "Original" : name);
|
| 225 |
});
|
| 226 |
});
|
| 227 |
-
//
|
| 228 |
-
|
| 229 |
-
for (const m of ALL_MODELS) {
|
| 230 |
-
const lbl = MODEL_SHORT[m];
|
| 231 |
-
if (!labelCounts[lbl]) labelCounts[lbl] = [];
|
| 232 |
-
labelCounts[lbl].push(m);
|
| 233 |
-
}
|
| 234 |
-
for (const [lbl, models] of Object.entries(labelCounts)) {
|
| 235 |
-
if (models.length > 1) {
|
| 236 |
-
models.forEach(m => { MODEL_SHORT[m] = m.split("/").pop(); });
|
| 237 |
-
}
|
| 238 |
-
}
|
| 239 |
}
|
| 240 |
|
| 241 |
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
/**
 * Tell whether a row carries no measurement at all.
 *
 * @param {Object} row - Data row keyed by column name.
 * @returns {boolean} true when every column listed in `config.metrics` is
 *   strictly `null` in this row (also true when no metrics are configured).
 */
function isOOMRow(row) {
  const hasAnyValue = config.metrics.some(metric => row[metric.column] !== null);
  return !hasAnyValue;
}
|
|
@@ -505,10 +508,14 @@ function buildChart(filtered) {
|
|
| 505 |
|
| 506 |
chartHeader.appendChild(headerLeft);
|
| 507 |
|
| 508 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
const metricEl = metricGroup.querySelector(".btn-group");
|
| 510 |
renderBtnGroup(metricEl,
|
| 511 |
-
|
| 512 |
filters.metric
|
| 513 |
);
|
| 514 |
chartHeader.appendChild(metricGroup);
|
|
@@ -537,7 +544,31 @@ function buildChart(filtered) {
|
|
| 537 |
const picked = models.map(m => allRows.find(r => r[MODEL_COL] === m)).filter(Boolean);
|
| 538 |
if (!picked.length) return;
|
| 539 |
|
| 540 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
const data = picked.map(r => r[metricCol] === null ? 0 : r[metricCol]);
|
| 542 |
const bgColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].bg);
|
| 543 |
const borderColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].border);
|
|
@@ -622,11 +653,16 @@ function buildTables(filtered, chartsShown) {
|
|
| 622 |
);
|
| 623 |
});
|
| 624 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 625 |
// Build column list: Model + visible display cols + metrics
|
| 626 |
const colDefs = [
|
| 627 |
{ key: MODEL_COL, label: "MODEL", isModel: true },
|
| 628 |
...visibleDisplay.map(dc => ({ key: dc.column, label: dc.label, description: dc.description || "" })),
|
| 629 |
-
...
|
| 630 |
];
|
| 631 |
|
| 632 |
// Resolve table_sort: family-specific overrides global
|
|
@@ -635,35 +671,47 @@ function buildTables(filtered, chartsShown) {
|
|
| 635 |
const tableGroupBy = familyCfg.table_group_by || config.table_group_by || "";
|
| 636 |
const tableGroupCols = Array.isArray(tableGroupBy) ? tableGroupBy : (tableGroupBy ? [tableGroupBy] : []);
|
| 637 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
groupVals.forEach(gv => {
|
| 639 |
const rows = filtered.filter(r => String(r[GROUP_BY]) === String(gv));
|
| 640 |
if (!rows.length) return;
|
|
|
|
|
|
|
| 641 |
rows.sort((a, b) => {
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
const mul = rule.direction === "desc" ? -1 : 1;
|
| 645 |
-
if (rule.external_first && col === MODEL_COL) {
|
| 646 |
-
const aExt = isExternalModel(a[col]) ? 0 : 1;
|
| 647 |
-
const bExt = isExternalModel(b[col]) ? 0 : 1;
|
| 648 |
-
if (aExt !== bExt) return (aExt - bExt) * mul;
|
| 649 |
-
}
|
| 650 |
const av = a[col], bv = b[col];
|
| 651 |
-
if (av ==
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
if (typeof av === "number" && typeof bv === "number") {
|
| 655 |
-
if (av !== bv) return (av - bv) * mul;
|
| 656 |
-
} else {
|
| 657 |
const aNum = parseFloat(String(av));
|
| 658 |
const bNum = parseFloat(String(bv));
|
| 659 |
if (!isNaN(aNum) && !isNaN(bNum)) {
|
| 660 |
-
if (aNum !== bNum) return
|
|
|
|
|
|
|
|
|
|
| 661 |
}
|
| 662 |
-
const cmp = String(av).localeCompare(String(bv));
|
| 663 |
-
if (cmp !== 0) return cmp * mul;
|
| 664 |
}
|
| 665 |
}
|
| 666 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
});
|
| 668 |
|
| 669 |
// Track row group for break detection
|
|
@@ -897,10 +945,10 @@ async function switchBaseFamily(baseFamilyKey) {
|
|
| 897 |
DATA = allRows;
|
| 898 |
ALL_MODELS = [...new Set(DATA.map(r => r[MODEL_COL]))];
|
| 899 |
MODEL_FAMILIES = detectFamilies();
|
| 900 |
-
// Rebuild display variants from detected model_family values
|
| 901 |
bf.variants = Object.keys(MODEL_FAMILIES).filter(v =>
|
| 902 |
deriveBaseFamily(v) === baseFamilyKey
|
| 903 |
-
);
|
| 904 |
assignModelColors();
|
| 905 |
renderSidebar();
|
| 906 |
updateDependentFilters(true);
|
|
|
|
| 154 |
if (!families[fk]) families[fk] = { base: fk, models: [] };
|
| 155 |
if (!families[fk].models.includes(model)) families[fk].models.push(model);
|
| 156 |
});
|
| 157 |
+
// Use the display name from config.base_names if provided, otherwise fall back to the family key
|
| 158 |
+
const baseNames = config.base_names || {};
|
| 159 |
+
for (const fk of Object.keys(families)) {
|
| 160 |
+
if (baseNames[fk]) families[fk].base = baseNames[fk];
|
| 161 |
+
}
|
| 162 |
} else {
|
| 163 |
const externalNames = ALL_MODELS.filter(isExternalModel).map(m => m.split("/").pop());
|
| 164 |
externalNames.sort((a, b) => b.length - a.length);
|
|
|
|
| 229 |
MODEL_SHORT[model] = suffix || (isExternalModel(model) ? "Original" : name);
|
| 230 |
});
|
| 231 |
});
|
| 232 |
+
// NOTE: duplicate short labels are resolved at chart render time
|
| 233 |
+
// so that the disambiguation depends on which models are actually visible.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
}
|
| 235 |
|
| 236 |
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
| 237 |
|
| 238 |
+
/**
 * Extract model size in billions from a string like "Llama-3.2-3B" or
 * "Gemma-3-270M".
 *
 * The first number that is directly followed by a B or M unit (either case,
 * ending at a word boundary) is used. A value given in M is divided by 1000
 * so that the result is always expressed in billions.
 *
 * @param {string} s - Model or family name to inspect.
 * @returns {number} Size in billions, or 0 when `s` is not a string or
 *   contains no recognizable size token.
 */
function parseModelSize(s) {
  // A missing column value (undefined/null) must sort as "size unknown"
  // rather than throw on `.match` in the middle of a sort comparator.
  if (typeof s !== "string") return 0;

  const found = s.match(/(\d+(?:\.\d+)?)\s*([BM])\b/i);
  if (!found) return 0;

  const amount = parseFloat(found[1]);
  const isMillions = found[2].toUpperCase() === "M";
  return isMillions ? amount / 1000 : amount;
}
|
| 245 |
+
|
| 246 |
/**
 * Tell whether a row carries no measurement at all.
 *
 * @param {Object} row - Data row keyed by column name.
 * @returns {boolean} true when every column listed in `config.metrics` is
 *   strictly `null` in this row (also true when no metrics are configured).
 */
function isOOMRow(row) {
  const hasAnyValue = config.metrics.some(metric => row[metric.column] !== null);
  return !hasAnyValue;
}
|
|
|
|
| 508 |
|
| 509 |
chartHeader.appendChild(headerLeft);
|
| 510 |
|
| 511 |
+
// Only show metric buttons for metrics that have non-zero data
|
| 512 |
+
const chartVisibleMetrics = config.metrics.filter(m =>
|
| 513 |
+
gRows.some(r => r[m.column] !== null && r[m.column] !== 0)
|
| 514 |
+
);
|
| 515 |
+
if (chartVisibleMetrics.length > 1) {
|
| 516 |
const metricEl = metricGroup.querySelector(".btn-group");
|
| 517 |
renderBtnGroup(metricEl,
|
| 518 |
+
chartVisibleMetrics.map(m => ({ value: m.column, label: m.short || m.column })),
|
| 519 |
filters.metric
|
| 520 |
);
|
| 521 |
chartHeader.appendChild(metricGroup);
|
|
|
|
| 544 |
const picked = models.map(m => allRows.find(r => r[MODEL_COL] === m)).filter(Boolean);
|
| 545 |
if (!picked.length) return;
|
| 546 |
|
| 547 |
+
// Sort bars: model size → Original first → metric value
|
| 548 |
+
const hib = metricCfg.higher_is_better !== false;
|
| 549 |
+
picked.sort((a, b) => {
|
| 550 |
+
const sizeA = parseModelSize(a[FAMILY_COL] || a[MODEL_COL]);
|
| 551 |
+
const sizeB = parseModelSize(b[FAMILY_COL] || b[MODEL_COL]);
|
| 552 |
+
if (sizeA !== sizeB) return sizeA - sizeB;
|
| 553 |
+
const extA = isExternalModel(a[MODEL_COL]) ? 0 : 1;
|
| 554 |
+
const extB = isExternalModel(b[MODEL_COL]) ? 0 : 1;
|
| 555 |
+
if (extA !== extB) return extA - extB;
|
| 556 |
+
const va = a[metricCol] ?? 0;
|
| 557 |
+
const vb = b[metricCol] ?? 0;
|
| 558 |
+
return hib ? va - vb : vb - va;
|
| 559 |
+
});
|
| 560 |
+
|
| 561 |
+
// Build labels; prefix all with model_family when multiple families are shown
|
| 562 |
+
const rawLabels = picked.map(r => MODEL_SHORT[r[MODEL_COL]]);
|
| 563 |
+
const families = new Set(picked.map(r => r[FAMILY_COL]));
|
| 564 |
+
const needPrefix = families.size > 1;
|
| 565 |
+
const labels = rawLabels.map((lbl, i) => {
|
| 566 |
+
if (needPrefix) {
|
| 567 |
+
const fk = picked[i][FAMILY_COL] || "";
|
| 568 |
+
return lbl ? `${fk} ${lbl}` : fk;
|
| 569 |
+
}
|
| 570 |
+
return lbl;
|
| 571 |
+
});
|
| 572 |
const data = picked.map(r => r[metricCol] === null ? 0 : r[metricCol]);
|
| 573 |
const bgColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].bg);
|
| 574 |
const borderColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].border);
|
|
|
|
| 653 |
);
|
| 654 |
});
|
| 655 |
|
| 656 |
+
// Hide metric columns where every value in the filtered data is zero or null
|
| 657 |
+
const visibleMetrics = config.metrics.filter(m =>
|
| 658 |
+
filtered.some(r => r[m.column] !== null && r[m.column] !== 0)
|
| 659 |
+
);
|
| 660 |
+
|
| 661 |
// Build column list: Model + visible display cols + metrics
|
| 662 |
const colDefs = [
|
| 663 |
{ key: MODEL_COL, label: "MODEL", isModel: true },
|
| 664 |
...visibleDisplay.map(dc => ({ key: dc.column, label: dc.label, description: dc.description || "" })),
|
| 665 |
+
...visibleMetrics.map(m => ({ key: m.column, label: m.short || m.column, isMetric: true, description: m.description || "" })),
|
| 666 |
];
|
| 667 |
|
| 668 |
// Resolve table_sort: family-specific overrides global
|
|
|
|
| 671 |
const tableGroupBy = familyCfg.table_group_by || config.table_group_by || "";
|
| 672 |
const tableGroupCols = Array.isArray(tableGroupBy) ? tableGroupBy : (tableGroupBy ? [tableGroupBy] : []);
|
| 673 |
|
| 674 |
+
// Determine active metric for sort (align with bar chart order)
|
| 675 |
+
const activeMetricCol = filters.metric;
|
| 676 |
+
const activeMetricCfg = config.metrics.find(m => m.column === activeMetricCol) || {};
|
| 677 |
+
const metricHib = activeMetricCfg.higher_is_better !== false;
|
| 678 |
+
|
| 679 |
groupVals.forEach(gv => {
|
| 680 |
const rows = filtered.filter(r => String(r[GROUP_BY]) === String(gv));
|
| 681 |
if (!rows.length) return;
|
| 682 |
+
// Sort: group-by columns first (to keep groups together), OOM last,
|
| 683 |
+
// then model size → Original first → metric value within each group.
|
| 684 |
rows.sort((a, b) => {
|
| 685 |
+
// 1. Keep table_group_by groups together
|
| 686 |
+
for (const col of tableGroupCols) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
const av = a[col], bv = b[col];
|
| 688 |
+
if (av !== bv) {
|
| 689 |
+
if (av == null) return 1;
|
| 690 |
+
if (bv == null) return -1;
|
|
|
|
|
|
|
|
|
|
| 691 |
const aNum = parseFloat(String(av));
|
| 692 |
const bNum = parseFloat(String(bv));
|
| 693 |
if (!isNaN(aNum) && !isNaN(bNum)) {
|
| 694 |
+
if (aNum !== bNum) return aNum - bNum;
|
| 695 |
+
} else {
|
| 696 |
+
const cmp = String(av).localeCompare(String(bv));
|
| 697 |
+
if (cmp !== 0) return cmp;
|
| 698 |
}
|
|
|
|
|
|
|
| 699 |
}
|
| 700 |
}
|
| 701 |
+
// 2. OOM rows sink to the bottom of each group
|
| 702 |
+
const oomA = isOOMRow(a) ? 1 : 0;
|
| 703 |
+
const oomB = isOOMRow(b) ? 1 : 0;
|
| 704 |
+
if (oomA !== oomB) return oomA - oomB;
|
| 705 |
+
// 3. Model size → Original first → metric value
|
| 706 |
+
const sizeA = parseModelSize(a[FAMILY_COL] || a[MODEL_COL]);
|
| 707 |
+
const sizeB = parseModelSize(b[FAMILY_COL] || b[MODEL_COL]);
|
| 708 |
+
if (sizeA !== sizeB) return sizeA - sizeB;
|
| 709 |
+
const extA = isExternalModel(a[MODEL_COL]) ? 0 : 1;
|
| 710 |
+
const extB = isExternalModel(b[MODEL_COL]) ? 0 : 1;
|
| 711 |
+
if (extA !== extB) return extA - extB;
|
| 712 |
+
const va = a[activeMetricCol] ?? 0;
|
| 713 |
+
const vb = b[activeMetricCol] ?? 0;
|
| 714 |
+
return metricHib ? va - vb : vb - va;
|
| 715 |
});
|
| 716 |
|
| 717 |
// Track row group for break detection
|
|
|
|
| 945 |
DATA = allRows;
|
| 946 |
ALL_MODELS = [...new Set(DATA.map(r => r[MODEL_COL]))];
|
| 947 |
MODEL_FAMILIES = detectFamilies();
|
| 948 |
+
// Rebuild display variants from detected model_family values, sorted by size
|
| 949 |
bf.variants = Object.keys(MODEL_FAMILIES).filter(v =>
|
| 950 |
deriveBaseFamily(v) === baseFamilyKey
|
| 951 |
+
).sort((a, b) => parseModelSize(a) - parseModelSize(b));
|
| 952 |
assignModelColors();
|
| 953 |
renderSidebar();
|
| 954 |
updateDependentFilters(true);
|
config.json
CHANGED
|
@@ -5,6 +5,12 @@
|
|
| 5 |
"model_family_column": "model_family",
|
| 6 |
"model_link_prefix": "https://huggingface.co/",
|
| 7 |
"optimized_org": "embedl",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"filters": [
|
| 9 |
{
|
| 10 |
"column": "type",
|
|
@@ -218,6 +224,33 @@
|
|
| 218 |
"orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.16.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
|
| 219 |
},
|
| 220 |
"default_device": "agx_orin"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
}
|
| 222 |
},
|
| 223 |
"accuracy_title": "Accuracy"
|
|
|
|
| 5 |
"model_family_column": "model_family",
|
| 6 |
"model_link_prefix": "https://huggingface.co/",
|
| 7 |
"optimized_org": "embedl",
|
| 8 |
+
"base_names": {
|
| 9 |
+
"Llama-3.2-1B": "Llama-3.2-1B-Instruct",
|
| 10 |
+
"Llama-3.2-3B": "Llama-3.2-3B-Instruct",
|
| 11 |
+
"Gemma-3-1B": "gemma-3-1b-it",
|
| 12 |
+
"Gemma-3-270M": "gemma-3-270m-it"
|
| 13 |
+
},
|
| 14 |
"filters": [
|
| 15 |
{
|
| 16 |
"column": "type",
|
|
|
|
| 224 |
"orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.16.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
|
| 225 |
},
|
| 226 |
"default_device": "agx_orin"
|
| 227 |
+
},
|
| 228 |
+
"Llama-3.2": {
|
| 229 |
+
"data_file": "data/Llama-3.2.csv",
|
| 230 |
+
"experiment_setup": {
|
| 231 |
+
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 232 |
+
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 233 |
+
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
|
| 234 |
+
},
|
| 235 |
+
"default_device": "agx_orin"
|
| 236 |
+
},
|
| 237 |
+
"Gemma-3": {
|
| 238 |
+
"data_file": "data/Gemma-3.csv",
|
| 239 |
+
"experiment_setup": {
|
| 240 |
+
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 241 |
+
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 242 |
+
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
|
| 243 |
+
},
|
| 244 |
+
"default_device": "agx_orin"
|
| 245 |
+
},
|
| 246 |
+
"Qwen3": {
|
| 247 |
+
"data_file": "data/Qwen3.csv",
|
| 248 |
+
"experiment_setup": {
|
| 249 |
+
"agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 250 |
+
"agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
|
| 251 |
+
"orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
|
| 252 |
+
},
|
| 253 |
+
"default_device": "agx_orin"
|
| 254 |
}
|
| 255 |
},
|
| 256 |
"accuracy_title": "Accuracy"
|
data/Gemma-3.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
|
| 2 |
+
Gemma-3-1B,google/gemma-3-1b-it,text,1,agx_thor,N/A,N/A,N/A,3.2242,79.40,12.59,0.00
|
| 3 |
+
Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,agx_thor,N/A,N/A,N/A,3.0829,83.04,12.04,0.00
|
| 4 |
+
Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.6719,153.12,6.53,0.00
|
| 5 |
+
Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.8132,141.18,7.08,0.00
|
data/Llama-3.2.csv
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
|
| 2 |
+
Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,agx_thor,N/A,N/A,N/A,3.3117,77.30,12.94,0.00
|
| 3 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,agx_thor,N/A,N/A,N/A,2.6227,97.61,10.24,0.00
|
| 4 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,0.8990,284.76,3.51,0.00
|
| 5 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.5758,162.46,6.16,0.00
|
| 6 |
+
Llama-3.2-3B,meta-llama/Llama-3.2-3B-Instruct,text,1,agx_thor,N/A,N/A,N/A,7.4843,34.21,29.24,0.00
|
| 7 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead,text,1,agx_thor,N/A,N/A,N/A,6.3909,40.06,24.96,0.00
|
| 8 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,2.2935,111.62,8.96,0.00
|
| 9 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-W4A16,text,1,agx_thor,N/A,N/A,N/A,3.4786,73.59,13.59,0.00
|
| 10 |
+
Llama-3.2-3B,meta-llama/Llama-3.2-3B-Instruct,text,1,agx_orin,N/A,N/A,N/A,9.9847,25.64,39.00,0.00
|
| 11 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead,text,1,agx_orin,N/A,N/A,N/A,9.1964,27.84,35.92,0.00
|
| 12 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,3.3957,75.39,13.26,0.00
|
| 13 |
+
Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-W4A16,text,1,agx_orin,N/A,N/A,N/A,4.1822,61.21,16.34,0.00
|
| 14 |
+
Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,agx_orin,N/A,N/A,N/A,3.9372,65.02,15.38,0.00
|
| 15 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,agx_orin,N/A,N/A,N/A,3.4045,75.19,13.30,0.00
|
| 16 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.3120,195.12,5.13,0.00
|
| 17 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.8475,138.57,7.22,0.00
|
| 18 |
+
Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,orin_nano_super,N/A,N/A,N/A,6.8469,37.39,26.75,0.00
|
| 19 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,orin_nano_super,N/A,N/A,N/A,5.8939,43.43,23.02,0.00
|
| 20 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,2.2979,111.41,8.98,0.00
|
| 21 |
+
Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,3.2571,78.60,12.72,0.00
|
data/Qwen3.csv
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
|
| 2 |
+
Qwen3-1.7B,Qwen/Qwen3-1.7B,text,1,agx_thor,N/A,N/A,N/A,5.1915,49.31,20.28,0.00
|
| 3 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead,text,1,agx_thor,N/A,N/A,N/A,4.1107,62.28,16.06,0.00
|
| 4 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.3543,189.02,5.29,0.00
|
| 5 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_thor,N/A,N/A,N/A,2.0541,124.63,8.02,0.00
|
| 6 |
+
Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_thor,N/A,N/A,N/A,1.8237,140.38,7.12,0.00
|
| 7 |
+
Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_thor,N/A,N/A,N/A,1.4494,176.63,5.66,0.00
|
| 8 |
+
Qwen3-1.7B,Qwen/Qwen3-1.7B,text,1,agx_orin,N/A,N/A,N/A,5.5862,45.83,21.82,0.00
|
| 9 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,4.9568,51.65,19.36,0.00
|
| 10 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.9020,134.59,7.43,0.00
|
| 11 |
+
Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_orin,N/A,N/A,N/A,2.5357,100.96,9.91,0.00
|
| 12 |
+
Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_orin,N/A,N/A,N/A,2.1783,117.52,8.51,0.00
|
| 13 |
+
Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,1.8865,135.70,7.37,0.00
|