Spaces:

embedl
/

Edge-Inference-Benchmarks

Running

Jonna Marie Matthiesen Claude Opus 4.6 (1M context) committed on Apr 15

Commit

b96722d

1 Parent(s): 7faf78c

Add Llama-3.2, Gemma-3, and Qwen3 benchmarks and improve chart rendering

Add benchmark data for three new model families (Llama-3.2, Gemma-3,
Qwen3) on NVIDIA edge devices using vLLM 0.19.0. Improve chart/table
UX by sorting bars by model size, hiding all-zero metrics, prefixing
labels when multiple families are shown, and supporting configurable
base display names.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (5) hide show

app.js +84 -36
config.json +33 -0
data/Gemma-3.csv +5 -0
data/Llama-3.2.csv +21 -0
data/Qwen3.csv +13 -0

app.js CHANGED Viewed

@@ -154,6 +154,11 @@ function detectFamilies() {
           if (!families[fk]) families[fk] = { base: fk, models: [] };
           if (!families[fk].models.includes(model)) families[fk].models.push(model);
       });
     } else {
         const externalNames = ALL_MODELS.filter(isExternalModel).map(m => m.split("/").pop());
         externalNames.sort((a, b) => b.length - a.length);
@@ -224,22 +229,20 @@ function assignModelColors() {
             MODEL_SHORT[model] = suffix || (isExternalModel(model) ? "Original" : name);
         });
     });
-    // Resolve duplicate short labels: use the model's short name instead
-    const labelCounts = {};
-    for (const m of ALL_MODELS) {
-        const lbl = MODEL_SHORT[m];
-        if (!labelCounts[lbl]) labelCounts[lbl] = [];
-        labelCounts[lbl].push(m);
-    }
-    for (const [lbl, models] of Object.entries(labelCounts)) {
-        if (models.length > 1) {
-            models.forEach(m => { MODEL_SHORT[m] = m.split("/").pop(); });
-        }
-    }
 }
 // ─── Helpers ──────────────────────────────────────────────────────────────────
 /**
  * Report whether a row carries no value for any configured metric.
  *
  * @param {Object} row - Data row keyed by column name.
  * @returns {boolean} True when every column listed in `config.metrics`
  *     is strictly `null` in `row` (also true when no metrics are configured).
  */
 function isOOMRow(row) {
     // Equivalent to "every metric is null": no metric may hold a non-null value.
     const hasMeasuredMetric = config.metrics.some(({ column }) => row[column] !== null);
     return !hasMeasuredMetric;
 }
@@ -505,10 +508,14 @@ function buildChart(filtered) {
     chartHeader.appendChild(headerLeft);
-    if (config.metrics.length > 1) {
         const metricEl = metricGroup.querySelector(".btn-group");
         renderBtnGroup(metricEl,
-            config.metrics.map(m => ({ value: m.column, label: m.short || m.column })),
             filters.metric
         );
         chartHeader.appendChild(metricGroup);
@@ -537,7 +544,31 @@ function buildChart(filtered) {
         const picked = models.map(m => allRows.find(r => r[MODEL_COL] === m)).filter(Boolean);
         if (!picked.length) return;
-        const labels = picked.map(r => MODEL_SHORT[r[MODEL_COL]]);
         const data = picked.map(r => r[metricCol] === null ? 0 : r[metricCol]);
         const bgColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].bg);
         const borderColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].border);
@@ -622,11 +653,16 @@ function buildTables(filtered, chartsShown) {
         );
     });
     // Build column list: Model + visible display cols + metrics
     const colDefs = [
         { key: MODEL_COL, label: "MODEL", isModel: true },
         ...visibleDisplay.map(dc => ({ key: dc.column, label: dc.label, description: dc.description || "" })),
-        ...config.metrics.map(m => ({ key: m.column, label: m.short || m.column, isMetric: true, description: m.description || "" })),
     ];
     // Resolve table_sort: family-specific overrides global
@@ -635,35 +671,47 @@ function buildTables(filtered, chartsShown) {
     const tableGroupBy = familyCfg.table_group_by || config.table_group_by || "";
     const tableGroupCols = Array.isArray(tableGroupBy) ? tableGroupBy : (tableGroupBy ? [tableGroupBy] : []);
     groupVals.forEach(gv => {
         const rows = filtered.filter(r => String(r[GROUP_BY]) === String(gv));
         if (!rows.length) return;
         rows.sort((a, b) => {
-            for (const rule of sortRules) {
-                const col = rule.column;
-                const mul = rule.direction === "desc" ? -1 : 1;
-                if (rule.external_first && col === MODEL_COL) {
-                    const aExt = isExternalModel(a[col]) ? 0 : 1;
-                    const bExt = isExternalModel(b[col]) ? 0 : 1;
-                    if (aExt !== bExt) return (aExt - bExt) * mul;
-                }
                 const av = a[col], bv = b[col];
-                if (av === bv || (av == null && bv == null)) continue;
-                if (av == null) return 1;
-                if (bv == null) return -1;
-                if (typeof av === "number" && typeof bv === "number") {
-                    if (av !== bv) return (av - bv) * mul;
-                } else {
                     const aNum = parseFloat(String(av));
                     const bNum = parseFloat(String(bv));
                     if (!isNaN(aNum) && !isNaN(bNum)) {
-                        if (aNum !== bNum) return (aNum - bNum) * mul;
                     }
-                    const cmp = String(av).localeCompare(String(bv));
-                    if (cmp !== 0) return cmp * mul;
                 }
             }
-            return 0;
         });
         // Track row group for break detection
@@ -897,10 +945,10 @@ async function switchBaseFamily(baseFamilyKey) {
     DATA = allRows;
     ALL_MODELS = [...new Set(DATA.map(r => r[MODEL_COL]))];
     MODEL_FAMILIES = detectFamilies();
-    // Rebuild display variants from detected model_family values
     bf.variants = Object.keys(MODEL_FAMILIES).filter(v =>
         deriveBaseFamily(v) === baseFamilyKey
-    );
     assignModelColors();
     renderSidebar();
     updateDependentFilters(true);

           if (!families[fk]) families[fk] = { base: fk, models: [] };
           if (!families[fk].models.includes(model)) families[fk].models.push(model);
       });
+      // Use the display name from config.base_names if provided, otherwise fall back to the family key
+      const baseNames = config.base_names || {};
+      for (const fk of Object.keys(families)) {
+          if (baseNames[fk]) families[fk].base = baseNames[fk];
+      }
     } else {
         const externalNames = ALL_MODELS.filter(isExternalModel).map(m => m.split("/").pop());
         externalNames.sort((a, b) => b.length - a.length);
             MODEL_SHORT[model] = suffix || (isExternalModel(model) ? "Original" : name);
         });
     });
+    // NOTE: duplicate short labels are resolved at chart render time
+    // so that the disambiguation depends on which models are actually visible.
 }
 // ─── Helpers ──────────────────────────────────────────────────────────────────
+/**
+ * Extract model size in billions from a string like "Llama-3.2-3B" or "Gemma-3-270M".
+ *
+ * Only the first number that is followed (optionally after whitespace) by a
+ * B/M suffix in either case is used; the rest of the string is ignored.
+ *
+ * @param {string} s - Name to scan. Must be a string: `s.match` is called unguarded.
+ * @returns {number} Size in billions ("M" values are divided by 1000), or 0 when
+ *     no size token is found.
+ */
+function parseModelSize(s) {
+    const m = s.match(/(\d+(?:\.\d+)?)\s*(B|M|b|m)\b/); // integer or decimal, optional whitespace, B/M suffix ending at a word boundary
+    if (!m) return 0; // no size token: callers see 0
+    const n = parseFloat(m[1]);
+    return m[2].toUpperCase() === "M" ? n / 1000 : n; // convert millions to billions
+}
 /**
  * Report whether a row carries no value for any configured metric.
  *
  * @param {Object} row - Data row keyed by column name.
  * @returns {boolean} True when every column listed in `config.metrics`
  *     is strictly `null` in `row` (also true when no metrics are configured).
  */
 function isOOMRow(row) {
     // Equivalent to "every metric is null": no metric may hold a non-null value.
     const hasMeasuredMetric = config.metrics.some(({ column }) => row[column] !== null);
     return !hasMeasuredMetric;
 }
     chartHeader.appendChild(headerLeft);
+    // Only show metric buttons for metrics that have non-zero data
+    const chartVisibleMetrics = config.metrics.filter(m =>
+        gRows.some(r => r[m.column] !== null && r[m.column] !== 0)
+    );
+    if (chartVisibleMetrics.length > 1) {
         const metricEl = metricGroup.querySelector(".btn-group");
         renderBtnGroup(metricEl,
+            chartVisibleMetrics.map(m => ({ value: m.column, label: m.short || m.column })),
             filters.metric
         );
         chartHeader.appendChild(metricGroup);
         const picked = models.map(m => allRows.find(r => r[MODEL_COL] === m)).filter(Boolean);
         if (!picked.length) return;
+        // Sort bars: model size → Original first → metric value
+        const hib = metricCfg.higher_is_better !== false;
+        picked.sort((a, b) => {
+            const sizeA = parseModelSize(a[FAMILY_COL] || a[MODEL_COL]);
+            const sizeB = parseModelSize(b[FAMILY_COL] || b[MODEL_COL]);
+            if (sizeA !== sizeB) return sizeA - sizeB;
+            const extA = isExternalModel(a[MODEL_COL]) ? 0 : 1;
+            const extB = isExternalModel(b[MODEL_COL]) ? 0 : 1;
+            if (extA !== extB) return extA - extB;
+            const va = a[metricCol] ?? 0;
+            const vb = b[metricCol] ?? 0;
+            return hib ? va - vb : vb - va;
+        });
+        // Build labels; prefix all with model_family when multiple families are shown
+        const rawLabels = picked.map(r => MODEL_SHORT[r[MODEL_COL]]);
+        const families = new Set(picked.map(r => r[FAMILY_COL]));
+        const needPrefix = families.size > 1;
+        const labels = rawLabels.map((lbl, i) => {
+            if (needPrefix) {
+                const fk = picked[i][FAMILY_COL] || "";
+                return lbl ? `${fk} ${lbl}` : fk;
+            }
+            return lbl;
+        });
         const data = picked.map(r => r[metricCol] === null ? 0 : r[metricCol]);
         const bgColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].bg);
         const borderColors = picked.map(r => MODEL_COLORS[r[MODEL_COL]].border);
         );
     });
+    // Hide metric columns where every value in the filtered data is zero or null
+    const visibleMetrics = config.metrics.filter(m =>
+        filtered.some(r => r[m.column] !== null && r[m.column] !== 0)
+    );
     // Build column list: Model + visible display cols + metrics
     const colDefs = [
         { key: MODEL_COL, label: "MODEL", isModel: true },
         ...visibleDisplay.map(dc => ({ key: dc.column, label: dc.label, description: dc.description || "" })),
+        ...visibleMetrics.map(m => ({ key: m.column, label: m.short || m.column, isMetric: true, description: m.description || "" })),
     ];
     // Resolve table_sort: family-specific overrides global
     const tableGroupBy = familyCfg.table_group_by || config.table_group_by || "";
     const tableGroupCols = Array.isArray(tableGroupBy) ? tableGroupBy : (tableGroupBy ? [tableGroupBy] : []);
+    // Determine active metric for sort (align with bar chart order)
+    const activeMetricCol = filters.metric;
+    const activeMetricCfg = config.metrics.find(m => m.column === activeMetricCol) || {};
+    const metricHib = activeMetricCfg.higher_is_better !== false;
     groupVals.forEach(gv => {
         const rows = filtered.filter(r => String(r[GROUP_BY]) === String(gv));
         if (!rows.length) return;
+        // Sort: group-by columns first (to keep groups together), OOM last,
+        // then model size → Original first → metric value within each group.
         rows.sort((a, b) => {
+            // 1. Keep table_group_by groups together
+            for (const col of tableGroupCols) {
                 const av = a[col], bv = b[col];
+                if (av !== bv) {
+                    if (av == null) return 1;
+                    if (bv == null) return -1;
                     const aNum = parseFloat(String(av));
                     const bNum = parseFloat(String(bv));
                     if (!isNaN(aNum) && !isNaN(bNum)) {
+                        if (aNum !== bNum) return aNum - bNum;
+                    } else {
+                        const cmp = String(av).localeCompare(String(bv));
+                        if (cmp !== 0) return cmp;
                     }
                 }
             }
+            // 2. OOM rows sink to the bottom of each group
+            const oomA = isOOMRow(a) ? 1 : 0;
+            const oomB = isOOMRow(b) ? 1 : 0;
+            if (oomA !== oomB) return oomA - oomB;
+            // 3. Model size → Original first → metric value
+            const sizeA = parseModelSize(a[FAMILY_COL] || a[MODEL_COL]);
+            const sizeB = parseModelSize(b[FAMILY_COL] || b[MODEL_COL]);
+            if (sizeA !== sizeB) return sizeA - sizeB;
+            const extA = isExternalModel(a[MODEL_COL]) ? 0 : 1;
+            const extB = isExternalModel(b[MODEL_COL]) ? 0 : 1;
+            if (extA !== extB) return extA - extB;
+            const va = a[activeMetricCol] ?? 0;
+            const vb = b[activeMetricCol] ?? 0;
+            return metricHib ? va - vb : vb - va;
         });
         // Track row group for break detection
     DATA = allRows;
     ALL_MODELS = [...new Set(DATA.map(r => r[MODEL_COL]))];
     MODEL_FAMILIES = detectFamilies();
+    // Rebuild display variants from detected model_family values, sorted by size
     bf.variants = Object.keys(MODEL_FAMILIES).filter(v =>
         deriveBaseFamily(v) === baseFamilyKey
+    ).sort((a, b) => parseModelSize(a) - parseModelSize(b));
     assignModelColors();
     renderSidebar();
     updateDependentFilters(true);

config.json CHANGED Viewed

@@ -5,6 +5,12 @@
   "model_family_column": "model_family",
   "model_link_prefix": "https://huggingface.co/",
   "optimized_org": "embedl",
   "filters": [
     {
       "column": "type",
@@ -218,6 +224,33 @@
         "orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.16.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
       },
       "default_device": "agx_orin"
     }
   },
   "accuracy_title": "Accuracy"

   "model_family_column": "model_family",
   "model_link_prefix": "https://huggingface.co/",
   "optimized_org": "embedl",
+  "base_names": {
+    "Llama-3.2-1B": "Llama-3.2-1B-Instruct",
+    "Llama-3.2-3B": "Llama-3.2-3B-Instruct",
+    "Gemma-3-1B": "gemma-3-1b-it",
+    "Gemma-3-270M": "gemma-3-270m-it"
+  },
   "filters": [
     {
       "column": "type",
         "orin_nano": "Measurement setup: NVIDIA AI IoT vLLM 0.16.0 tegra, 256 tokens generated, 10 warm-up runs, averaged over 25 runs."
       },
       "default_device": "agx_orin"
+    },
+    "Llama-3.2": {
+      "data_file": "data/Llama-3.2.csv",
+      "experiment_setup": {
+        "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
+        "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
+        "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
+      },
+      "default_device": "agx_orin"
+    },
+    "Gemma-3": {
+      "data_file": "data/Gemma-3.csv",
+      "experiment_setup": {
+        "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
+        "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
+        "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
+      },
+      "default_device": "agx_orin"
+    },
+    "Qwen3": {
+      "data_file": "data/Qwen3.csv",
+      "experiment_setup": {
+        "agx_thor": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 arm64, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
+        "agx_orin": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs.",
+        "orin_nano_super": "Measurement setup: NVIDIA AI IoT vLLM 0.19.0 tegra, 32 input tokens, 256 output tokens generated, 10 warm-up runs, averaged over 25 runs."
+      },
+      "default_device": "agx_orin"
     }
   },
   "accuracy_title": "Accuracy"

data/Gemma-3.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
+Gemma-3-1B,google/gemma-3-1b-it,text,1,agx_thor,N/A,N/A,N/A,3.2242,79.40,12.59,0.00
+Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead,text,1,agx_thor,N/A,N/A,N/A,3.0829,83.04,12.04,0.00
+Gemma-3-1B,embedl/gemma-3-1b-it-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.6719,153.12,6.53,0.00
+Gemma-3-1B,embedl/gemma-3-1b-it-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.8132,141.18,7.08,0.00

data/Llama-3.2.csv ADDED Viewed

	@@ -0,0 +1,21 @@

+model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
+Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,agx_thor,N/A,N/A,N/A,3.3117,77.30,12.94,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,agx_thor,N/A,N/A,N/A,2.6227,97.61,10.24,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,0.8990,284.76,3.51,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.5758,162.46,6.16,0.00
+Llama-3.2-3B,meta-llama/Llama-3.2-3B-Instruct,text,1,agx_thor,N/A,N/A,N/A,7.4843,34.21,29.24,0.00
+Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead,text,1,agx_thor,N/A,N/A,N/A,6.3909,40.06,24.96,0.00
+Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,2.2935,111.62,8.96,0.00
+Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-W4A16,text,1,agx_thor,N/A,N/A,N/A,3.4786,73.59,13.59,0.00
+Llama-3.2-3B,meta-llama/Llama-3.2-3B-Instruct,text,1,agx_orin,N/A,N/A,N/A,9.9847,25.64,39.00,0.00
+Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead,text,1,agx_orin,N/A,N/A,N/A,9.1964,27.84,35.92,0.00
+Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,3.3957,75.39,13.26,0.00
+Llama-3.2-3B,embedl/Llama-3.2-3B-Instruct-W4A16,text,1,agx_orin,N/A,N/A,N/A,4.1822,61.21,16.34,0.00
+Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,agx_orin,N/A,N/A,N/A,3.9372,65.02,15.38,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,agx_orin,N/A,N/A,N/A,3.4045,75.19,13.30,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.3120,195.12,5.13,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.8475,138.57,7.22,0.00
+Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct,text,1,orin_nano_super,N/A,N/A,N/A,6.8469,37.39,26.75,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead,text,1,orin_nano_super,N/A,N/A,N/A,5.8939,43.43,23.02,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-FlashHead-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,2.2979,111.41,8.98,0.00
+Llama-3.2-1B,embedl/Llama-3.2-1B-Instruct-W4A16,text,1,orin_nano_super,N/A,N/A,N/A,3.2571,78.60,12.72,0.00

data/Qwen3.csv ADDED Viewed

	@@ -0,0 +1,13 @@

+model_family,model,type,batch,device,res,fps,frames,e2e,tps,tpot,ttft
+Qwen3-1.7B,Qwen/Qwen3-1.7B,text,1,agx_thor,N/A,N/A,N/A,5.1915,49.31,20.28,0.00
+Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead,text,1,agx_thor,N/A,N/A,N/A,4.1107,62.28,16.06,0.00
+Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,agx_thor,N/A,N/A,N/A,1.3543,189.02,5.29,0.00
+Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_thor,N/A,N/A,N/A,2.0541,124.63,8.02,0.00
+Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_thor,N/A,N/A,N/A,1.8237,140.38,7.12,0.00
+Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_thor,N/A,N/A,N/A,1.4494,176.63,5.66,0.00
+Qwen3-1.7B,Qwen/Qwen3-1.7B,text,1,agx_orin,N/A,N/A,N/A,5.5862,45.83,21.82,0.00
+Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,4.9568,51.65,19.36,0.00
+Qwen3-1.7B,embedl/Qwen3-1.7B-FlashHead-W4A16,text,1,agx_orin,N/A,N/A,N/A,1.9020,134.59,7.43,0.00
+Qwen3-1.7B,embedl/Qwen3-1.7B-W4A16,text,1,agx_orin,N/A,N/A,N/A,2.5357,100.96,9.91,0.00
+Qwen3-0.6B,Qwen/Qwen3-0.6B,text,1,agx_orin,N/A,N/A,N/A,2.1783,117.52,8.51,0.00
+Qwen3-0.6B,embedl/Qwen3-0.6B-FlashHead,text,1,agx_orin,N/A,N/A,N/A,1.8865,135.70,7.37,0.00