{ "Qwen3-30B-A3B-Instruct-2507": { "gsm8k": { "benchmark_name": "gsm8k", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1071.2940027174511, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1488.3645940190918, "accept_length": 2.6400593352844486 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1071.2940027174511, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1499.6157892300257, "accept_length": 3.0113471715954674 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1071.2940027174511, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1491.1759364152986, "accept_length": 2.525104073618391 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1071.2940027174511, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1438.3989235515564, "accept_length": 3.1488859094681736 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1071.2940027174511, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1478.3371126866896, "accept_length": 2.515156901620291 } ] } ] }, "math500": { "benchmark_name": "math500", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1468.9518188983302, "accept_length": 1.0 }, { "Name": 
"lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 3022.302541558449, "accept_length": 3.4018400160943374 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1468.9518188983302, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 3458.7683757488517, "accept_length": 4.5001277922609 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1468.9518188983302, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2710.0700446913434, "accept_length": 3.83069810232181 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1468.9518188983302, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 3636.1457092511932, "accept_length": 5.29297884876688 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1468.9518188983302, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2650.9994915668844, "accept_length": 3.981701201346221 } ] } ] }, "mtbench": { "benchmark_name": "mtbench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1341.3462205459145, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2048.689292397081, "accept_length": 2.495847913511255 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1341.3462205459145, "accept_length": 1.0 }, { 
"Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2086.117426859236, "accept_length": 2.831051301639537 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1341.3462205459145, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1698.4151046745978, "accept_length": 2.5572219713355357 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1341.3462205459145, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1998.1600180425269, "accept_length": 2.9819193324061195 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1341.3462205459145, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1742.9797705522778, "accept_length": 2.7422317575874455 } ] } ] }, "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1366.6183006362219, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2618.165602951494, "accept_length": 3.349328692192939 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1366.6183006362219, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2912.1392571686956, "accept_length": 4.384426363785289 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1366.6183006362219, 
"accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2367.016477367958, "accept_length": 3.7901897758795298 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1366.6183006362219, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 3069.9815866099266, "accept_length": 5.124267515923567 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1366.6183006362219, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2363.3377665362655, "accept_length": 4.030938739532834 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1492.6190597361915, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2911.405162351629, "accept_length": 3.1783624121672447 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1492.6190597361915, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 3265.2547245227543, "accept_length": 4.018270197787462 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1492.6190597361915, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2455.0885550482017, "accept_length": 3.295517305362425 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", 
"output_throughput": 1492.6190597361915, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 3413.029275629196, "accept_length": 4.576331556763159 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1492.6190597361915, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2355.0941391264764, "accept_length": 3.3973067623684012 } ] } ] }, "financeqa": { "benchmark_name": "financeqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1320.1266846132082, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1778.9653109324079, "accept_length": 2.0810309937160505 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1320.1266846132082, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1778.6778684706662, "accept_length": 2.2730321793789288 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1320.1266846132082, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1652.1607344416184, "accept_length": 2.2703352879266276 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1320.1266846132082, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1682.9566856293293, "accept_length": 2.3032779273841584 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": 
"Wihtout EAGLE3", "output_throughput": 1320.1266846132082, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1753.6698041448958, "accept_length": 2.6092096546804138 } ] } ] }, "gpqa": { "benchmark_name": "gpqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1410.428038868636, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2237.792328921565, "accept_length": 2.5958448251993995 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1410.428038868636, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2341.298191039886, "accept_length": 3.0077922694984913 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1410.428038868636, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 1961.1700111065113, "accept_length": 2.6947097860315505 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1410.428038868636, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2310.2053834681674, "accept_length": 3.216540452331778 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1410.428038868636, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", "output_throughput": 2008.7425535412629, "accept_length": 2.91748293468006 } ] } ] } }, "Qwen3-235B-A22B-Instruct-2507": { "gsm8k": { "benchmark_name": "gsm8k", 
"results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 469.12940470010284, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 633.4834448509783, "accept_length": 2.356716526992789 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 718.620120234308, "accept_length": 2.8762828246719394 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 469.12940470010284, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 619.3961515217887, "accept_length": 2.5325967285309847 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 740.8090293617215, "accept_length": 3.351527622767857 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 469.12940470010284, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 685.8224688133159, "accept_length": 2.2254637464335056 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 718.5200251720828, "accept_length": 2.5942242348162705 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 469.12940470010284, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 622.6877352310961, "accept_length": 2.577754285484885 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 758.2839780669175, "accept_length": 3.51144398279758 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 469.12940470010284, "accept_length": 1.0 }, { "Name": 
"lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 696.9862910262393, "accept_length": 2.2957518385545184 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 692.54543613971, "accept_length": 2.508131344520406 } ] } ] }, "math500": { "benchmark_name": "math500", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 587.3767625807179, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 821.7716217768141, "accept_length": 2.2131311175007076 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1165.3481778903413, "accept_length": 3.2287879445239853 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 587.3767625807179, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 786.5291154131861, "accept_length": 2.3811060693210626 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1263.6658286467714, "accept_length": 4.021472447253628 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 587.3767625807179, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 729.1280796475185, "accept_length": 2.1641727527768047 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1012.7228976076004, "accept_length": 3.3166681444513406 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 587.3767625807179, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 801.9730196026575, "accept_length": 2.4202165987905055 }, { "Name": 
"lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1399.195876342606, "accept_length": 4.477737029876627 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 587.3767625807179, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 728.5917394731794, "accept_length": 2.180077789251727 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 966.5149174357106, "accept_length": 3.0996346930308336 } ] } ] }, "mtbench": { "benchmark_name": "mtbench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 529.8952857212083, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 642.7287443329789, "accept_length": 1.8722335837366109 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 814.539845630713, "accept_length": 2.3454133346915906 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 529.8952857212083, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 617.9738942581079, "accept_length": 1.9436368219822697 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 779.531140147999, "accept_length": 2.571956737666924 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 529.8952857212083, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 579.7478777831109, "accept_length": 1.879637550849381 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 684.112380410899, "accept_length": 2.3538604252889965 } ] }, { 
"batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 529.8952857212083, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 607.3644823224199, "accept_length": 1.9674055586107704 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 789.9679697718769, "accept_length": 2.6698328935795956 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 529.8952857212083, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 596.0590450290033, "accept_length": 1.987328547838102 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 670.0058040199536, "accept_length": 2.329033512672587 } ] } ] }, "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 553.0503522362385, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 866.1813723921825, "accept_length": 2.533027363039563 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1068.373749600453, "accept_length": 3.238804311590177 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 553.0503522362385, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 853.4917713020631, "accept_length": 2.8369721532226433 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1176.5192650014792, "accept_length": 4.083723300745958 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 553.0503522362385, 
"accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 772.1684975661775, "accept_length": 2.5123042505592843 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1032.477913431608, "accept_length": 3.6360244115082825 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 553.0503522362385, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 889.8951303902317, "accept_length": 2.955997016746898 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1267.5178598410528, "accept_length": 4.4874762125186445 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 553.0503522362385, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 736.1010265214783, "accept_length": 2.3861131594156686 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 983.9906558013464, "accept_length": 3.412326127536581 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 598.1832041732818, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 803.7805606947842, "accept_length": 2.090690935434212 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1062.9796952555507, "accept_length": 2.8172381425652917 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 598.1832041732818, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 759.6333115912107, "accept_length": 2.2179516111790765 
}, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1093.1979234549972, "accept_length": 3.268498808394456 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 598.1832041732818, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 708.4447966909656, "accept_length": 2.077364507787014 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 874.062642276262, "accept_length": 2.6670587896561795 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 598.1832041732818, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 767.8685797664081, "accept_length": 2.2474642743536366 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 1155.6572987907093, "accept_length": 3.490068495285106 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 598.1832041732818, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 711.4663371023372, "accept_length": 2.129619842542645 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 835.6105646149398, "accept_length": 2.590646146520392 } ] } ] }, "financeqa": { "benchmark_name": "financeqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 539.5161023038148, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 689.4282413740445, "accept_length": 1.941237358311274 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 872.4508905377182, "accept_length": 2.556773924332344 } 
] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 539.5161023038148, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 636.4408069963314, "accept_length": 2.027268079304664 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 885.529748337286, "accept_length": 2.8442245393804413 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 539.5161023038148, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 642.4958901994291, "accept_length": 2.0553746448296777 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 730.7331843587357, "accept_length": 2.4330876223070512 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 539.5161023038148, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 641.1037073226237, "accept_length": 2.0361251069493296 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 889.0304393086461, "accept_length": 2.965008914078923 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 539.5161023038148, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 654.3422430101997, "accept_length": 2.1356956699218137 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 742.3749721046132, "accept_length": 2.5176210584474528 } ] } ] }, "gpqa": { "benchmark_name": "gpqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 563.1619467852893, 
"accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 716.6967887897075, "accept_length": 2.0240035915598344 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 823.4218898853592, "accept_length": 2.356617214868455 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 563.1619467852893, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 680.2044274358036, "accept_length": 2.14011469258975 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 808.934577824737, "accept_length": 2.6032639643837037 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 563.1619467852893, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 630.9312870281678, "accept_length": 1.9776516235921864 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 698.9315763256182, "accept_length": 2.2587729126518172 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 563.1619467852893, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 685.8554308455039, "accept_length": 2.1591340093176212 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 826.5168292170538, "accept_length": 2.6672259363465063 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 563.1619467852893, "accept_length": 1.0 }, { "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", "output_throughput": 636.0480501999019, "accept_length": 2.001480647431386 }, { "Name": 
"lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", "output_throughput": 683.7427107159214, "accept_length": 2.241436629482574 } ] } ] } }, "Qwen3-Next-80B-A3B-Instruct-FP8": { "gsm8k": { "benchmark_name": "gsm8k", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 549.6362180919164, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 683.8795985073891, "accept_length": 3.13391215089175 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 549.6362180919164, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 753.237074543623, "accept_length": 3.9038018228889597 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 549.6362180919164, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 746.7222279174218, "accept_length": 4.022678679117706 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 549.6362180919164, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 771.153101164556, "accept_length": 4.345554699994077 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 549.6362180919164, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 773.4012327870145, "accept_length": 4.607604467310829 } ] } ] }, "math500": { "benchmark_name": "math500", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, 
"metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 863.7773324206034, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1478.3001038430784, "accept_length": 3.498551418454351 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 863.7773324206034, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1764.2064514729698, "accept_length": 4.677160426045899 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 863.7773324206034, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1758.0166003158934, "accept_length": 4.755809947207558 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 863.7773324206034, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1912.6838622508392, "accept_length": 5.554967332076544 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 863.7773324206034, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1853.434631732593, "accept_length": 5.756492370623537 } ] } ] }, "mtbench": { "benchmark_name": "mtbench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 803.4970369348379, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1095.5102974622082, "accept_length": 2.581125058112506 } ] }, { "batch_size": 8, "steps": 5, "topk": 
1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 803.4970369348379, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1157.636689246293, "accept_length": 2.9156972910237133 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 803.4970369348379, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1197.112468072539, "accept_length": 3.1331585165547646 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 803.4970369348379, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1127.4364940073876, "accept_length": 3.0475279197966354 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 803.4970369348379, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1198.9417562126052, "accept_length": 3.4190589216409535 } ] } ] }, "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 788.4509521573036, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1245.6702060145312, "accept_length": 3.4647713687985653 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 788.4509521573036, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1527.7120587214345, "accept_length": 4.612265133111893 } ] }, { 
"batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 788.4509521573036, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1536.7723048769212, "accept_length": 4.676180904522613 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 788.4509521573036, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1628.1293604862747, "accept_length": 5.4577785667790994 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 788.4509521573036, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1629.7244930267507, "accept_length": 5.621873496873497 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 916.0337036761792, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1463.1234977160723, "accept_length": 3.1058026902179443 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 916.0337036761792, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1724.2207417984275, "accept_length": 3.8462516284893944 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 916.0337036761792, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1734.4894352951553, 
"accept_length": 3.9821418050654955 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 916.0337036761792, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1786.8774464735384, "accept_length": 4.2761952310299485 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 916.0337036761792, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1829.5532782765572, "accept_length": 4.590307145700787 } ] } ] }, "financeqa": { "benchmark_name": "financeqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 827.3050477430119, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 986.4282909200625, "accept_length": 2.0752097090844193 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 827.3050477430119, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 981.0983772859984, "accept_length": 2.1801329261720857 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 827.3050477430119, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1057.6549922432027, "accept_length": 2.439575219817722 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 827.3050477430119, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", 
"output_throughput": 956.6098887389447, "accept_length": 2.2457481515800852 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 827.3050477430119, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1041.5277102267419, "accept_length": 2.606484877248997 } ] } ] }, "gpqa": { "benchmark_name": "gpqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 909.8620481543201, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1368.9499756838852, "accept_length": 2.7362548025140208 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 909.8620481543201, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1457.9918429280988, "accept_length": 3.1803662497541225 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 909.8620481543201, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1511.274616068283, "accept_length": 3.3682366894832594 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 909.8620481543201, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1463.9444559000415, "accept_length": 3.380290412894046 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 909.8620481543201, "accept_length": 1.0 }, { "Name": 
"lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", "output_throughput": 1541.4580844550508, "accept_length": 3.7385501251645787 } ] } ] } }, "Qwen3-Coder-30B-A3B-Instruct": { "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1296.1854608851213, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 2621.7139434700584, "accept_length": 3.394971072541166 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1296.1854608851213, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 2966.4459091363574, "accept_length": 4.5011526953450725 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1296.1854608851213, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 2236.868611380527, "accept_length": 3.9489230027326796 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1296.1854608851213, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 3205.2025971977832, "accept_length": 5.306789266712931 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1296.1854608851213, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 2553.012134540716, "accept_length": 4.221071958746777 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, 
"num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1506.2936922288973, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 2992.02067556649, "accept_length": 3.138553878632709 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1506.2936922288973, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 3328.9058789398114, "accept_length": 3.9449129401751835 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1506.2936922288973, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 2541.3931549111803, "accept_length": 3.336379596827288 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1506.2936922288973, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 3472.3919294148427, "accept_length": 4.477776008915068 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 1506.2936922288973, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", "output_throughput": 2552.5518885328293, "accept_length": 3.5865930607956185 } ] } ] } }, "Qwen3-Coder-480B-A35B-Instruct": { "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 470.6571664751315, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 867.5261370310272, "accept_length": 
3.4954686382065345 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 470.6571664751315, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 1044.4475556194586, "accept_length": 4.68614810868407 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 470.6571664751315, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 945.2207076385645, "accept_length": 4.2835241878943675 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 470.6571664751315, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 1165.0727231905212, "accept_length": 5.626203379024545 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 470.6571664751315, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 956.5336674844815, "accept_length": 4.574128043621322 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.99996954994094, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 846.6405796214389, "accept_length": 3.0936425388083757 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.99996954994094, "accept_length": 1.0 }, { "Name": 
"lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 946.3806786937351, "accept_length": 3.8547162126548313 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.99996954994094, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 817.5432981932123, "accept_length": 3.3539182909649066 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.99996954994094, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 983.2554936551461, "accept_length": 4.260473117512835 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.99996954994094, "accept_length": 1.0 }, { "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", "output_throughput": 790.2818911646486, "accept_length": 3.379611891844464 } ] } ] } }, "Kimi-K2-Instruct": { "gsm8k": { "benchmark_name": "gsm8k", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 337.92445122816076, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 498.355967400969, "accept_length": 3.271389121751566 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 337.92445122816076, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 538.7660861191819, "accept_length": 4.120435815920245 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 337.92445122816076, 
"accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 476.5166831456105, "accept_length": 3.5748305647840533 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 337.92445122816076, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 544.16588655688, "accept_length": 4.655279611582661 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 337.92445122816076, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 459.1757114935756, "accept_length": 3.4419677544677545 } ] } ] }, "math500": { "benchmark_name": "math500", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 492.06079685961566, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 877.2113745892083, "accept_length": 3.46806357521281 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 492.06079685961566, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 995.8769550545389, "accept_length": 4.610169876195772 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 492.06079685961566, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 772.6100737625807, "accept_length": 3.527844083399639 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 492.06079685961566, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 1022.7285831443611, "accept_length": 5.383128673454291 } 
] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 492.06079685961566, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 649.083231514055, "accept_length": 3.1435862587473253 } ] } ] }, "mtbench": { "benchmark_name": "mtbench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 430.9240376244664, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 533.8166177911393, "accept_length": 2.3897198230461343 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 430.9240376244664, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 526.1187611377575, "accept_length": 2.738876732312181 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 430.9240376244664, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 473.3129895327435, "accept_length": 2.394141207153502 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 430.9240376244664, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 488.46384825810924, "accept_length": 2.7821796546219706 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 430.9240376244664, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 451.126180366313, "accept_length": 2.536454493323503 } ] } ] }, "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { 
"Name": "Wihtout EAGLE3", "output_throughput": 466.0584238730984, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 779.7838793636296, "accept_length": 3.364936827816644 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 466.0584238730984, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 868.550857852841, "accept_length": 4.423030465709301 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 466.0584238730984, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 729.1217213710999, "accept_length": 3.7321711568938194 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 466.0584238730984, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 897.9039799990946, "accept_length": 5.162398550153652 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 466.0584238730984, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 669.271164663664, "accept_length": 3.7044178210408085 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.12137141510016, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 841.5023790421864, "accept_length": 3.162685632492396 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.12137141510016, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", 
"output_throughput": 904.3910288246204, "accept_length": 3.943605886942718 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.12137141510016, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 716.7319007181034, "accept_length": 3.1374681580049573 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.12137141510016, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 896.7006322822839, "accept_length": 4.400262176061309 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 500.12137141510016, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 650.4333056536461, "accept_length": 3.0780193205478037 } ] } ] }, "financeqa": { "benchmark_name": "financeqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 433.44658979995484, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 647.3644717982133, "accept_length": 2.9848269628099175 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 433.44658979995484, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 660.0254297132984, "accept_length": 3.594056395834917 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 433.44658979995484, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 523.0340443308603, "accept_length": 2.8796471741261027 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, 
"num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 433.44658979995484, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 630.5425124127137, "accept_length": 3.944647875329984 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 433.44658979995484, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 389.47080223360666, "accept_length": 2.5096594789735582 } ] } ] }, "gpqa": { "benchmark_name": "gpqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 505.3742994094499, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 783.436424568974, "accept_length": 2.904452196823693 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 505.3742994094499, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 811.3642458480507, "accept_length": 3.4622853609057755 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 505.3742994094499, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 699.8111934038128, "accept_length": 3.0198274205132876 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 505.3742994094499, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 770.4892578818251, "accept_length": 3.6995331477421103 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 505.3742994094499, "accept_length": 1.0 }, { "Name": 
"AQ-MedAI/Kimi-K2-Instruct-eagle3", "output_throughput": 596.3162033813331, "accept_length": 2.7901899604967983 } ] } ] } }, "Ling-flash-2.0": { "gsm8k": { "benchmark_name": "gsm8k", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 674.3464018618124, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1144.7606179148752, "accept_length": 3.4351661916604646 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 674.3464018618124, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1253.4000030615975, "accept_length": 4.487906489549112 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 674.3464018618124, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1059.7381115819003, "accept_length": 3.331830155824441 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 674.3464018618124, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1323.0093663978187, "accept_length": 5.148644964283767 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 674.3464018618124, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1026.8025294413142, "accept_length": 3.126593214481735 } ] } ] }, "math500": { "benchmark_name": "math500", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 762.7113399535667, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1434.6065070935829, 
"accept_length": 3.4340471141971713 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 762.7113399535667, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1607.3212268988339, "accept_length": 4.493397164127635 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 762.7113399535667, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1383.6720582197756, "accept_length": 3.7931376508179415 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 762.7113399535667, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1685.5692612687462, "accept_length": 5.218245374511558 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 762.7113399535667, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1330.1086623703009, "accept_length": 3.793696144088135 } ] } ] }, "mtbench": { "benchmark_name": "mtbench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 728.5278345617202, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1022.5890920470158, "accept_length": 2.392568385378843 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 728.5278345617202, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 990.0430932236113, "accept_length": 2.648161574313827 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", 
"output_throughput": 728.5278345617202, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 914.3899001110539, "accept_length": 2.5161251562049407 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 728.5278345617202, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 942.3914903299366, "accept_length": 2.771332137960131 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 728.5278345617202, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 968.0479918450316, "accept_length": 2.8558805412179527 } ] } ] }, "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 740.2477168580639, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1271.2889448808319, "accept_length": 3.1471241394625804 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 740.2477168580639, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1353.1437889143726, "accept_length": 3.9318483282257697 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 740.2477168580639, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1175.4192382338058, "accept_length": 3.29687986547923 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 740.2477168580639, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1358.9726439538854, 
"accept_length": 4.370163501574083 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 740.2477168580639, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1141.7913416362687, "accept_length": 3.3590013964490297 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 770.3957537752161, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1305.1833791876973, "accept_length": 2.9790301516097895 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 770.3957537752161, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1366.417326281792, "accept_length": 3.6103649876590875 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 770.3957537752161, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1130.7868943433502, "accept_length": 2.8933133857317164 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 770.3957537752161, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1345.6741018953574, "accept_length": 3.9330923185867093 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 770.3957537752161, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1061.6897228931932, "accept_length": 2.902182106883942 } ] } ] }, "financeqa": { "benchmark_name": "financeqa", "results": [ { "batch_size": 8, "steps": 3, 
"topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 747.7098566179897, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 863.8565336005082, "accept_length": 1.907102314310342 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 747.7098566179897, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 833.1235940586521, "accept_length": 2.047546254809973 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 747.7098566179897, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 798.9811798480557, "accept_length": 1.9372590117256243 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 747.7098566179897, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 763.2761511276084, "accept_length": 2.0470985454359427 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 747.7098566179897, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 779.3060665006524, "accept_length": 2.045476819601249 } ] } ] }, "gpqa": { "benchmark_name": "gpqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 794.1289733679167, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1185.7250147683403, "accept_length": 2.562389392369937 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 794.1289733679167, "accept_length": 1.0 }, { "Name": 
"AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1161.8732670284553, "accept_length": 2.886871902842324 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 794.1289733679167, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1052.640023467198, "accept_length": 2.6017604302340236 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 794.1289733679167, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1111.996259596397, "accept_length": 3.0648124985786733 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 794.1289733679167, "accept_length": 1.0 }, { "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", "output_throughput": 1004.4992021266573, "accept_length": 2.6709053367549105 } ] } ] } }, "Llama-3.1-8B-Instruct": { "gsm8k": { "benchmark_name": "gsm8k", "results": [ { "batch_size": 1, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 181.81151788749455, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 228.64232714994796, "accept_length": 1.7165139181419709 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 321.2528041157779, "accept_length": 2.5481878001819607 } ] }, { "batch_size": 1, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 181.81151788749455, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 213.550264904667, "accept_length": 1.7634936642258956 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 329.6873220645443, "accept_length": 2.8537845395516377 } ] }, { 
"batch_size": 1, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 181.81151788749455, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 195.13619448514442, "accept_length": 1.7528912619638426 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 251.43922505539766, "accept_length": 2.2820562939796716 } ] }, { "batch_size": 1, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 181.81151788749455, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 197.901650893672, "accept_length": 1.7742552127753433 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 317.61058794222197, "accept_length": 2.9733251079580505 } ] }, { "batch_size": 1, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 181.81151788749455, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 182.0257072155964, "accept_length": 1.789228234172427 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 240.85801894998306, "accept_length": 2.367398432594591 } ] } ] }, "math500": { "benchmark_name": "math500", "results": [ { "batch_size": 1, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 191.04076784280642, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 399.2995452070592, "accept_length": 2.7825411590459592 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 492.28246574028134, "accept_length": 3.4786948176583494 } ] }, { "batch_size": 1, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 
191.04076784280642, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 422.40466722576286, "accept_length": 3.254684892147128 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 594.5033645961273, "accept_length": 4.624857400180126 } ] }, { "batch_size": 1, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 191.04076784280642, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 387.0489467031037, "accept_length": 3.3070174292508296 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 480.43534296060534, "accept_length": 4.116159164796923 } ] }, { "batch_size": 1, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 191.04076784280642, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 413.57783551553456, "accept_length": 3.489213277012106 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 638.0439777096752, "accept_length": 5.402844266750837 } ] }, { "batch_size": 1, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 191.04076784280642, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 326.8790406711244, "accept_length": 3.072066504990206 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 453.306808098541, "accept_length": 4.25573095185686 } ] } ] }, "mtbench": { "benchmark_name": "mtbench", "results": [ { "batch_size": 1, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.98120707576373, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 414.90616666264776, 
"accept_length": 2.930670028119849 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 404.24667749722187, "accept_length": 2.8980726819445777 } ] }, { "batch_size": 1, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.98120707576373, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 453.73692243041774, "accept_length": 3.554148008484563 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 446.6366476858434, "accept_length": 3.5164393144456105 } ] }, { "batch_size": 1, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.98120707576373, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 338.6308027570883, "accept_length": 2.9393909722902185 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 346.46724606666106, "accept_length": 3.0061221366256823 } ] }, { "batch_size": 1, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.98120707576373, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 454.730035166582, "accept_length": 3.906676145543851 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 450.03198538047087, "accept_length": 3.855839765261211 } ] }, { "batch_size": 1, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.98120707576373, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 305.1648971387325, "accept_length": 2.9089536379397125 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 308.00561770283963, "accept_length": 2.938163437236731 } ] } ] }, 
"humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 1, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.91017930680567, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 432.8677712430711, "accept_length": 3.0469174293472796 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 465.1765542307934, "accept_length": 3.3398192040568846 } ] }, { "batch_size": 1, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.91017930680567, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 479.1212006261437, "accept_length": 3.7445769729930163 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 548.9370103875078, "accept_length": 4.318366474235621 } ] }, { "batch_size": 1, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.91017930680567, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 340.2704451839945, "accept_length": 2.9425913908717285 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 377.47349118830954, "accept_length": 3.2519286521546853 } ] }, { "batch_size": 1, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.91017930680567, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 480.3152659024827, "accept_length": 4.0959237477185155 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 571.4886457684788, "accept_length": 4.910129659643436 } ] }, { "batch_size": 1, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 
190.91017930680567, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 311.1051926955927, "accept_length": 2.9338537387017256 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 330.15665770360005, "accept_length": 3.126203604641593 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 1, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.70410640395912, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 380.6915537026263, "accept_length": 2.6893540748536475 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 439.67672671912396, "accept_length": 3.16861704188786 } ] }, { "batch_size": 1, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.70410640395912, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 398.3738662742165, "accept_length": 3.1199565043209523 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 506.22686693578754, "accept_length": 3.9957244075250427 } ] }, { "batch_size": 1, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.70410640395912, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 322.29847741557273, "accept_length": 2.771756050751679 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 375.34956052924895, "accept_length": 3.236171472299629 } ] }, { "batch_size": 1, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.70410640395912, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 
391.25705242634194, "accept_length": 3.334862665932587 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 516.904537338255, "accept_length": 4.466856034741759 } ] }, { "batch_size": 1, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 189.70410640395912, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 287.68205157705233, "accept_length": 2.7148899046029547 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 378.8468257829908, "accept_length": 3.585376494197714 } ] } ] }, "financeqa": { "benchmark_name": "financeqa", "results": [ { "batch_size": 1, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 185.6534194378935, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 237.18050733350836, "accept_length": 1.713236561734993 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 258.6437346257605, "accept_length": 1.9050339301460721 } ] }, { "batch_size": 1, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 185.6534194378935, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 226.67848476067016, "accept_length": 1.8075300109130592 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 254.48969338840087, "accept_length": 2.043805528134255 } ] }, { "batch_size": 1, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 185.6534194378935, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 210.94791438286492, "accept_length": 1.8654798891594593 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", 
"output_throughput": 251.07710462288492, "accept_length": 2.2264818220398923 } ] }, { "batch_size": 1, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 185.6534194378935, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 211.18454065719607, "accept_length": 1.8434056761268782 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 240.6034453504167, "accept_length": 2.1029710512950737 } ] }, { "batch_size": 1, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 185.6534194378935, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 183.72672690273865, "accept_length": 1.7817737292479987 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 229.82170237350869, "accept_length": 2.250341575212658 } ] } ] }, "gpqa": { "benchmark_name": "gpqa", "results": [ { "batch_size": 1, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.4500188461883, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 409.86415544506445, "accept_length": 2.8552892726009724 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 442.54523731909666, "accept_length": 3.135712400558006 } ] }, { "batch_size": 1, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.4500188461883, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 438.0519648397228, "accept_length": 3.3792158666871135 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 507.1290934019136, "accept_length": 3.936040126357265 } ] }, { "batch_size": 1, "steps": 5, "topk": 3, 
"num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.4500188461883, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 352.1689105895484, "accept_length": 3.026258098612226 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 413.1686528229548, "accept_length": 3.5475168823860437 } ] }, { "batch_size": 1, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.4500188461883, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 434.1788724748705, "accept_length": 3.6819800875461333 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 514.2312383540044, "accept_length": 4.357665531437638 } ] }, { "batch_size": 1, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 190.4500188461883, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", "output_throughput": 311.5910755177637, "accept_length": 2.9283727399165507 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", "output_throughput": 390.64506651929287, "accept_length": 3.692280754414928 } ] } ] } }, "Llama-3.3-70B-Instruct": { "gsm8k": { "benchmark_name": "gsm8k", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 453.2156138501392, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 521.4502791575164, "accept_length": 1.2760798037239203 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 837.9426300003847, "accept_length": 2.3179247901200304 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 453.2156138501392,
"accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 500.5534332009228, "accept_length": 1.2836005168205962 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 855.6400225608106, "accept_length": 2.4851382017038057 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 453.2156138501392, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 500.33326156436937, "accept_length": 1.3482255389718076 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 758.9001336688345, "accept_length": 2.12511673151751 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 453.2156138501392, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 483.12653680688, "accept_length": 1.2856745693167546 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 820.5175400063332, "accept_length": 2.516910489405022 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 453.2156138501392, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 480.4218686725539, "accept_length": 1.3936331604189096 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 739.405741336959, "accept_length": 2.222061210294459 } ] } ] }, "math500": { "benchmark_name": "math500", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 567.3739460148672, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1088.844896763402, 
"accept_length": 2.3720131878590123 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1273.7733416283656, "accept_length": 2.841736535013628 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 567.3739460148672, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1122.2476729474943, "accept_length": 2.5920045204124875 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1382.9357431087456, "accept_length": 3.243898689873717 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 567.3739460148672, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1112.8479569335152, "accept_length": 2.792588962605549 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1274.2110431983278, "accept_length": 3.2416170775479363 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 567.3739460148672, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1079.9951811356827, "accept_length": 2.6718376973892366 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1327.6044700788502, "accept_length": 3.3766338373668217 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 567.3739460148672, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1090.3170854344964, "accept_length": 2.966812280063099 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1215.8347875575441, "accept_length": 3.3641021480547684 } 
] } ] }, "mtbench": { "benchmark_name": "mtbench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 540.4640557255416, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1234.647877556777, "accept_length": 2.9232673267326734 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1238.4736758319698, "accept_length": 2.9606951984177083 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 540.4640557255416, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1377.8052334866013, "accept_length": 3.5324281309061973 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1409.5100765643524, "accept_length": 3.6175162329362442 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 540.4640557255416, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1129.6661036217977, "accept_length": 3.143848893296669 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1108.3072501756835, "accept_length": 3.2248797608215263 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 540.4640557255416, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1425.2993761886291, "accept_length": 3.8789368991048736 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1440.3671955624673, "accept_length": 3.97791186891054 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", 
"output_throughput": 540.4640557255416, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1069.4986663607351, "accept_length": 3.1943331425300516 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1033.773238205561, "accept_length": 3.2422141262192974 } ] } ] }, "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.9500728009846, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1194.0875984832494, "accept_length": 2.6663626344392504 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1290.1122375104421, "accept_length": 2.925804965875309 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.9500728009846, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1282.7936401185236, "accept_length": 3.0671719811813904 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1426.372333907719, "accept_length": 3.436568804650481 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.9500728009846, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1090.1088508973057, "accept_length": 2.8127895941495002 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1174.0867819009864, "accept_length": 3.0611013660766493 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.9500728009846, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", 
"output_throughput": 1267.8737053510965, "accept_length": 3.1906793120660706 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1407.8140138598972, "accept_length": 3.6735002608242047 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.9500728009846, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1013.2705272855593, "accept_length": 2.7776112847805305 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 968.2027451202639, "accept_length": 2.742653690956563 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.8834615148919, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1210.6010917932015, "accept_length": 2.723797958423008 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1295.014267720614, "accept_length": 2.952023346303502 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.8834615148919, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1303.4195570335166, "accept_length": 3.133414966360772 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1423.2736941362525, "accept_length": 3.4980468448438247 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.8834615148919, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1070.711661408102, "accept_length": 2.735034762087001 }, { "Name": 
"lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1154.785652335772, "accept_length": 2.9811645516106386 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.8834615148919, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1279.5345355421975, "accept_length": 3.284394784770605 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1399.3991191944933, "accept_length": 3.716324359708698 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 560.8834615148919, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 1013.3765756840332, "accept_length": 2.773990564681233 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1035.4140338795994, "accept_length": 2.933293078243183 } ] } ] }, "financeqa": { "benchmark_name": "financeqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 512.5751663875466, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 704.0737829344649, "accept_length": 1.645732050137249 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 936.4940018423655, "accept_length": 2.2541347317466722 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 512.5751663875466, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 684.0195321200449, "accept_length": 1.702027072988232 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 933.0572305312112, "accept_length": 
2.39442380929992 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 512.5751663875466, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 618.4946534541955, "accept_length": 1.7860533893688224 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 700.886442439991, "accept_length": 2.281622206910129 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 512.5751663875466, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 652.1412786559076, "accept_length": 1.7116903633491312 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 887.7001871678323, "accept_length": 2.452738257649581 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 512.5751663875466, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 635.2599880909434, "accept_length": 1.9610333607746286 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 854.0347909075315, "accept_length": 2.589833798374378 } ] } ] }, "gpqa": { "benchmark_name": "gpqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 575.6879373469175, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 962.5545831639148, "accept_length": 2.0451300999292217 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1020.0538308626681, "accept_length": 2.1911976817371235 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", 
"output_throughput": 575.6879373469175, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 963.8356757692138, "accept_length": 2.1687507495755036 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 1039.643962895085, "accept_length": 2.3552079123829617 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 575.6879373469175, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 890.1003387342033, "accept_length": 2.226321240698847 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 960.5616523564485, "accept_length": 2.4811411267352264 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 575.6879373469175, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 916.6826693888017, "accept_length": 2.1849745643049188 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 984.4877550429275, "accept_length": 2.4152394292465176 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 575.6879373469175, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", "output_throughput": 838.0962787179271, "accept_length": 2.3145643059121785 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", "output_throughput": 924.0808096194634, "accept_length": 2.573260793115575 } ] } ] } }, "Llama-4-Scout-17B-16E-Instruct": { "gsm8k": { "benchmark_name": "gsm8k", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 455.9311905316165, "accept_length": 1.0 }, { "Name": 
"lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 816.6176343207234, "accept_length": 2.435108707729916 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 908.8655650704263, "accept_length": 3.1118742007294085 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 455.9311905316165, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 806.5328373116205, "accept_length": 2.6234459324405357 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 971.8534490877095, "accept_length": 3.8715801886792454 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 455.9311905316165, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 708.8133468064259, "accept_length": 2.146746247607535 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 818.3072714693558, "accept_length": 2.918526679710503 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 455.9311905316165, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 765.9810114809961, "accept_length": 2.675257522087863 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 957.227019602509, "accept_length": 4.307217442700466 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 455.9311905316165, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 675.0775309782273, "accept_length": 
2.144316290813106 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 814.5839518607636, "accept_length": 2.627502101582583 } ] } ] }, "math500": { "benchmark_name": "math500", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 561.835811548351, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1478.9989946720648, "accept_length": 2.366719134681358 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1884.3462895109676, "accept_length": 3.238557789111507 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 561.835811548351, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1447.5513200323323, "accept_length": 2.5898901840327406 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 2100.7682204066577, "accept_length": 4.153214423200308 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 561.835811548351, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1199.1485073659853, "accept_length": 2.489558557182447 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1457.2169829849418, "accept_length": 3.2046972238757507 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 561.835811548351, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1330.0337890073868, "accept_length": 2.648556845221877 }, { "Name": 
"lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 2110.3314050998847, "accept_length": 4.7805795395081105 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 561.835811548351, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1153.7706965189202, "accept_length": 2.6314392278632304 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1369.6607164745208, "accept_length": 3.2076523352436657 } ] } ] }, "mtbench": { "benchmark_name": "mtbench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 502.10114738381606, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1252.9681990096112, "accept_length": 2.3541095408844828 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1302.3829223511154, "accept_length": 2.4913843888070693 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 502.10114738381606, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1225.4607594389363, "accept_length": 2.5648559607722956 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1312.399917450856, "accept_length": 2.836414637256152 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 502.10114738381606, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 953.148992300308, "accept_length": 2.222710749523974 }, { "Name": 
"lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 967.1281111811169, "accept_length": 2.3256101583113455 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 502.10114738381606, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1157.0433602013916, "accept_length": 2.649528603387664 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1276.9552963643773, "accept_length": 3.0189181867437243 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 502.10114738381606, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 940.9893388280037, "accept_length": 2.3959043407227965 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1010.4098410869198, "accept_length": 2.7008052625609618 } ] } ] }, "humaneval": { "benchmark_name": "humaneval", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 631.8746804703884, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1515.800628974162, "accept_length": 2.664927494512612 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1749.0012751674196, "accept_length": 3.224152798137449 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 631.8746804703884, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1556.515161340629, "accept_length": 3.085438335809807 }, { "Name": 
"lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1921.2922045342316, "accept_length": 4.140846637369973 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 631.8746804703884, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1201.849883743592, "accept_length": 2.6006220481511346 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1393.1592557980014, "accept_length": 3.1744799971652315 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 631.8746804703884, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1456.346786965349, "accept_length": 3.2582381225462083 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1944.8214954525663, "accept_length": 4.7947306331104995 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 631.8746804703884, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1109.058302621911, "accept_length": 2.6508010386556267 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1234.7042057027743, "accept_length": 3.0442784990549376 } ] } ] }, "livecodebench": { "benchmark_name": "livecodebench", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 484.2501137181978, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1598.2921930690502, "accept_length": 2.487202280374381 }, { "Name": 
"lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1933.9962764283844, "accept_length": 3.14740116583215 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 484.2501137181978, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1601.2688464385185, "accept_length": 2.8043640587405627 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 2144.3319751584095, "accept_length": 3.983057732747085 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 484.2501137181978, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1051.7266219288254, "accept_length": 2.1138485934104656 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1320.656674087923, "accept_length": 2.7145795398417976 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 484.2501137181978, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1501.558947290443, "accept_length": 2.929916684169992 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 2170.188140733029, "accept_length": 4.55060712303548 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 484.2501137181978, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1009.5574686537159, "accept_length": 2.2590065740745002 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1249.8114756626915, 
"accept_length": 2.8130523194007555 } ] } ] }, "financeqa": { "benchmark_name": "financeqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 288.9007335547823, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1022.713052476267, "accept_length": 1.7952034022379475 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1189.61672405822, "accept_length": 2.2164571332464367 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 288.9007335547823, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 963.8209003406079, "accept_length": 1.8240590609583607 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1171.8275957081507, "accept_length": 2.408275220827522 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 288.9007335547823, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 755.8055387643059, "accept_length": 1.780077619663648 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 887.65933899505, "accept_length": 2.1907344347752975 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 288.9007335547823, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 885.0003924094965, "accept_length": 1.864155494076754 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1084.5573704005851, "accept_length": 2.459442783236034 } ] }, { 
"batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 288.9007335547823, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 773.7660016870891, "accept_length": 2.05643096671835 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 838.3207906571789, "accept_length": 2.1910908349096845 } ] } ] }, "gpqa": { "benchmark_name": "gpqa", "results": [ { "batch_size": 8, "steps": 3, "topk": 1, "num_draft_tokens": 4, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 541.0010469896803, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1320.0198779778916, "accept_length": 2.0166714112874526 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1482.2781495871964, "accept_length": 2.3200242800296755 } ] }, { "batch_size": 8, "steps": 5, "topk": 1, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 541.0010469896803, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1258.0775283103167, "accept_length": 2.135039169677331 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1468.3432054658438, "accept_length": 2.5528455284552845 } ] }, { "batch_size": 8, "steps": 5, "topk": 3, "num_draft_tokens": 6, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 541.0010469896803, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1405.110892125768, "accept_length": 2.8834021014937705 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1502.213627081269, "accept_length": 3.0623772161357583 } ] }, { "batch_size": 8, "steps": 7, "topk": 1, 
"num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 541.0010469896803, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1148.5409144989237, "accept_length": 2.1684843736177633 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1379.1223204247422, "accept_length": 2.672381928590287 } ] }, { "batch_size": 8, "steps": 7, "topk": 4, "num_draft_tokens": 8, "metrics": [ { "Name": "Wihtout EAGLE3", "output_throughput": 541.0010469896803, "accept_length": 1.0 }, { "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", "output_throughput": 1345.7377508882935, "accept_length": 3.044341630328194 }, { "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", "output_throughput": 1474.1967930541948, "accept_length": 3.315005686664771 } ] } ] } } }