RotorQuant-ModelWeights-Runtime / artifacts /runtime_benchmark.json
cnmoro's picture
Upload 29 files
18f4d80 verified
{
"baseline_fp32": {
"scenario": "baseline_fp32",
"load_s": 6.827268551001907,
"tokenize_s": 0.0005446634986583376,
"prefill_forward_s": 0.20426781075184408,
"first_token_latency_s": 0.15614239850037848,
"generate_s": 6.644134370999382,
"decode_tokens_per_s": 9.63279738845269,
"token_match_vs_baseline": 1.0,
"rss_before_load_gb": 0.41101837158203125,
"rss_after_load_gb": 2.2806396484375,
"rss_after_bench_gb": 2.3895835876464844
},
"rotorquant_pkg": {
"scenario": "rotorquant_pkg",
"load_s": 6.679943737995927,
"tokenize_s": 0.0004972177503077546,
"prefill_forward_s": 0.189673415499783,
"first_token_latency_s": 0.15492356824870512,
"generate_s": 6.788896262753042,
"decode_tokens_per_s": 9.428512414252518,
"token_match_vs_baseline": 0.08203125,
"rss_before_load_gb": 0.7132225036621094,
"rss_after_load_gb": 2.7602615356445312,
"rss_after_bench_gb": 2.7602615356445312,
"delta_vs_baseline": {
"load_s": -0.1473248130059801,
"prefill_forward_s": -0.014594395252061076,
"first_token_latency_s": -0.001218830251673353,
"generate_s": 0.14476189175366017,
"decode_tokens_per_s": -0.20428497420017244,
"rss_after_load_gb": 0.47962188720703125
}
},
"runtime_dynamic_int8": {
"scenario": "runtime_dynamic_int8",
"load_s": 5.673944287002087,
"tokenize_s": 0.0005328417501004878,
"prefill_forward_s": 0.08282363574653573,
"first_token_latency_s": 0.07344392174854875,
"generate_s": 2.5142489557511,
"decode_tokens_per_s": 25.45832190426116,
"token_match_vs_baseline": 0.00390625,
"rss_before_load_gb": 1.6898918151855469,
"rss_after_load_gb": 2.797016143798828,
"rss_after_bench_gb": 2.798816680908203,
"delta_vs_baseline": {
"load_s": -1.1533242639998207,
"prefill_forward_s": -0.12144417500530835,
"first_token_latency_s": -0.08269847675182973,
"generate_s": -4.129885415248282,
"decode_tokens_per_s": 15.825524515808471,
"rss_after_load_gb": 0.5163764953613281
}
}
}