{ "baseline_fp32": { "scenario": "baseline_fp32", "load_s": 6.827268551001907, "tokenize_s": 0.0005446634986583376, "prefill_forward_s": 0.20426781075184408, "first_token_latency_s": 0.15614239850037848, "generate_s": 6.644134370999382, "decode_tokens_per_s": 9.63279738845269, "token_match_vs_baseline": 1.0, "rss_before_load_gb": 0.41101837158203125, "rss_after_load_gb": 2.2806396484375, "rss_after_bench_gb": 2.3895835876464844 }, "rotorquant_pkg": { "scenario": "rotorquant_pkg", "load_s": 6.679943737995927, "tokenize_s": 0.0004972177503077546, "prefill_forward_s": 0.189673415499783, "first_token_latency_s": 0.15492356824870512, "generate_s": 6.788896262753042, "decode_tokens_per_s": 9.428512414252518, "token_match_vs_baseline": 0.08203125, "rss_before_load_gb": 0.7132225036621094, "rss_after_load_gb": 2.7602615356445312, "rss_after_bench_gb": 2.7602615356445312, "delta_vs_baseline": { "load_s": -0.1473248130059801, "prefill_forward_s": -0.014594395252061076, "first_token_latency_s": -0.001218830251673353, "generate_s": 0.14476189175366017, "decode_tokens_per_s": -0.20428497420017244, "rss_after_load_gb": 0.47962188720703125 } }, "runtime_dynamic_int8": { "scenario": "runtime_dynamic_int8", "load_s": 5.673944287002087, "tokenize_s": 0.0005328417501004878, "prefill_forward_s": 0.08282363574653573, "first_token_latency_s": 0.07344392174854875, "generate_s": 2.5142489557511, "decode_tokens_per_s": 25.45832190426116, "token_match_vs_baseline": 0.00390625, "rss_before_load_gb": 1.6898918151855469, "rss_after_load_gb": 2.797016143798828, "rss_after_bench_gb": 2.798816680908203, "delta_vs_baseline": { "load_s": -1.1533242639998207, "prefill_forward_s": -0.12144417500530835, "first_token_latency_s": -0.08269847675182973, "generate_s": -4.129885415248282, "decode_tokens_per_s": 15.825524515808471, "rss_after_load_gb": 0.5163764953613281 } } }