Add files using upload-large-folder tool
Browse files- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/metadata.json +111 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_1024B_128new_speedup_vs_batch.png +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_1024B_128new_throughput_vs_batch.png +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_1024B_64new_speedup_vs_batch.png +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_1024B_64new_throughput_vs_batch.png +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_256B_128new_speedup_vs_batch.png +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_256B_128new_throughput_vs_batch.png +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_256B_64new_speedup_vs_batch.png +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_256B_64new_throughput_vs_batch.png +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/results.json +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/runs.partial.jsonl +0 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/summary.csv +21 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/summary.md +76 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/warmup_runs.partial.jsonl +20 -0
- pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/warmup_summary.csv +21 -0
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/metadata.json
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": {
|
| 3 |
+
"seed": 42,
|
| 4 |
+
"device": "cuda",
|
| 5 |
+
"dtype": "bfloat16",
|
| 6 |
+
"benchmark": {
|
| 7 |
+
"repeats": 3,
|
| 8 |
+
"batch_sizes": [
|
| 9 |
+
1,
|
| 10 |
+
2,
|
| 11 |
+
4,
|
| 12 |
+
8,
|
| 13 |
+
16
|
| 14 |
+
],
|
| 15 |
+
"max_new_tokens": [
|
| 16 |
+
64,
|
| 17 |
+
128
|
| 18 |
+
],
|
| 19 |
+
"stop_on_eos": true,
|
| 20 |
+
"measure_memory": true,
|
| 21 |
+
"collect_traces": true,
|
| 22 |
+
"continue_on_error": true,
|
| 23 |
+
"warmup": {
|
| 24 |
+
"enabled": true
|
| 25 |
+
}
|
| 26 |
+
},
|
| 27 |
+
"workloads": {
|
| 28 |
+
"synthetic": {
|
| 29 |
+
"enabled": false,
|
| 30 |
+
"prompt_byte_buckets": [
|
| 31 |
+
256,
|
| 32 |
+
1024
|
| 33 |
+
],
|
| 34 |
+
"samples_per_bucket": 128
|
| 35 |
+
},
|
| 36 |
+
"code": {
|
| 37 |
+
"enabled": true,
|
| 38 |
+
"data_path": "code_completion_exp/datasets/data_V4_full",
|
| 39 |
+
"split": "validation",
|
| 40 |
+
"prompt_byte_buckets": [
|
| 41 |
+
256,
|
| 42 |
+
1024
|
| 43 |
+
],
|
| 44 |
+
"samples_per_bucket": 128,
|
| 45 |
+
"max_samples_to_scan": 5000
|
| 46 |
+
}
|
| 47 |
+
},
|
| 48 |
+
"models": {
|
| 49 |
+
"enabled": [
|
| 50 |
+
"pythia"
|
| 51 |
+
],
|
| 52 |
+
"hnet": {
|
| 53 |
+
"config_path": "hnet_project/configs/hnet_2stage_XL_code.json",
|
| 54 |
+
"checkpoint_path": "hnet_project/checkpoints/hnet_2stage_XL_code.pt"
|
| 55 |
+
},
|
| 56 |
+
"hnet_serial": {
|
| 57 |
+
"config_path": "hnet_project/configs/hnet_2stage_XL_code.json",
|
| 58 |
+
"checkpoint_path": "hnet_project/checkpoints/hnet_2stage_XL_code.pt"
|
| 59 |
+
},
|
| 60 |
+
"bolmo": {
|
| 61 |
+
"model_name": "allenai/Bolmo-1B",
|
| 62 |
+
"cache_dir": "cache/bolmo",
|
| 63 |
+
"local_files_only": false
|
| 64 |
+
},
|
| 65 |
+
"pythia": {
|
| 66 |
+
"model_name": "EleutherAI/pythia-1.4b",
|
| 67 |
+
"cache_dir": "cache/hf/pythia-1.4b",
|
| 68 |
+
"local_files_only": false,
|
| 69 |
+
"max_context_len": 4096
|
| 70 |
+
}
|
| 71 |
+
},
|
| 72 |
+
"paths": {
|
| 73 |
+
"output_root": "outputs/pythia_1_4b_only"
|
| 74 |
+
}
|
| 75 |
+
},
|
| 76 |
+
"repo_root": "/workspace/byte-llms-code",
|
| 77 |
+
"workloads": {
|
| 78 |
+
"code/256B": 128,
|
| 79 |
+
"code/1024B": 128
|
| 80 |
+
},
|
| 81 |
+
"environment": {
|
| 82 |
+
"timestamp": "2026-05-11T14:19:08",
|
| 83 |
+
"hostname": "e41665783a40",
|
| 84 |
+
"platform": "Linux-6.8.0-110-generic-x86_64-with-glibc2.39",
|
| 85 |
+
"python": "3.12.0 | packaged by Anaconda, Inc. | (main, Oct 2 2023, 17:29:18) [GCC 11.2.0]",
|
| 86 |
+
"git_commit": "21e26deacf90f77563e3834cfb46e6dc64f52d6e",
|
| 87 |
+
"git_branch": "feature/generation-bench",
|
| 88 |
+
"git_dirty": false,
|
| 89 |
+
"torch": "2.6.0+cu124",
|
| 90 |
+
"torch_cuda": "12.4",
|
| 91 |
+
"cuda_available": true,
|
| 92 |
+
"transformers": "4.57.6",
|
| 93 |
+
"cuda_devices": [
|
| 94 |
+
{
|
| 95 |
+
"index": 0,
|
| 96 |
+
"name": "NVIDIA A100X",
|
| 97 |
+
"total_memory_mb": 81341.8125,
|
| 98 |
+
"major": 8,
|
| 99 |
+
"minor": 0
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"index": 1,
|
| 103 |
+
"name": "NVIDIA A100X",
|
| 104 |
+
"total_memory_mb": 81341.8125,
|
| 105 |
+
"major": 8,
|
| 106 |
+
"minor": 0
|
| 107 |
+
}
|
| 108 |
+
],
|
| 109 |
+
"nvidia_smi": "NVIDIA A100X, 81920 MiB, 570.86.10\nNVIDIA A100X, 81920 MiB, 570.86.10"
|
| 110 |
+
}
|
| 111 |
+
}
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_1024B_128new_speedup_vs_batch.png
ADDED
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_1024B_128new_throughput_vs_batch.png
ADDED
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_1024B_64new_speedup_vs_batch.png
ADDED
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_1024B_64new_throughput_vs_batch.png
ADDED
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_256B_128new_speedup_vs_batch.png
ADDED
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_256B_128new_throughput_vs_batch.png
ADDED
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_256B_64new_speedup_vs_batch.png
ADDED
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/pythia_code_256B_64new_throughput_vs_batch.png
ADDED
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/results.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/runs.partial.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/summary.csv
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,workload,batch_size,max_new_tokens,status,num_samples,num_chunks,num_failures,prompt_bytes_mean,generated_units_mean,generated_bytes_mean,prefill_time_sec,decode_time_sec,total_time_sec,prefill_bytes_per_sec,prefill_units_per_sec,decode_bytes_per_sec,decode_units_per_sec,total_bytes_per_sec,total_units_per_sec,speedup_vs_batch1,peak_allocated_mb,peak_reserved_mb
|
| 2 |
+
pythia,code/1024B,1,64,ok,384,384,0,472.0625,64.0,231.789062,0.020406,0.840027,0.860468,23133.1454,6686.422356,275.930476,76.188023,269.375487,74.378105,1.0,2804.630859,2870.0
|
| 3 |
+
pythia,code/1024B,1,128,ok,384,384,0,472.0625,128.0,459.90625,0.020432,1.673192,1.693659,23103.745601,6677.924615,274.867601,76.500489,271.546016,75.576033,1.0,2804.630859,2872.0
|
| 4 |
+
pythia,code/1024B,2,64,ok,384,192,0,472.0625,63.90625,247.546875,0.02329,0.852987,0.876312,40538.20878,11717.195425,580.423417,149.841051,564.974598,145.852812,2.09735,2899.330566,2996.0
|
| 5 |
+
pythia,code/1024B,2,128,ok,384,192,0,472.0625,127.40625,492.085938,0.02346,1.691949,1.715443,40244.585499,11632.326323,581.679244,150.602904,573.712867,148.540325,2.112765,2899.330566,2996.0
|
| 6 |
+
pythia,code/1024B,4,64,ok,384,96,0,472.0625,64.0,249.5625,0.024795,0.862661,0.88749,76153.945968,22011.595828,1157.175526,296.756258,1124.801061,288.453866,4.175588,3101.554688,3266.0
|
| 7 |
+
pythia,code/1024B,4,128,ok,384,96,0,472.0625,128.0,501.617188,0.025396,1.722351,1.747781,74352.193318,21490.81584,1164.959549,297.268168,1148.009275,292.942887,4.227679,3101.554688,3266.0
|
| 8 |
+
pythia,code/1024B,8,64,ok,384,48,0,472.0625,63.5,270.976562,0.041786,0.866821,0.908643,90376.595543,26122.521534,2500.877605,586.049681,2385.769571,559.075539,8.856669,3479.240234,3724.0
|
| 9 |
+
pythia,code/1024B,8,128,ok,384,48,0,472.0625,127.0,539.132812,0.042597,1.742067,1.7847,88656.855485,25625.446529,2475.829784,583.215073,2416.687835,569.28339,8.899736,3479.240234,3788.0
|
| 10 |
+
pythia,code/1024B,16,64,ok,384,24,0,472.0625,64.0,285.5,0.087467,0.868358,0.955861,86352.844169,24959.493304,5260.505638,1179.237691,4778.939445,1071.2859,17.74081,4257.402344,4702.0
|
| 11 |
+
pythia,code/1024B,16,128,ok,384,24,0,472.0625,128.0,570.375,0.086296,1.732651,1.818983,87524.255563,25298.078965,5267.074309,1182.003965,5017.089504,1125.903934,18.47602,4257.402344,4882.0
|
| 12 |
+
pythia,code/256B,1,64,ok,384,384,0,196.679688,62.75,206.476562,0.017778,0.809746,0.827558,11062.831983,3422.776497,254.98933,77.493446,249.501133,75.825536,1.0,2736.92627,2808.0
|
| 13 |
+
pythia,code/256B,1,128,ok,384,384,0,196.679688,125.25,408.851562,0.018076,1.627459,1.645568,10880.849997,3366.472319,251.220846,76.960476,248.456247,76.113553,1.0,2748.92627,2822.0
|
| 14 |
+
pythia,code/256B,2,64,ok,384,192,0,196.679688,62.398438,216.585938,0.024395,0.83908,0.863508,16124.885481,4988.946693,516.245962,148.730531,501.641865,144.523088,2.01058,2785.487793,2842.0
|
| 15 |
+
pythia,code/256B,2,128,ok,384,192,0,196.679688,124.398438,424.867188,0.024907,1.692057,1.716998,15793.011972,4886.266942,502.190014,147.038074,494.89541,144.90226,1.991882,2798.550293,2878.0
|
| 16 |
+
pythia,code/256B,4,64,ok,384,96,0,196.679688,61.484375,212.445312,0.022349,0.843246,0.865629,35201.41962,10891.116482,1007.750589,291.655835,981.692594,284.11432,3.934622,2828.291992,2890.0
|
| 17 |
+
pythia,code/256B,4,128,ok,384,96,0,196.679688,121.4375,413.96875,0.022156,1.691158,1.713348,35508.251333,10986.048446,979.13682,287.229235,966.455874,283.509287,3.889843,2882.620117,2952.0
|
| 18 |
+
pythia,code/256B,8,64,ok,384,48,0,196.679688,61.695312,213.546875,0.025396,0.85317,0.878601,61954.984952,19168.515503,2002.385366,578.504325,1944.427007,561.759716,7.793259,2948.884766,3054.0
|
| 19 |
+
pythia,code/256B,8,128,ok,384,48,0,196.679688,122.453125,420.859375,0.02497,1.731794,1.7568,63011.902711,19495.51977,1944.154676,565.670696,1916.481607,557.618948,7.713558,3045.759766,3270.0
|
| 20 |
+
pythia,code/256B,16,64,ok,384,24,0,196.679688,62.179688,232.234375,0.032621,0.890241,0.922905,96466.999355,29846.333981,4173.871075,1117.534814,4026.147553,1077.98252,16.136791,3191.039062,3370.0
|
| 21 |
+
pythia,code/256B,16,128,ok,384,24,0,196.679688,123.25,460.726562,0.034704,1.7574,1.79214,90678.427661,28055.383239,4194.618796,1122.111918,4113.308407,1100.360392,16.555464,3389.164062,3878.0
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/summary.md
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Generation Speed Benchmark
|
| 2 |
+
|
| 3 |
+
## Speed
|
| 4 |
+
|
| 5 |
+
| Model | Workload | Batch | Max new | Samples | Status | Prompt bytes | Prefill ms/chunk | Prefill bytes/s | Prefill units/s | Decode ms/chunk | Decode bytes/s | Decode units/s | Total bytes/s | Speedup vs B=1 | Peak VRAM MB |
|
| 6 |
+
|---|---|---:|---:|---:|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
|
| 7 |
+
| pythia | code/1024B | 1 | 64 | 384 | ok | 472.1 | 20.4 | 23133.1 | 6686.4 | 840.0 | 275.9 | 76.2 | 269.4 | 1.0 | 2804.6 |
|
| 8 |
+
| pythia | code/1024B | 1 | 128 | 384 | ok | 472.1 | 20.4 | 23103.7 | 6677.9 | 1673.2 | 274.9 | 76.5 | 271.5 | 1.0 | 2804.6 |
|
| 9 |
+
| pythia | code/1024B | 2 | 64 | 384 | ok | 472.1 | 23.3 | 40538.2 | 11717.2 | 853.0 | 580.4 | 149.8 | 565.0 | 2.1 | 2899.3 |
|
| 10 |
+
| pythia | code/1024B | 2 | 128 | 384 | ok | 472.1 | 23.5 | 40244.6 | 11632.3 | 1691.9 | 581.7 | 150.6 | 573.7 | 2.11 | 2899.3 |
|
| 11 |
+
| pythia | code/1024B | 4 | 64 | 384 | ok | 472.1 | 24.8 | 76153.9 | 22011.6 | 862.7 | 1157.2 | 296.8 | 1124.8 | 4.18 | 3101.6 |
|
| 12 |
+
| pythia | code/1024B | 4 | 128 | 384 | ok | 472.1 | 25.4 | 74352.2 | 21490.8 | 1722.4 | 1165.0 | 297.3 | 1148.0 | 4.23 | 3101.6 |
|
| 13 |
+
| pythia | code/1024B | 8 | 64 | 384 | ok | 472.1 | 41.8 | 90376.6 | 26122.5 | 866.8 | 2500.9 | 586.0 | 2385.8 | 8.86 | 3479.2 |
|
| 14 |
+
| pythia | code/1024B | 8 | 128 | 384 | ok | 472.1 | 42.6 | 88656.9 | 25625.4 | 1742.1 | 2475.8 | 583.2 | 2416.7 | 8.9 | 3479.2 |
|
| 15 |
+
| pythia | code/1024B | 16 | 64 | 384 | ok | 472.1 | 87.5 | 86352.8 | 24959.5 | 868.4 | 5260.5 | 1179.2 | 4778.9 | 17.74 | 4257.4 |
|
| 16 |
+
| pythia | code/1024B | 16 | 128 | 384 | ok | 472.1 | 86.3 | 87524.3 | 25298.1 | 1732.7 | 5267.1 | 1182.0 | 5017.1 | 18.48 | 4257.4 |
|
| 17 |
+
| pythia | code/256B | 1 | 64 | 384 | ok | 196.7 | 17.8 | 11062.8 | 3422.8 | 809.7 | 255.0 | 77.5 | 249.5 | 1.0 | 2736.9 |
|
| 18 |
+
| pythia | code/256B | 1 | 128 | 384 | ok | 196.7 | 18.1 | 10880.8 | 3366.5 | 1627.5 | 251.2 | 77.0 | 248.5 | 1.0 | 2748.9 |
|
| 19 |
+
| pythia | code/256B | 2 | 64 | 384 | ok | 196.7 | 24.4 | 16124.9 | 4988.9 | 839.1 | 516.2 | 148.7 | 501.6 | 2.01 | 2785.5 |
|
| 20 |
+
| pythia | code/256B | 2 | 128 | 384 | ok | 196.7 | 24.9 | 15793.0 | 4886.3 | 1692.1 | 502.2 | 147.0 | 494.9 | 1.99 | 2798.6 |
|
| 21 |
+
| pythia | code/256B | 4 | 64 | 384 | ok | 196.7 | 22.3 | 35201.4 | 10891.1 | 843.2 | 1007.8 | 291.7 | 981.7 | 3.93 | 2828.3 |
|
| 22 |
+
| pythia | code/256B | 4 | 128 | 384 | ok | 196.7 | 22.2 | 35508.3 | 10986.0 | 1691.2 | 979.1 | 287.2 | 966.5 | 3.89 | 2882.6 |
|
| 23 |
+
| pythia | code/256B | 8 | 64 | 384 | ok | 196.7 | 25.4 | 61955.0 | 19168.5 | 853.2 | 2002.4 | 578.5 | 1944.4 | 7.79 | 2948.9 |
|
| 24 |
+
| pythia | code/256B | 8 | 128 | 384 | ok | 196.7 | 25.0 | 63011.9 | 19495.5 | 1731.8 | 1944.2 | 565.7 | 1916.5 | 7.71 | 3045.8 |
|
| 25 |
+
| pythia | code/256B | 16 | 64 | 384 | ok | 196.7 | 32.6 | 96467.0 | 29846.3 | 890.2 | 4173.9 | 1117.5 | 4026.1 | 16.14 | 3191.0 |
|
| 26 |
+
| pythia | code/256B | 16 | 128 | 384 | ok | 196.7 | 34.7 | 90678.4 | 28055.4 | 1757.4 | 4194.6 | 1122.1 | 4113.3 | 16.56 | 3389.2 |
|
| 27 |
+
|
| 28 |
+
## Warmup
|
| 29 |
+
|
| 30 |
+
| Model | Workload | Batch | Max new | Samples | Status | Prompt bytes | Prefill ms/chunk | Decode ms/chunk | Total ms/chunk | Peak VRAM MB |
|
| 31 |
+
|---|---|---:|---:|---:|---|---:|---:|---:|---:|---:|
|
| 32 |
+
| pythia | code/1024B | 1 | 64 | 1 | ok | 265.0 | 21.1 | 834.0 | 855.1 | 2737.1 |
|
| 33 |
+
| pythia | code/1024B | 1 | 128 | 1 | ok | 265.0 | 18.5 | 1684.2 | 1702.7 | 2749.1 |
|
| 34 |
+
| pythia | code/1024B | 2 | 64 | 2 | ok | 613.5 | 23.4 | 860.0 | 883.5 | 2868.7 |
|
| 35 |
+
| pythia | code/1024B | 2 | 128 | 2 | ok | 613.5 | 22.5 | 1689.3 | 1711.8 | 2871.7 |
|
| 36 |
+
| pythia | code/1024B | 4 | 64 | 4 | ok | 531.5 | 26.8 | 867.4 | 894.2 | 3034.2 |
|
| 37 |
+
| pythia | code/1024B | 4 | 128 | 4 | ok | 531.5 | 32.6 | 1706.9 | 1739.6 | 3034.2 |
|
| 38 |
+
| pythia | code/1024B | 8 | 64 | 8 | ok | 425.2 | 45.5 | 871.9 | 917.4 | 3344.9 |
|
| 39 |
+
| pythia | code/1024B | 8 | 128 | 8 | ok | 425.2 | 47.6 | 1756.2 | 1803.9 | 3344.9 |
|
| 40 |
+
| pythia | code/1024B | 16 | 64 | 16 | ok | 423.9 | 86.6 | 877.5 | 964.1 | 3998.7 |
|
| 41 |
+
| pythia | code/1024B | 16 | 128 | 16 | ok | 423.9 | 88.0 | 1726.2 | 1814.3 | 3998.7 |
|
| 42 |
+
| pythia | code/256B | 1 | 64 | 1 | ok | 212.0 | 177.3 | 858.3 | 1035.6 | 2734.7 |
|
| 43 |
+
| pythia | code/256B | 1 | 128 | 1 | ok | 212.0 | 18.0 | 1659.8 | 1677.8 | 2746.7 |
|
| 44 |
+
| pythia | code/256B | 2 | 64 | 2 | ok | 196.0 | 56.8 | 839.8 | 896.6 | 2783.5 |
|
| 45 |
+
| pythia | code/256B | 2 | 128 | 2 | ok | 196.0 | 24.4 | 1679.2 | 1703.6 | 2796.4 |
|
| 46 |
+
| pythia | code/256B | 4 | 64 | 4 | ok | 187.8 | 20.9 | 841.7 | 862.6 | 2818.7 |
|
| 47 |
+
| pythia | code/256B | 4 | 128 | 4 | ok | 187.8 | 20.5 | 1701.3 | 1721.8 | 2877.4 |
|
| 48 |
+
| pythia | code/256B | 8 | 64 | 8 | ok | 190.2 | 24.5 | 853.1 | 877.6 | 2930.4 |
|
| 49 |
+
| pythia | code/256B | 8 | 128 | 8 | ok | 190.2 | 24.2 | 1705.1 | 1729.3 | 3031.1 |
|
| 50 |
+
| pythia | code/256B | 16 | 64 | 16 | ok | 179.8 | 31.1 | 927.5 | 958.7 | 3152.9 |
|
| 51 |
+
| pythia | code/256B | 16 | 128 | 16 | ok | 179.8 | 33.5 | 1759.9 | 1793.4 | 3367.2 |
|
| 52 |
+
|
| 53 |
+
## Compression
|
| 54 |
+
|
| 55 |
+
| Model | Workload | Raw prompt bytes | Model input units | Stage0 / patches | Stage1 | Bytes per stage0 | Stage0 per stage1 |
|
| 56 |
+
|---|---|---:|---:|---:|---:|---:|---:|
|
| 57 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 58 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 59 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 60 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 61 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 62 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 63 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 64 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 65 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 66 |
+
| pythia | code/1024B | 181272.0 | 52395.0 | None | None | None | None |
|
| 67 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 68 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 69 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 70 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 71 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 72 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 73 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 74 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 75 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
| 76 |
+
| pythia | code/256B | 75525.0 | 23367.0 | None | None | None | None |
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/warmup_runs.partial.jsonl
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 1, "max_new_tokens": 64, "prefill_time_sec": 0.1772817859891802, "decode_time_sec": 0.8583101750118658, "total_time_sec": 1.0356268809409812, "prefill_units_per_sec": 456.899729140497, "prefill_bytes_per_sec": 1195.8363281208071, "decode_units_per_sec": 74.56511860542166, "decode_bytes_per_sec": 208.54931609953871, "total_units_per_sec": 61.79831865879046, "total_bytes_per_sec": 172.84217249880456, "step_latency_ms": {"count": 64, "sum": 842, "mean": 13.15625, "median": 13.0, "p95": 13.0, "max": 19, "min": 13}, "peak_allocated_mb": 2734.67626953125, "peak_reserved_mb": 2806.0, "prompt_stats": {"raw_bytes": {"count": 1, "sum": 212, "mean": 212.0, "median": 212.0, "p95": 212.0, "max": 212, "min": 212}, "model_input_units": {"count": 1, "sum": 81, "mean": 81.0, "median": 81.0, "p95": 81.0, "max": 81, "min": 81}}, "generated_stats": {"units": {"count": 1, "sum": 64, "mean": 64.0, "median": 64.0, "p95": 64.0, "max": 64, "min": 64}, "bytes": {"count": 1, "sum": 179, "mean": 179.0, "median": 179.0, "p95": 179.0, "max": 179, "min": 179}}, "compression_summary": {"raw_prompt_bytes": 212, "model_input_units": 81, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 2.617283950617284, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 1, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 2 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 1, "max_new_tokens": 128, "prefill_time_sec": 0.018028639955446124, "decode_time_sec": 1.6597656729863957, "total_time_sec": 1.6778264400782064, "prefill_units_per_sec": 4492.851385360956, "prefill_bytes_per_sec": 11759.06782341386, "decode_units_per_sec": 77.11931996381827, "decode_bytes_per_sec": 240.39538019971474, "total_units_per_sec": 76.28917803562192, "total_bytes_per_sec": 237.80767215791516, "step_latency_ms": {"count": 128, "sum": 1650, "mean": 12.890625, "median": 13.0, "p95": 13.0, "max": 15, "min": 12}, "peak_allocated_mb": 2746.67626953125, "peak_reserved_mb": 2822.0, "prompt_stats": {"raw_bytes": {"count": 1, "sum": 212, "mean": 212.0, "median": 212.0, "p95": 212.0, "max": 212, "min": 212}, "model_input_units": {"count": 1, "sum": 81, "mean": 81.0, "median": 81.0, "p95": 81.0, "max": 81, "min": 81}}, "generated_stats": {"units": {"count": 1, "sum": 128, "mean": 128.0, "median": 128.0, "p95": 128.0, "max": 128, "min": 128}, "bytes": {"count": 1, "sum": 399, "mean": 399.0, "median": 399.0, "p95": 399.0, "max": 399, "min": 399}}, "compression_summary": {"raw_prompt_bytes": 212, "model_input_units": 81, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 2.617283950617284, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 1, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 3 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 2, "max_new_tokens": 64, "prefill_time_sec": 0.056790383998304605, "decode_time_sec": 0.8397572889225557, "total_time_sec": 0.8965851089451462, "prefill_units_per_sec": 2359.5543922365514, "prefill_bytes_per_sec": 6902.5770280352845, "decode_units_per_sec": 152.42499432690775, "decode_bytes_per_sec": 397.73396957177494, "total_units_per_sec": 142.76391468356536, "total_bytes_per_sec": 372.52458987742835, "step_latency_ms": {"count": 64, "sum": 834, "mean": 13.03125, "median": 13.0, "p95": 13.0, "max": 14, "min": 13}, "peak_allocated_mb": 2783.51904296875, "peak_reserved_mb": 2840.0, "prompt_stats": {"raw_bytes": {"count": 2, "sum": 392, "mean": 196.0, "median": 196.0, "p95": 210.4, "max": 212, "min": 180}, "model_input_units": {"count": 2, "sum": 134, "mean": 67.0, "median": 67.0, "p95": 79.6, "max": 81, "min": 53}}, "generated_stats": {"units": {"count": 2, "sum": 128, "mean": 64.0, "median": 64.0, "p95": 64.0, "max": 64, "min": 64}, "bytes": {"count": 2, "sum": 334, "mean": 167.0, "median": 167.0, "p95": 177.8, "max": 179, "min": 155}}, "compression_summary": {"raw_prompt_bytes": 392, "model_input_units": 134, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 2.925373134328358, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 2, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 4 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 2, "max_new_tokens": 128, "prefill_time_sec": 0.02438490593340248, "decode_time_sec": 1.679199978010729, "total_time_sec": 1.7036184089956805, "prefill_units_per_sec": 5495.202662088051, "prefill_bytes_per_sec": 16075.518235362058, "decode_units_per_sec": 152.45355130558747, "decode_bytes_per_sec": 419.84278777515294, "total_units_per_sec": 150.2683926448749, "total_bytes_per_sec": 413.825065682175, "step_latency_ms": {"count": 128, "sum": 1665, "mean": 13.0078125, "median": 13.0, "p95": 13.0, "max": 14, "min": 13}, "peak_allocated_mb": 2796.37841796875, "peak_reserved_mb": 2860.0, "prompt_stats": {"raw_bytes": {"count": 2, "sum": 392, "mean": 196.0, "median": 196.0, "p95": 210.4, "max": 212, "min": 180}, "model_input_units": {"count": 2, "sum": 134, "mean": 67.0, "median": 67.0, "p95": 79.6, "max": 81, "min": 53}}, "generated_stats": {"units": {"count": 2, "sum": 256, "mean": 128.0, "median": 128.0, "p95": 128.0, "max": 128, "min": 128}, "bytes": {"count": 2, "sum": 705, "mean": 352.5, "median": 352.5, "p95": 394.35, "max": 399, "min": 306}}, "compression_summary": {"raw_prompt_bytes": 392, "model_input_units": 134, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 2.925373134328358, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 2, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 5 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 4, "max_new_tokens": 64, "prefill_time_sec": 0.020852572983130813, "decode_time_sec": 0.8416975099826232, "total_time_sec": 0.8625828389776871, "prefill_units_per_sec": 11845.061048332815, "prefill_bytes_per_sec": 36014.74027246131, "decode_units_per_sec": 280.38576472071566, "decode_bytes_per_sec": 767.496627159247, "total_units_per_sec": 273.59691073810563, "total_bytes_per_sec": 748.913577698374, "step_latency_ms": {"count": 64, "sum": 832, "mean": 13.0, "median": 13.0, "p95": 13.0, "max": 13, "min": 13}, "peak_allocated_mb": 2818.6982421875, "peak_reserved_mb": 2864.0, "prompt_stats": {"raw_bytes": {"count": 4, "sum": 751, "mean": 187.75, "median": 182.5, "p95": 207.95, "max": 212, "min": 174}, "model_input_units": {"count": 4, "sum": 247, "mean": 61.75, "median": 58.5, "p95": 78.44999999999999, "max": 81, "min": 49}}, "generated_stats": {"units": {"count": 4, "sum": 236, "mean": 59.0, "median": 64.0, "p95": 64.0, "max": 64, "min": 44}, "bytes": {"count": 4, "sum": 646, "mean": 161.5, "median": 167.0, "p95": 187.5, "max": 189, "min": 123}}, "compression_summary": {"raw_prompt_bytes": 751, "model_input_units": 247, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.0404858299595143, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 4, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 6 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 4, "max_new_tokens": 128, "prefill_time_sec": 0.02051859605126083, "decode_time_sec": 1.7012977009871975, "total_time_sec": 1.7218498209258541, "prefill_units_per_sec": 12037.860650062474, "prefill_bytes_per_sec": 36600.94472954218, "decode_units_per_sec": 251.5726670009888, "decode_bytes_per_sec": 709.458432406994, "total_units_per_sec": 248.56987804538062, "total_bytes_per_sec": 700.9902869176972, "step_latency_ms": {"count": 128, "sum": 1664, "mean": 13.0, "median": 13.0, "p95": 13.0, "max": 13, "min": 13}, "peak_allocated_mb": 2877.4013671875, "peak_reserved_mb": 2904.0, "prompt_stats": {"raw_bytes": {"count": 4, "sum": 751, "mean": 187.75, "median": 182.5, "p95": 207.95, "max": 212, "min": 174}, "model_input_units": {"count": 4, "sum": 247, "mean": 61.75, "median": 58.5, "p95": 78.44999999999999, "max": 81, "min": 49}}, "generated_stats": {"units": {"count": 4, "sum": 428, "mean": 107.0, "median": 128.0, "p95": 128.0, "max": 128, "min": 44}, "bytes": {"count": 4, "sum": 1207, "mean": 301.75, "median": 342.5, "p95": 396.0, "max": 399, "min": 123}}, "compression_summary": {"raw_prompt_bytes": 751, "model_input_units": 247, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.0404858299595143, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 4, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 7 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 8, "max_new_tokens": 64, "prefill_time_sec": 0.02449842100031674, "decode_time_sec": 0.8530876860022545, "total_time_sec": 0.877618792001158, "prefill_units_per_sec": 19348.18574608836, "prefill_bytes_per_sec": 62126.45296528793, "decode_units_per_sec": 576.7285216665286, "decode_bytes_per_sec": 1693.846979284825, "total_units_per_sec": 560.6078681133697, "total_bytes_per_sec": 1646.500750861421, "step_latency_ms": {"count": 64, "sum": 832, "mean": 13.0, "median": 13.0, "p95": 13.0, "max": 13, "min": 13}, "peak_allocated_mb": 2930.3525390625, "peak_reserved_mb": 3054.0, "prompt_stats": {"raw_bytes": {"count": 8, "sum": 1522, "mean": 190.25, "median": 186.0, "p95": 215.25, "max": 217, "min": 168}, "model_input_units": {"count": 8, "sum": 474, "mean": 59.25, "median": 57.0, "p95": 75.05, "max": 81, "min": 49}}, "generated_stats": {"units": {"count": 8, "sum": 492, "mean": 61.5, "median": 64.0, "p95": 64.0, "max": 64, "min": 44}, "bytes": {"count": 8, "sum": 1445, "mean": 180.625, "median": 169.5, "p95": 258.94999999999993, "max": 288, "min": 123}}, "compression_summary": {"raw_prompt_bytes": 1522, "model_input_units": 474, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.210970464135021, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 8, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 8 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 8, "max_new_tokens": 128, "prefill_time_sec": 0.024204600020311773, "decode_time_sec": 1.7050928949611261, "total_time_sec": 1.729331088019535, "prefill_units_per_sec": 19583.05444428883, "prefill_bytes_per_sec": 62880.609418159496, "decode_units_per_sec": 551.2896117143405, "decode_bytes_per_sec": 1631.582659350314, "total_units_per_sec": 543.5627720522315, "total_bytes_per_sec": 1608.714501967349, "step_latency_ms": {"count": 128, "sum": 1665, "mean": 13.0078125, "median": 13.0, "p95": 13.0, "max": 14, "min": 13}, "peak_allocated_mb": 3031.1337890625, "peak_reserved_mb": 3174.0, "prompt_stats": {"raw_bytes": {"count": 8, "sum": 1522, "mean": 190.25, "median": 186.0, "p95": 215.25, "max": 217, "min": 168}, "model_input_units": {"count": 8, "sum": 474, "mean": 59.25, "median": 57.0, "p95": 75.05, "max": 81, "min": 49}}, "generated_stats": {"units": {"count": 8, "sum": 940, "mean": 117.5, "median": 128.0, "p95": 128.0, "max": 128, "min": 44}, "bytes": {"count": 8, "sum": 2782, "mean": 347.75, "median": 350.5, "p95": 514.05, "max": 576, "min": 123}}, "compression_summary": {"raw_prompt_bytes": 1522, "model_input_units": 474, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.210970464135021, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 8, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 9 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 16, "max_new_tokens": 64, "prefill_time_sec": 0.031096516060642898, "decode_time_sec": 0.9275241360301152, "total_time_sec": 0.958660879987292, "prefill_units_per_sec": 28588.411584960704, "prefill_bytes_per_sec": 92486.24490252754, "decode_units_per_sec": 1082.4516160810738, "decode_bytes_per_sec": 4240.320922357434, "total_units_per_sec": 1047.2942215116873, "total_bytes_per_sec": 4102.597782077158, "step_latency_ms": {"count": 64, "sum": 862, "mean": 13.46875, "median": 13.0, "p95": 14.0, "max": 15, "min": 13}, "peak_allocated_mb": 3152.8759765625, "peak_reserved_mb": 3322.0, "prompt_stats": {"raw_bytes": {"count": 16, "sum": 2876, "mean": 179.75, "median": 182.0, "p95": 220.75, "max": 232, "min": 103}, "model_input_units": {"count": 16, "sum": 889, "mean": 55.5625, "median": 54.0, "p95": 69.75, "max": 81, "min": 41}}, "generated_stats": {"units": {"count": 16, "sum": 1004, "mean": 62.75, "median": 64.0, "p95": 64.0, "max": 64, "min": 44}, "bytes": {"count": 16, "sum": 3933, "mean": 245.8125, "median": 199.5, "p95": 516.25, "max": 652, "min": 67}}, "compression_summary": {"raw_prompt_bytes": 2876, "model_input_units": 889, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.235095613048369, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 16, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 10 |
+
{"model": "pythia", "workload": "code/256B", "batch_size": 16, "max_new_tokens": 128, "prefill_time_sec": 0.033466362045146525, "decode_time_sec": 1.759892665897496, "total_time_sec": 1.7933940179646015, "prefill_units_per_sec": 26563.986811614846, "prefill_bytes_per_sec": 85937.03719932992, "decode_units_per_sec": 1115.977149094155, "decode_bytes_per_sec": 4671.307608300941, "total_units_per_sec": 1095.1302281185406, "total_bytes_per_sec": 4584.045623911671, "step_latency_ms": {"count": 128, "sum": 1676, "mean": 13.09375, "median": 13.0, "p95": 13.0, "max": 21, "min": 13}, "peak_allocated_mb": 3367.1611328125, "peak_reserved_mb": 3850.0, "prompt_stats": {"raw_bytes": {"count": 16, "sum": 2876, "mean": 179.75, "median": 182.0, "p95": 220.75, "max": 232, "min": 103}, "model_input_units": {"count": 16, "sum": 889, "mean": 55.5625, "median": 54.0, "p95": 69.75, "max": 81, "min": 41}}, "generated_stats": {"units": {"count": 16, "sum": 1964, "mean": 122.75, "median": 128.0, "p95": 128.0, "max": 128, "min": 44}, "bytes": {"count": 16, "sum": 8221, "mean": 513.8125, "median": 412.0, "p95": 1330.0, "max": 1348, "min": 124}}, "compression_summary": {"raw_prompt_bytes": 2876, "model_input_units": 889, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.235095613048369, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 16, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 11 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 1, "max_new_tokens": 64, "prefill_time_sec": 0.02105093002319336, "decode_time_sec": 0.8339903840096667, "total_time_sec": 0.8550744180101901, "prefill_units_per_sec": 4465.36090787596, "prefill_bytes_per_sec": 12588.517453054568, "decode_units_per_sec": 76.73949391634494, "decode_bytes_per_sec": 236.21375471124927, "total_units_per_sec": 74.84728656592472, "total_bytes_per_sec": 230.389303960737, "step_latency_ms": {"count": 64, "sum": 833, "mean": 13.015625, "median": 13.0, "p95": 13.0, "max": 14, "min": 13}, "peak_allocated_mb": 2737.11376953125, "peak_reserved_mb": 2806.0, "prompt_stats": {"raw_bytes": {"count": 1, "sum": 265, "mean": 265.0, "median": 265.0, "p95": 265.0, "max": 265, "min": 265}, "model_input_units": {"count": 1, "sum": 94, "mean": 94.0, "median": 94.0, "p95": 94.0, "max": 94, "min": 94}}, "generated_stats": {"units": {"count": 1, "sum": 64, "mean": 64.0, "median": 64.0, "p95": 64.0, "max": 64, "min": 64}, "bytes": {"count": 1, "sum": 197, "mean": 197.0, "median": 197.0, "p95": 197.0, "max": 197, "min": 197}}, "compression_summary": {"raw_prompt_bytes": 265, "model_input_units": 94, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 2.8191489361702127, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 1, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 12 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 1, "max_new_tokens": 128, "prefill_time_sec": 0.018510253983549774, "decode_time_sec": 1.6841918430291116, "total_time_sec": 1.702734643011354, "prefill_units_per_sec": 5078.266353532406, "prefill_bytes_per_sec": 14316.389188149868, "decode_units_per_sec": 76.00084309266393, "decode_bytes_per_sec": 219.09618047807024, "total_units_per_sec": 75.1731930311977, "total_bytes_per_sec": 216.7102205352496, "step_latency_ms": {"count": 128, "sum": 1667, "mean": 13.0234375, "median": 13.0, "p95": 13.0, "max": 15, "min": 13}, "peak_allocated_mb": 2749.11376953125, "peak_reserved_mb": 2822.0, "prompt_stats": {"raw_bytes": {"count": 1, "sum": 265, "mean": 265.0, "median": 265.0, "p95": 265.0, "max": 265, "min": 265}, "model_input_units": {"count": 1, "sum": 94, "mean": 94.0, "median": 94.0, "p95": 94.0, "max": 94, "min": 94}}, "generated_stats": {"units": {"count": 1, "sum": 128, "mean": 128.0, "median": 128.0, "p95": 128.0, "max": 128, "min": 128}, "bytes": {"count": 1, "sum": 369, "mean": 369.0, "median": 369.0, "p95": 369.0, "max": 369, "min": 369}}, "compression_summary": {"raw_prompt_bytes": 265, "model_input_units": 94, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 2.8191489361702127, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 1, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 13 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 2, "max_new_tokens": 64, "prefill_time_sec": 0.023427839973010123, "decode_time_sec": 0.8600197209743783, "total_time_sec": 0.8834825509693474, "prefill_units_per_sec": 15835.860259734056, "prefill_bytes_per_sec": 52373.58635766493, "decode_units_per_sec": 148.8337963401346, "decode_bytes_per_sec": 593.0096572927238, "total_units_per_sec": 144.8811862323255, "total_bytes_per_sec": 577.2609763944218, "step_latency_ms": {"count": 64, "sum": 836, "mean": 13.0625, "median": 13.0, "p95": 13.0, "max": 15, "min": 13}, "peak_allocated_mb": 2868.6845703125, "peak_reserved_mb": 2948.0, "prompt_stats": {"raw_bytes": {"count": 2, "sum": 1227, "mean": 613.5, "median": 613.5, "p95": 927.15, "max": 962, "min": 265}, "model_input_units": {"count": 2, "sum": 371, "mean": 185.5, "median": 185.5, "p95": 267.84999999999997, "max": 277, "min": 94}}, "generated_stats": {"units": {"count": 2, "sum": 128, "mean": 64.0, "median": 64.0, "p95": 64.0, "max": 64, "min": 64}, "bytes": {"count": 2, "sum": 510, "mean": 255.0, "median": 255.0, "p95": 277.5, "max": 280, "min": 230}}, "compression_summary": {"raw_prompt_bytes": 1227, "model_input_units": 371, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.307277628032345, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 2, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 14 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 2, "max_new_tokens": 128, "prefill_time_sec": 0.022488684975542128, "decode_time_sec": 1.6893279301002622, "total_time_sec": 1.7118496500188485, "prefill_units_per_sec": 16497.18515793547, "prefill_bytes_per_sec": 54560.77139834723, "decode_units_per_sec": 151.53955335646782, "decode_bytes_per_sec": 623.3248034545336, "total_units_per_sec": 149.54584358339022, "total_bytes_per_sec": 615.1241144269918, "step_latency_ms": {"count": 128, "sum": 1666, "mean": 13.015625, "median": 13.0, "p95": 13.0, "max": 15, "min": 13}, "peak_allocated_mb": 2871.68115234375, "peak_reserved_mb": 2948.0, "prompt_stats": {"raw_bytes": {"count": 2, "sum": 1227, "mean": 613.5, "median": 613.5, "p95": 927.15, "max": 962, "min": 265}, "model_input_units": {"count": 2, "sum": 371, "mean": 185.5, "median": 185.5, "p95": 267.84999999999997, "max": 277, "min": 94}}, "generated_stats": {"units": {"count": 2, "sum": 256, "mean": 128.0, "median": 128.0, "p95": 128.0, "max": 128, "min": 128}, "bytes": {"count": 2, "sum": 1053, "mean": 526.5, "median": 526.5, "p95": 553.05, "max": 556, "min": 497}}, "compression_summary": {"raw_prompt_bytes": 1227, "model_input_units": 371, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.307277628032345, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 2, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 15 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 4, "max_new_tokens": 64, "prefill_time_sec": 0.026762328925542533, "decode_time_sec": 0.8674271870404482, "total_time_sec": 0.8942254839930683, "prefill_units_per_sec": 23316.35642533491, "prefill_bytes_per_sec": 79440.02205170196, "decode_units_per_sec": 295.12563570141214, "decode_bytes_per_sec": 977.6036682609277, "total_units_per_sec": 286.28126192161216, "total_bytes_per_sec": 948.3066801153403, "step_latency_ms": {"count": 64, "sum": 832, "mean": 13.0, "median": 13.0, "p95": 13.0, "max": 13, "min": 13}, "peak_allocated_mb": 3034.2294921875, "peak_reserved_mb": 3168.0, "prompt_stats": {"raw_bytes": {"count": 4, "sum": 2126, "mean": 531.5, "median": 449.5, "p95": 898.2499999999999, "max": 962, "min": 265}, "model_input_units": {"count": 4, "sum": 624, "mean": 156.0, "median": 128.5, "p95": 259.9, "max": 277, "min": 90}}, "generated_stats": {"units": {"count": 4, "sum": 256, "mean": 64.0, "median": 64.0, "p95": 64.0, "max": 64, "min": 64}, "bytes": {"count": 4, "sum": 848, "mean": 212.0, "median": 238.5, "p95": 299.65, "max": 304, "min": 67}}, "compression_summary": {"raw_prompt_bytes": 2126, "model_input_units": 624, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.407051282051282, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 4, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 16 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 4, "max_new_tokens": 128, "prefill_time_sec": 0.032616617041639984, "decode_time_sec": 1.7069068949203938, "total_time_sec": 1.739558152970858, "prefill_units_per_sec": 19131.35256189723, "prefill_bytes_per_sec": 65181.49927338704, "decode_units_per_sec": 299.957778320345, "decode_bytes_per_sec": 1011.7716467953826, "total_units_per_sec": 294.32761366764, "total_bytes_per_sec": 992.7808375078403, "step_latency_ms": {"count": 128, "sum": 1669, "mean": 13.0390625, "median": 13.0, "p95": 13.0, "max": 18, "min": 13}, "peak_allocated_mb": 3034.2294921875, "peak_reserved_mb": 3168.0, "prompt_stats": {"raw_bytes": {"count": 4, "sum": 2126, "mean": 531.5, "median": 449.5, "p95": 898.2499999999999, "max": 962, "min": 265}, "model_input_units": {"count": 4, "sum": 624, "mean": 156.0, "median": 128.5, "p95": 259.9, "max": 277, "min": 90}}, "generated_stats": {"units": {"count": 4, "sum": 512, "mean": 128.0, "median": 128.0, "p95": 128.0, "max": 128, "min": 128}, "bytes": {"count": 4, "sum": 1727, "mean": 431.75, "median": 488.5, "p95": 610.9, "max": 619, "min": 131}}, "compression_summary": {"raw_prompt_bytes": 2126, "model_input_units": 624, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.407051282051282, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 4, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 17 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 8, "max_new_tokens": 64, "prefill_time_sec": 0.04545382398646325, "decode_time_sec": 0.8719166219234467, "total_time_sec": 0.9174054359318689, "prefill_units_per_sec": 21494.341164584162, "prefill_bytes_per_sec": 74845.18796511291, "decode_units_per_sec": 587.2121107985404, "decode_bytes_per_sec": 1782.2805081658823, "total_units_per_sec": 558.0956684433944, "total_bytes_per_sec": 1693.9075561738964, "step_latency_ms": {"count": 64, "sum": 832, "mean": 13.0, "median": 13.0, "p95": 13.0, "max": 13, "min": 13}, "peak_allocated_mb": 3344.9150390625, "peak_reserved_mb": 3578.0, "prompt_stats": {"raw_bytes": {"count": 8, "sum": 3402, "mean": 425.25, "median": 353.0, "p95": 813.2499999999998, "max": 962, "min": 265}, "model_input_units": {"count": 8, "sum": 977, "mean": 122.125, "median": 93.0, "p95": 237.09999999999994, "max": 277, "min": 77}}, "generated_stats": {"units": {"count": 8, "sum": 512, "mean": 64.0, "median": 64.0, "p95": 64.0, "max": 64, "min": 64}, "bytes": {"count": 8, "sum": 1554, "mean": 194.25, "median": 207.0, "p95": 291.7, "max": 298, "min": 67}}, "compression_summary": {"raw_prompt_bytes": 3402, "model_input_units": 977, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.482088024564995, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 8, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 18 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 8, "max_new_tokens": 128, "prefill_time_sec": 0.047648089937865734, "decode_time_sec": 1.7562071910360828, "total_time_sec": 1.8038925749715418, "prefill_units_per_sec": 20504.49454057931, "prefill_bytes_per_sec": 71398.45488950954, "decode_units_per_sec": 583.0747107896115, "decode_bytes_per_sec": 1780.5416217178858, "total_units_per_sec": 567.661297689057, "total_bytes_per_sec": 1733.4735135485168, "step_latency_ms": {"count": 128, "sum": 1665, "mean": 13.0078125, "median": 13.0, "p95": 13.0, "max": 14, "min": 13}, "peak_allocated_mb": 3344.9150390625, "peak_reserved_mb": 3606.0, "prompt_stats": {"raw_bytes": {"count": 8, "sum": 3402, "mean": 425.25, "median": 353.0, "p95": 813.2499999999998, "max": 962, "min": 265}, "model_input_units": {"count": 8, "sum": 977, "mean": 122.125, "median": 93.0, "p95": 237.09999999999994, "max": 277, "min": 77}}, "generated_stats": {"units": {"count": 8, "sum": 1024, "mean": 128.0, "median": 128.0, "p95": 128.0, "max": 128, "min": 128}, "bytes": {"count": 8, "sum": 3127, "mean": 390.875, "median": 444.5, "p95": 576.8, "max": 588, "min": 131}}, "compression_summary": {"raw_prompt_bytes": 3402, "model_input_units": 977, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.482088024564995, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 8, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 19 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 16, "max_new_tokens": 64, "prefill_time_sec": 0.08657659299205989, "decode_time_sec": 0.8774544540792704, "total_time_sec": 0.9640679219737649, "prefill_units_per_sec": 22176.89485859171, "prefill_bytes_per_sec": 78335.26090154634, "decode_units_per_sec": 1167.0121397634282, "decode_bytes_per_sec": 4106.1960347340155, "total_units_per_sec": 1062.1658253119078, "total_bytes_per_sec": 3737.2885435535195, "step_latency_ms": {"count": 64, "sum": 833, "mean": 13.015625, "median": 13.0, "p95": 13.0, "max": 14, "min": 13}, "peak_allocated_mb": 3998.736328125, "peak_reserved_mb": 4412.0, "prompt_stats": {"raw_bytes": {"count": 16, "sum": 6782, "mean": 423.875, "median": 369.0, "p95": 689.75, "max": 962, "min": 260}, "model_input_units": {"count": 16, "sum": 1920, "mean": 120.0, "median": 97.5, "p95": 196.75, "max": 277, "min": 71}}, "generated_stats": {"units": {"count": 16, "sum": 1024, "mean": 64.0, "median": 64.0, "p95": 64.0, "max": 64, "min": 64}, "bytes": {"count": 16, "sum": 3603, "mean": 225.1875, "median": 258.0, "p95": 294.75, "max": 300, "min": 67}}, "compression_summary": {"raw_prompt_bytes": 6782, "model_input_units": 1920, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.5322916666666666, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 16, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
| 20 |
+
{"model": "pythia", "workload": "code/1024B", "batch_size": 16, "max_new_tokens": 128, "prefill_time_sec": 0.08801112801302224, "decode_time_sec": 1.726217148010619, "total_time_sec": 1.8142631959635764, "prefill_units_per_sec": 21815.42315553454, "prefill_bytes_per_sec": 77058.4374171017, "decode_units_per_sec": 1186.4092546874651, "decode_bytes_per_sec": 4035.9928112341645, "total_units_per_sec": 1128.8329083434244, "total_bytes_per_sec": 3840.1264025530454, "step_latency_ms": {"count": 128, "sum": 1662, "mean": 12.984375, "median": 13.0, "p95": 13.0, "max": 14, "min": 12}, "peak_allocated_mb": 3998.736328125, "peak_reserved_mb": 4690.0, "prompt_stats": {"raw_bytes": {"count": 16, "sum": 6782, "mean": 423.875, "median": 369.0, "p95": 689.75, "max": 962, "min": 260}, "model_input_units": {"count": 16, "sum": 1920, "mean": 120.0, "median": 97.5, "p95": 196.75, "max": 277, "min": 71}}, "generated_stats": {"units": {"count": 16, "sum": 2048, "mean": 128.0, "median": 128.0, "p95": 128.0, "max": 128, "min": 128}, "bytes": {"count": 16, "sum": 6967, "mean": 435.4375, "median": 494.5, "p95": 606.0, "max": 615, "min": 131}}, "compression_summary": {"raw_prompt_bytes": 6782, "model_input_units": 1920, "stage0_units": null, "stage1_units": null, "bytes_per_stage0_unit": 3.5322916666666666, "stage0_per_stage1_unit": null}, "trace": null, "status": "ok", "num_samples": 16, "repeat_idx": -1, "chunk_idx": 0, "phase": "warmup"}
|
pythia_1_4b_only_generation_speed/2026-05-11/14-19-07/warmup_summary.csv
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,workload,batch_size,max_new_tokens,status,num_samples,num_chunks,num_failures,prompt_bytes_mean,generated_units_mean,generated_bytes_mean,prefill_time_sec,decode_time_sec,total_time_sec,prefill_bytes_per_sec,prefill_units_per_sec,decode_bytes_per_sec,decode_units_per_sec,total_bytes_per_sec,total_units_per_sec,speedup_vs_batch1,peak_allocated_mb,peak_reserved_mb
|
| 2 |
+
pythia,code/1024B,1,64,ok,1,1,0,265.0,64.0,197.0,0.021051,0.83399,0.855074,12588.517453,4465.360908,236.213755,76.739494,230.389304,74.847287,1.0,2737.11377,2806.0
|
| 3 |
+
pythia,code/1024B,1,128,ok,1,1,0,265.0,128.0,369.0,0.01851,1.684192,1.702735,14316.389188,5078.266354,219.09618,76.000843,216.710221,75.173193,1.0,2749.11377,2822.0
|
| 4 |
+
pythia,code/1024B,2,64,ok,2,1,0,613.5,64.0,255.0,0.023428,0.86002,0.883483,52373.586358,15835.86026,593.009657,148.833796,577.260976,144.881186,2.505589,2868.68457,2948.0
|
| 5 |
+
pythia,code/1024B,2,128,ok,2,1,0,613.5,128.0,526.5,0.022489,1.689328,1.71185,54560.771398,16497.185158,623.324803,151.539553,615.124114,149.545844,2.838464,2871.681152,2948.0
|
| 6 |
+
pythia,code/1024B,4,64,ok,4,1,0,531.5,64.0,212.0,0.026762,0.867427,0.894225,79440.022052,23316.356425,977.603668,295.125636,948.30668,286.281262,4.116105,3034.229492,3168.0
|
| 7 |
+
pythia,code/1024B,4,128,ok,4,1,0,531.5,128.0,431.75,0.032617,1.706907,1.739558,65181.499273,19131.352562,1011.771647,299.957778,992.780838,294.327614,4.581145,3034.229492,3168.0
|
| 8 |
+
pythia,code/1024B,8,64,ok,8,1,0,425.25,64.0,194.25,0.045454,0.871917,0.917405,74845.187965,21494.341165,1782.280508,587.212111,1693.907556,558.095668,7.352371,3344.915039,3578.0
|
| 9 |
+
pythia,code/1024B,8,128,ok,8,1,0,425.25,128.0,390.875,0.047648,1.756207,1.803893,71398.45489,20504.494541,1780.541622,583.074711,1733.473514,567.661298,7.999039,3344.915039,3606.0
|
| 10 |
+
pythia,code/1024B,16,64,ok,16,1,0,423.875,64.0,225.1875,0.086577,0.877454,0.964068,78335.260902,22176.894859,4106.196035,1167.01214,3737.288544,1062.165825,16.221623,3998.736328,4412.0
|
| 11 |
+
pythia,code/1024B,16,128,ok,16,1,0,423.875,128.0,435.4375,0.088011,1.726217,1.814263,77058.437417,21815.423156,4035.992811,1186.409255,3840.126403,1128.832908,17.720098,3998.736328,4690.0
|
| 12 |
+
pythia,code/256B,1,64,ok,1,1,0,212.0,64.0,179.0,0.177282,0.85831,1.035627,1195.836328,456.899729,208.549316,74.565119,172.842172,61.798319,1.0,2734.67627,2806.0
|
| 13 |
+
pythia,code/256B,1,128,ok,1,1,0,212.0,128.0,399.0,0.018029,1.659766,1.677826,11759.067823,4492.851385,240.39538,77.11932,237.807672,76.289178,1.0,2746.67627,2822.0
|
| 14 |
+
pythia,code/256B,2,64,ok,2,1,0,196.0,64.0,167.0,0.05679,0.839757,0.896585,6902.577028,2359.554392,397.73397,152.424994,372.52459,142.763915,2.155288,2783.519043,2840.0
|
| 15 |
+
pythia,code/256B,2,128,ok,2,1,0,196.0,128.0,352.5,0.024385,1.6792,1.703618,16075.518235,5495.202662,419.842788,152.453551,413.825066,150.268393,1.740167,2796.378418,2860.0
|
| 16 |
+
pythia,code/256B,4,64,ok,4,1,0,187.75,59.0,161.5,0.020853,0.841698,0.862583,36014.740272,11845.061048,767.496627,280.385765,748.913578,273.596911,4.332933,2818.698242,2864.0
|
| 17 |
+
pythia,code/256B,4,128,ok,4,1,0,187.75,107.0,301.75,0.020519,1.701298,1.72185,36600.94473,12037.86065,709.458432,251.572667,700.990287,248.569878,2.947719,2877.401367,2904.0
|
| 18 |
+
pythia,code/256B,8,64,ok,8,1,0,190.25,61.5,180.625,0.024498,0.853088,0.877619,62126.452965,19348.185746,1693.846979,576.728522,1646.500751,560.607868,9.526036,2930.352539,3054.0
|
| 19 |
+
pythia,code/256B,8,128,ok,8,1,0,190.25,117.5,347.75,0.024205,1.705093,1.729331,62880.609418,19583.054444,1631.582659,551.289612,1608.714502,543.562772,6.764771,3031.133789,3174.0
|
| 20 |
+
pythia,code/256B,16,64,ok,16,1,0,179.75,62.75,245.8125,0.031097,0.927524,0.958661,92486.244903,28588.411585,4240.320922,1082.451616,4102.597782,1047.294222,23.736092,3152.875977,3322.0
|
| 21 |
+
pythia,code/256B,16,128,ok,16,1,0,179.75,122.75,513.8125,0.033466,1.759893,1.793394,85937.037199,26563.986812,4671.307608,1115.977149,4584.045624,1095.130228,19.276273,3367.161133,3850.0
|