gpu-runtime-predictor / gpu_catalog.json
RajBhope's picture
Upload gpu_catalog.json with huggingface_hub
a9d6cc9 verified
{
"NVIDIA_T4": {
"name": "NVIDIA T4",
"cuda_cores": 2560,
"tensor_cores": 320,
"memory_gb": 16,
"memory_bandwidth_gbps": 320,
"base_clock_mhz": 585,
"boost_clock_mhz": 1590,
"sm_count": 40,
"fp32_tflops": 8.1,
"fp16_tflops": 65,
"tdp_watts": 70,
"compute_capability": 7.5,
"l2_cache_mb": 4
},
"NVIDIA_V100": {
"name": "NVIDIA V100",
"cuda_cores": 5120,
"tensor_cores": 640,
"memory_gb": 32,
"memory_bandwidth_gbps": 900,
"base_clock_mhz": 1230,
"boost_clock_mhz": 1530,
"sm_count": 80,
"fp32_tflops": 15.7,
"fp16_tflops": 125,
"tdp_watts": 300,
"compute_capability": 7.0,
"l2_cache_mb": 6
},
"NVIDIA_A10G": {
"name": "NVIDIA A10G",
"cuda_cores": 9216,
"tensor_cores": 288,
"memory_gb": 24,
"memory_bandwidth_gbps": 600,
"base_clock_mhz": 885,
"boost_clock_mhz": 1695,
"sm_count": 80,
"fp32_tflops": 31.2,
"fp16_tflops": 62.5,
"tdp_watts": 150,
"compute_capability": 8.6,
"l2_cache_mb": 6
},
"NVIDIA_A100_40GB": {
"name": "NVIDIA A100 40GB",
"cuda_cores": 6912,
"tensor_cores": 432,
"memory_gb": 40,
"memory_bandwidth_gbps": 1555,
"base_clock_mhz": 765,
"boost_clock_mhz": 1410,
"sm_count": 108,
"fp32_tflops": 19.5,
"fp16_tflops": 312,
"tdp_watts": 400,
"compute_capability": 8.0,
"l2_cache_mb": 40
},
"NVIDIA_A100_80GB": {
"name": "NVIDIA A100 80GB",
"cuda_cores": 6912,
"tensor_cores": 432,
"memory_gb": 80,
"memory_bandwidth_gbps": 2039,
"base_clock_mhz": 765,
"boost_clock_mhz": 1410,
"sm_count": 108,
"fp32_tflops": 19.5,
"fp16_tflops": 312,
"tdp_watts": 400,
"compute_capability": 8.0,
"l2_cache_mb": 40
},
"NVIDIA_L4": {
"name": "NVIDIA L4",
"cuda_cores": 7424,
"tensor_cores": 232,
"memory_gb": 24,
"memory_bandwidth_gbps": 300,
"base_clock_mhz": 795,
"boost_clock_mhz": 2040,
"sm_count": 58,
"fp32_tflops": 30.3,
"fp16_tflops": 121,
"tdp_watts": 72,
"compute_capability": 8.9,
"l2_cache_mb": 48
},
"NVIDIA_L40S": {
"name": "NVIDIA L40S",
"cuda_cores": 18176,
"tensor_cores": 568,
"memory_gb": 48,
"memory_bandwidth_gbps": 864,
"base_clock_mhz": 1110,
"boost_clock_mhz": 2520,
"sm_count": 142,
"fp32_tflops": 91.6,
"fp16_tflops": 183.2,
"tdp_watts": 350,
"compute_capability": 8.9,
"l2_cache_mb": 96
},
"NVIDIA_RTX_3090": {
"name": "NVIDIA RTX 3090",
"cuda_cores": 10496,
"tensor_cores": 328,
"memory_gb": 24,
"memory_bandwidth_gbps": 936,
"base_clock_mhz": 1395,
"boost_clock_mhz": 1695,
"sm_count": 82,
"fp32_tflops": 35.6,
"fp16_tflops": 71,
"tdp_watts": 350,
"compute_capability": 8.6,
"l2_cache_mb": 6
},
"NVIDIA_RTX_4090": {
"name": "NVIDIA RTX 4090",
"cuda_cores": 16384,
"tensor_cores": 512,
"memory_gb": 24,
"memory_bandwidth_gbps": 1008,
"base_clock_mhz": 2235,
"boost_clock_mhz": 2520,
"sm_count": 128,
"fp32_tflops": 82.6,
"fp16_tflops": 165.2,
"tdp_watts": 450,
"compute_capability": 8.9,
"l2_cache_mb": 72
},
"NVIDIA_H100_SXM": {
"name": "NVIDIA H100 SXM",
"cuda_cores": 16896,
"tensor_cores": 528,
"memory_gb": 80,
"memory_bandwidth_gbps": 3350,
"base_clock_mhz": 1095,
"boost_clock_mhz": 1830,
"sm_count": 132,
"fp32_tflops": 67,
"fp16_tflops": 989,
"tdp_watts": 700,
"compute_capability": 9.0,
"l2_cache_mb": 50
},
"NVIDIA_H100_PCIe": {
"name": "NVIDIA H100 PCIe",
"cuda_cores": 14592,
"tensor_cores": 456,
"memory_gb": 80,
"memory_bandwidth_gbps": 2039,
"base_clock_mhz": 1095,
"boost_clock_mhz": 1620,
"sm_count": 114,
"fp32_tflops": 48,
"fp16_tflops": 756,
"tdp_watts": 350,
"compute_capability": 9.0,
"l2_cache_mb": 50
},
"NVIDIA_RTX_A6000": {
"name": "NVIDIA RTX A6000",
"cuda_cores": 10752,
"tensor_cores": 336,
"memory_gb": 48,
"memory_bandwidth_gbps": 768,
"base_clock_mhz": 1410,
"boost_clock_mhz": 1860,
"sm_count": 84,
"fp32_tflops": 38.7,
"fp16_tflops": 77.4,
"tdp_watts": 300,
"compute_capability": 8.6,
"l2_cache_mb": 6
}
}