{ "quantization_method": "mixed_precision_nf4", "model_type": "ernie", "description": "First and last blocks plus boundary modules kept at bfloat16, middle layers quantized to NF4 (ernie architecture)", "high_precision_layers_count": 20 }