# /*---------------------------------------------------------------------------------------------
# * Copyright (c) 2022-2023 STMicroelectronics.
# * All rights reserved.
# *
# * This software is licensed under terms that can be found in the LICENSE file in
# * the root directory of this software component.
# * If no LICENSE file comes with this software, it is provided AS-IS.
# *--------------------------------------------------------------------------------------------*/
from onnxruntime.quantization import QuantType, CalibrationMethod
import keras
import onnx
from onnx import numpy_helper
import os
import numpy as np
from omegaconf import DictConfig
import tensorflow
# Lookup from the bit-width names accepted in the configuration file to the onnxruntime quantization types.
# Every name "<Name>" is mapped to the QuantType member called "Q<Name>".
dict_types = {
    type_name: getattr(QuantType, "Q" + type_name)
    for type_name in ("Int16", "UInt16", "Int8", "UInt8", "Int4", "UInt4")
}
def get_weights_activations_quant_type(cfg: DictConfig):
    """
    Resolve the ONNX quantization types to use for weights and activations.

    Both types fall back to QuantType.QInt8 when the 'onnx_quant_parameters' section is empty, or when the
    corresponding field ('weightType' for weights, 'activType' for activations) is not set.

    Inputs:
        cfg: dict of input parameters; only cfg.quantization.onnx_quant_parameters is read
    Returns:
        weight type, activation type (QuantType values)
    """
    # Start from the defaults and only override what the configuration actually specifies
    weight_type = activation_type = QuantType.QInt8
    quant_params = cfg.quantization.onnx_quant_parameters
    if quant_params:
        if quant_params.weightType:
            weight_type = dict_types[quant_params.weightType]
        if quant_params.activType:
            activation_type = dict_types[quant_params.activType]
    return weight_type, activation_type
def get_calibration_method(cfg: DictConfig):
    """
    Convert the calibration method given as a string in the configuration into the ONNX class type.

    MinMax is used when the 'onnx_quant_parameters' section is empty or when 'calibrate_method' is None.

    Inputs:
        cfg: dict of input parameters; only cfg.quantization.onnx_quant_parameters is read
    Returns:
        calibration_method (CalibrationMethod.MinMax or CalibrationMethod.Entropy)
    Raises:
        ValueError: if the configured method is neither "MinMax" nor "Entropy"
    """
    quant_params = cfg.quantization.onnx_quant_parameters
    calibration_param = quant_params.calibrate_method if quant_params else None

    if calibration_param is None or calibration_param == "MinMax":
        return CalibrationMethod.MinMax
    if calibration_param == "Entropy":
        return CalibrationMethod.Entropy
    # The two literals are concatenated: the trailing space after "section" keeps the words apart
    raise ValueError(f"Unsupported calibration method: {calibration_param}. Review your config yaml file at section "
                     f"quantization_parameters. Only MinMax or Entropy are supported so far.")
def update_bit_width(tensor_type: str = None, order: int = None, direction: str = None):
    """
    Update the bit width 'order' times, increasing or decreasing, and return the matching ONNX type.

    The signedness is preserved and the result saturates at 4 bits (going down) and 16 bits (going up). A type
    that is already at the limit for the requested direction is returned unchanged, whatever 'order' is.

    Inputs:
        tensor_type (str): Int4, UInt4, Int8, UInt8, Int16, or UInt16
        order (int): number of times we update the type in the specified way, expected to be 1 or 2.
                     Larger values behave like 2.
        direction (str): 'down' decreases the bit width; any other value (typically 'up') increases it
    Returns:
        updated_type (QuantType): entry of dict_types for the resulting Int4, UInt4, Int8, UInt8, Int16, or UInt16
    Raises:
        ValueError: if tensor_type is not supported, or if the type has to move and order is not a number >= 1
    """
    error_message = (f"Unsupported 'update_bit_width' function parameters. Check tensor_type = {tensor_type}, "
                     f"order = {order} and direction = {direction}")

    # Bit widths ordered from narrowest to widest, one ladder per signedness
    ladders = (('Int4', 'Int8', 'Int16'), ('UInt4', 'UInt8', 'UInt16'))
    ladder = next((candidates for candidates in ladders if tensor_type in candidates), None)
    if ladder is None:
        raise ValueError(error_message)

    position = ladder.index(tensor_type)
    last = len(ladder) - 1
    going_down = direction == 'down'

    # Already at the limit in the requested direction: nothing to update, 'order' is irrelevant
    if position == (0 if going_down else last):
        return dict_types[tensor_type]

    # From here the type has to move, so a usable order is required. Previously an order below 1 silently
    # returned None and a non-numeric order raised TypeError; both now raise the ValueError below.
    try:
        steps = 2 if order >= 2 else (1 if order >= 1 else 0)
    except TypeError:
        steps = 0
    if steps == 0:
        raise ValueError(error_message)

    target = max(position - steps, 0) if going_down else min(position + steps, last)
    return dict_types[ladder[target]]
def define_extra_options(cfg: dict = None, list_weights_tensors=None, list_activation_tensors=None, axis_list=None,
                         bit_width_w=None, bit_width_a=None):
    """
    Set ONNX quantizer extra options according to config file

    The configuration is only read: the overrides found in it are converted on copies, so the function can be
    called several times with the same cfg.

    Inputs:
        cfg (dict): dictionary of configuration parameters
        list_weights_tensors (str): list of weights tensor names for which we want to override quantization
                                    parameters. If None, ignored
        list_activation_tensors (str): list of activation tensor names for which we want to override
                                       quantization parameters. If None, ignored
        axis_list: per-channel quantization axis of each entry of list_weights_tensors (same order). Only read
                   when cfg.quantization.granularity is 'per_channel'
        bit_width_w (QuantType): QuantType.QInt16 or QuantType.QUInt16 or QuantType.QInt8 or QuantType.QUInt8,
                                 or QuantType.QInt4 or QuantType.QUInt4 for all weights
        bit_width_a (QuantType): QuantType.QInt16 or QuantType.QUInt16 or QuantType.QInt8 or QuantType.QUInt8,
                                 or QuantType.QInt4 or QuantType.QUInt4 for all activations
    Returns:
        a dictionary with all extra options set
    """
    # Value given to each option when the configuration has no 'onnx_extra_options' section or leaves the
    # option to None
    option_defaults = (
        ("WeightSymmetric", True),
        ("ActivationSymmetric", False),
        ("CalibMovingAverage", False),
        ("QuantizeBias", True),
        ("SmoothQuant", False),
        ("SmoothQuantAlpha", 0.5),
        ("SmoothQuantFolding", True),
    )
    user_options = cfg.quantization.onnx_extra_options

    extra_options = {}
    for option_name, default_value in option_defaults:
        configured = getattr(user_options, option_name) if user_options else None
        extra_options[option_name] = default_value if configured is None else configured

    tensor_overrides = {}
    extra_options["TensorQuantOverrides"] = tensor_overrides

    # Code for setting a specific bit_width for some weights tensor.
    # if we want to keep per-channel quantization we need to add "axis" field for the weights
    if list_weights_tensors:
        per_channel = cfg.quantization.granularity == 'per_channel'
        for idx, tensor_name in enumerate(list_weights_tensors):
            override = {'quant_type': bit_width_w}
            if per_channel:
                override["axis"] = axis_list[idx]
            tensor_overrides[tensor_name] = [override]

    # Code for setting a specific bit_width for some activations tensors
    if list_activation_tensors:
        for tensor_name in list_activation_tensors:
            tensor_overrides[tensor_name] = [{'quant_type': bit_width_a}]

    # case where overrides would be specified in the yaml
    # we consider quant_type is mandatory, scale and offset optional. We don't support (yet?) scale and offset
    # per channel but that would be very impractical to write the full list in the yaml. Per-tensor is ok
    if not list_weights_tensors and not list_activation_tensors and user_options:
        yaml_overrides = []
        if user_options.weights_tensor_override:
            yaml_overrides += list(user_options.weights_tensor_override)
        if user_options.activations_tensor_override:
            yaml_overrides += list(user_options.activations_tensor_override)

        for entry in yaml_overrides:
            # entry[0] is the tensor name, entry[1] the mapping of its quantization parameters.
            # Work on a copy: writing the converted values back into cfg would break the string -> type
            # conversion the next time the same configuration is processed.
            override = dict(entry[1])
            # conversion string to type and zero point to int, and scale to np.array float32 as required by ONNX
            override["quant_type"] = dict_types[override["quant_type"]]
            if override.get("zero_point") is not None:
                override["zero_point"] = np.array(override["zero_point"])
            if override.get("scale") is not None:
                override["scale"] = np.array(override["scale"], dtype=np.float32)
            tensor_overrides[entry[0]] = [override]

    return extra_options
def count_weights(onnx_model_path_quant, w_tensor_name: str = None):
    """
    Count the weights stored on 4 bits and on 8 bits in a quantized ONNX model

    Only the initializers named '<name>_quantized', for the names given in w_tensor_name, are considered.
    Initializers without any element are ignored.

    Args:
        onnx_model_path_quant: the ONNX quantized model path
        w_tensor_name: names of the tensors that have weights (list of str). A single name given as a str
                       is accepted too; None means no tensor.
    Returns:
        nb_weights_4bits, nb_weights_8bits, total_count_weights
    """
    if w_tensor_name is None:
        weight_names = []
    elif isinstance(w_tensor_name, str):
        # a bare string would otherwise be iterated character by character
        weight_names = [w_tensor_name]
    else:
        weight_names = w_tensor_name
    # extension needed to get the weights values; a set keeps the name test cheap for big models
    quantized_names = {name + '_quantized' for name in weight_names}

    nb_weights_4bits = nb_weights_8bits = total_count_weights = 0
    model_quant = onnx.load(onnx_model_path_quant)
    for initializer in model_quant.graph.initializer:
        if initializer.name not in quantized_names:
            continue
        tensor_values = numpy_helper.to_array(initializer)
        n_weights = tensor_values.size
        if n_weights == 0:
            # max()/min() are undefined on an empty tensor, and it adds nothing to the counts
            continue
        total_count_weights += n_weights
        # since np.int4 does not exist we identify 4 bits tensors by the dynamic range having in mind that 4 bits
        # means 16 integers
        dyn_range = int(tensor_values.max()) - int(tensor_values.min())
        if dyn_range <= 15:  # 4 bits
            nb_weights_4bits += n_weights
        elif tensor_values.dtype == np.int8:
            nb_weights_8bits += n_weights
    return nb_weights_4bits, nb_weights_8bits, total_count_weights
def weights_based_layer_ranking(model, extension: str = None):
    """
    Count weights (not bias) per layer in a Keras or onnx model and rank the layers by that count

    Keras: every layer having a 'kernel' attribute is reported with the size of its first weight array.
    ONNX: every Conv, ConvTranspose, Gemm or MatMul node whose second input is an initializer is reported with
    the number of elements of that initializer. Nodes whose second input is not an initializer (e.g. a MatMul
    between two activations) have no weights and are skipped.

    Args:
        model: the keras or onnx model
        extension (str): model backend, expected 'onnx' or 'keras'; '.onnx' and '.keras' are accepted as well
    Returns:
        list of (name, number of weights) sorted by decreasing number of weights; empty for any other backend
    """
    # Accept the backend with or without the leading dot of a file extension
    backend = extension.lstrip('.') if isinstance(extension, str) else extension

    layer_params = []
    if backend == 'keras':
        layer_params = [(layer.name, layer.get_weights()[0].size)
                        for layer in model.layers
                        if hasattr(layer, "kernel")]  # Only include layers with weights
    elif backend == 'onnx':
        initializers = {init.name: init for init in model.graph.initializer}
        for node in model.graph.node:
            if node.op_type not in ('Conv', 'ConvTranspose', 'Gemm', 'MatMul'):
                continue
            initializer = initializers.get(node.input[1])
            if initializer is None:
                # second input is produced by another node: no stored weights to count
                continue
            num_params = int(np.prod(list(initializer.dims)))
            layer_params.append((initializer.name, num_params))

    # Sort by number of parameters in descending order
    return sorted(layer_params, key=lambda item: item[1], reverse=True)
def _get_initializer_tensor(model, name: str = None):
"""
Report weights (not bias) corresponding to tensor 'name' if exists, in an onnx model
Args:
model: the onnx model
name (str): tensor name
Returns: weight tensor values
"""
for tensor in model.graph.initializer:
if tensor.name == name:
return onnx.numpy_helper.to_array(tensor)
return None
def _onnx_node_identity_card(model, node):
"""
Returns a list with node characteristic in order to make a ranking later-on
Args:
model: the onnx model
node: node under consideration
Returns:
list of node characteristics
"""
node_card_list = []
group = 1
if node.op_type == "Conv":
for attr in node.attribute:
if attr.name == "group":
group = attr.i
break
weight_name = node.input[1]
weight = _get_initializer_tensor(model, weight_name)
# onnx 'Conv' weight expected shape: [out_channels, in_channels_per_group, kH, kW]
out_channels, in_channels_per_group, kH, kW = weight.shape
nparams = int(np.prod([out_channels, in_channels_per_group, kH, kW]))
params_per_scale = int(np.prod([in_channels_per_group, kH, kW]))
# For depthwise, we expect in_channels_per_group == 1 and group == in_channels == out_channels
if in_channels_per_group == 1 and group == out_channels:
layer_type = 0 # DW
else:
layer_type = 1 # Conv2D
node_card_list = [node.name, layer_type, nparams, params_per_scale]
elif node.op_type in ["Gemm", "MatMul"]:
weight_name = node.input[1]
weight = _get_initializer_tensor(model, weight_name)
# There is a possibility that MatMul works with 2 tensors none of them being of type 'initializer' so with no weights
# in this case we report an empty list
if weight is not None:
out_channels, in_channels = weight.shape
nparams = out_channels * in_channels
params_per_scale = out_channels
node_card_list = [node.name, 2, nparams, params_per_scale]
else:
node_card_list = []
return node_card_list
def _keras_layer_identity_card(layer):
"""
Returns a list with node characteristic in order to make a ranking later-on
Args:
layer: layer under consideration
Returns:
list of layers characteristics
"""
layer_card_list = []
if hasattr(layer, "kernel"): # Only include layers with weights
weight = layer.get_weights()[0]
nparams = weight.size
if isinstance(layer, tensorflow.keras.layers.DepthwiseConv2D):
layer_type = 0 # DW
kH, kW, in_ch, depth_mult = weight.shape
params_per_scale = kH * kW
elif isinstance(layer, tensorflow.keras.layers.Conv2D):
layer_type = 1 # Conv2D
kH, kW, in_ch, out_ch = weight.shape
params_per_scale = int(np.prod([kH, kW, in_ch]))
elif isinstance(layer, tensorflow.keras.layers.Dense):
layer_type = 2 # Dense
in_dim, out_dim = weight.shape
params_per_scale = out_dim
layer_card_list = [layer.name, layer_type, nparams, params_per_scale]
return layer_card_list
def composite_score_layer_ranking(model, extension: str = None):
    """
    Rank the layers of a Keras or onnx model for mixed precision w4w8 quantization

    Each layer is described by its identity card [name, layer type, number of weights, params_per_scale].
    Layers without identity card (empty list) are left out. The ranking puts first the layers with the most
    weights; for an equal number of weights the smallest params_per_scale comes first, then names in reverse
    alphabetical order.

    Args:
        model: the model either onnx or keras
        extension: model backend, expected 'onnx' or 'keras'; '.onnx' and '.keras' are accepted as well
    Returns:
        list of identity cards ranked; empty for any other backend
    """
    # Accept the backend with or without the leading dot of a file extension, so that the values documented
    # above and the file-extension form both select the right branch
    backend = extension.lstrip('.') if isinstance(extension, str) else extension

    if backend == 'keras':
        cards = [_keras_layer_identity_card(layer) for layer in model.layers]
    elif backend == 'onnx':
        cards = [_onnx_node_identity_card(model, node) for node in model.graph.node]
    else:
        cards = []
    layer_params = [card for card in cards if card]

    # Ranking of layers for mixed precision w4w8 quantization
    return sorted(layer_params, key=lambda card: (card[2], -card[3], card[0]), reverse=True)