[cherry-pick] Fix the bug of exporting model in dygraph QAT #47028

Merged
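
Summary of the change, as reflected in the diff below: the onnx_format switch moves from save_quantized_model() to the ImperativeQuantAware / ImperativeQuantizeOutputs constructors, and the ONNX-format export passes are now applied to every sub-graph instead of only the top-level graph. A minimal usage sketch of the updated API follows; the LeNet model, the training step and the output path are placeholders and not part of this PR, while the argument names and the input spec mirror the test changes below.

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

# Placeholder model: any dygraph paddle.nn.Layer can be quantized this way.
model = paddle.vision.models.LeNet()

# After this PR, onnx_format is a constructor argument ...
imperative_qat = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max',
    onnx_format=True)
imperative_qat.quantize(model)

# ... run quantization-aware training on `model` here ...

# ... and is no longer passed to save_quantized_model.
imperative_qat.save_quantized_model(
    model,
    path='./qat_lenet',  # placeholder output path prefix
    input_spec=[
        paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
    ])
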
53 changes: 30 additions & 23 deletions python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
@@ -72,7 +72,8 @@ def __init__(self,
weight_preprocess_layer=None,
act_preprocess_layer=None,
weight_quantize_layer=None,
- act_quantize_layer=None):
+ act_quantize_layer=None,
+ onnx_format=False):
"""
The constructor for ImperativeQuantAware.

@@ -124,6 +125,8 @@ def __init__(self,
activation and returns dequantized activation.
If None, will use quantization op defined by 'activation_quantize_type'.
Default is None.
+ onnx_format (bool, optional): Whether to export the quantized model
+ with format of ONNX. Default is False.

Note:
If user sets attribute 'skip_quant' to a Layer that support dynamic
@@ -224,7 +227,7 @@ def forward(self, inputs):
self._quantize_inputs = ImperativeQuantizeInputs(**kwargs)

self._quantize_outputs = ImperativeQuantizeOutputs(
- moving_rate, activation_bits)
+ moving_rate, activation_bits, onnx_format)

def quantize(self, model):
"""
@@ -413,7 +416,7 @@ class ImperativeQuantizeOutputs(object):
Calculate the output scales for target layers.
"""

- def __init__(self, moving_rate=0.9, activation_bits=8):
+ def __init__(self, moving_rate=0.9, activation_bits=8, onnx_format=False):
"""
The constructor for ImperativeQuantizeOutputs.

@@ -425,6 +428,7 @@ def __init__(self, moving_rate=0.9, activation_bits=8):
super(ImperativeQuantizeOutputs, self).__init__()
self._moving_rate = moving_rate
self._activation_bits = activation_bits
+ self._onnx_format = onnx_format

def apply(self, model):
"""
@@ -461,12 +465,7 @@ def apply(self, model):

setattr(parent_layer, sub_name, cur_quant_layer)

- def save_quantized_model(self,
- model,
- path,
- input_spec=None,
- onnx_format=False,
- **config):
+ def save_quantized_model(self, model, path, input_spec=None, **config):
"""
Save the quantized model for the inference.

@@ -479,8 +478,6 @@ def save_quantized_model(self,
InputSpec or example Tensor. If None, all input variables of
the original Layer's forward method would be the inputs of
the saved model. Default None.
- onnx_format (bool, optional): Whether to export the quantized model
- with format of ONNX. Default is False.
**config (dict, optional): Other save configuration options for
compatibility. We do not recommend using these configurations,
they may be removed in the future. If not necessary, DO NOT use
@@ -521,7 +518,7 @@ def save_quantized_model(self,
model_filename=model_filename,
params_filename=params_filename))

- if not onnx_format:
+ if not self._onnx_format:
self._gather_scales(infer_program, scope, fetch_targets)

# Remove `moving_average_abs_max_scale` node in sub graphs.
@@ -540,10 +537,14 @@ def save_quantized_model(self,
graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
transform_pass = ReplaceFakeQuantDequantPass(
scope, place, quant_bits=self._activation_bits)
- transform_pass.apply(graph)
+ for sub_graph in graph.all_sub_graphs():
+ sub_graph._for_test = True
+ transform_pass.apply(sub_graph)

quant_weight_pass = QuantWeightPass(scope, place)
- quant_weight_pass.apply(graph)
+ for sub_graph in graph.all_sub_graphs():
+ sub_graph._for_test = True
+ quant_weight_pass.apply(sub_graph)

infer_program = graph.to_program()
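
The hunk above contains the core of the export fix: when onnx_format is enabled, ReplaceFakeQuantDequantPass and QuantWeightPass are now applied to every sub-graph of the inference IrGraph rather than to the top-level graph only. A hedged sketch of that pattern as a standalone helper; the helper name is illustrative and not part of the PR:

def _apply_pass_to_all_sub_graphs(graph, ir_pass):
    # Each sub-graph corresponds to one block of the program (the main block
    # plus any control-flow sub-blocks). Before this fix only the main graph
    # was transformed, so fake-quant ops inside sub-blocks were left behind.
    for sub_graph in graph.all_sub_graphs():
        sub_graph._for_test = True
        ir_pass.apply(sub_graph)

# Usage with the names from the diff above:
#     _apply_pass_to_all_sub_graphs(graph, transform_pass)
#     _apply_pass_to_all_sub_graphs(graph, quant_weight_pass)
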

@@ -565,18 +566,24 @@ def _is_target_layer(self, layer):
"""
Whether the layer needs to calculate output scales.
"""
+ # exclude fake_quant ops in quant_layers file
+ if not isinstance(layer, dygraph.Layer):
+ return False
+
+ if self._onnx_format:
+ return True if isinstance(layer, tuple(
+ utils.fake_quant_wrap_layers)) else False
+
flag = False
- if isinstance(layer, dygraph.Layer):
- # exclude fake_quant ops in quant_layers file
- if utils.is_leaf_layer(layer) and \
- not isinstance(layer, tuple(utils.fake_quant_leaf_layers)):
- flag = True
+ if utils.is_leaf_layer(layer) and \
+ not isinstance(layer, tuple(utils.fake_quant_leaf_layers)):
+ flag = True

- if isinstance(layer, tuple(utils.fake_quant_wrap_layers)):
- flag = True
+ if isinstance(layer, tuple(utils.fake_quant_wrap_layers)):
+ flag = True

- if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer):
- flag = True
+ if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer):
+ flag = True

return flag
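
Condensed, the new selection rule above reads as follows. This is only a sketch: wrap_layers, leaf_quant_layers and is_leaf_layer stand in for utils.fake_quant_wrap_layers, utils.fake_quant_leaf_layers and utils.is_leaf_layer from the diff.

import paddle

def is_target_layer(layer, onnx_format, wrap_layers, leaf_quant_layers,
                    is_leaf_layer):
    # Only dygraph layers can be wrapped with an output-scale observer.
    if not isinstance(layer, paddle.nn.Layer):
        return False
    if onnx_format:
        # ONNX-format export only tracks the fake-quant wrapper layers.
        return isinstance(layer, tuple(wrap_layers))
    # Otherwise: leaf layers (excluding the fake-quant leaf layers), wrapper
    # layers and FloatFunctionalLayer all need output scales.
    return ((is_leaf_layer(layer)
             and not isinstance(layer, tuple(leaf_quant_layers)))
            or isinstance(layer, tuple(wrap_layers))
            or isinstance(layer, paddle.nn.quant.FloatFunctionalLayer))
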

@@ -59,6 +59,7 @@
_fake_quant_dequant_op_list = [
'fake_quantize_dequantize_moving_average_abs_max',
"fake_channel_wise_quantize_dequantize_abs_max",
+ "fake_quantize_dequantize_abs_max",
]

_conv_ops = ['conv2d', 'depthwise_conv2d', 'conv2d_transpose']
4 changes: 3 additions & 1 deletion python/paddle/fluid/contrib/slim/quantization/utils.py
@@ -332,9 +332,11 @@ def _clip(x, scale):
x[x < -scale] = -scale
return x

- assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
bnt = (1 << (weight_bits - 1)) - 1
+ if isinstance(scale, list) and len(scale) == 1:
+ scale = scale[0]
if isinstance(scale, list):
+ assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
for i, s in enumerate(scale):
if s == 0.0:
s = 1e-8
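
After the utils.py change above, a single-element scale list is collapsed to a scalar and handled as per-tensor quantization, and the quant_axis assertion only applies to genuine per-channel (multi-element) scale lists. A hedged numpy illustration of that behaviour; the function name and shapes are illustrative and not the actual utils.py helper:

import numpy as np

def quant_weight(x, scale, weight_bits=8, quant_axis=0):
    # Symmetric quantization of a float array to integer levels in [-bnt, bnt].
    bnt = (1 << (weight_bits - 1)) - 1
    if isinstance(scale, list) and len(scale) == 1:
        # Single-element scale list: treat as per-tensor (the fixed case).
        scale = scale[0]
    if isinstance(scale, list):
        # Per-channel: one scale per slice along quant_axis.
        assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
        out = np.zeros_like(x, dtype='float64')
        for i, s in enumerate(scale):
            s = 1e-8 if s == 0.0 else s
            idx = (slice(None),) * quant_axis + (i,)
            out[idx] = np.round(np.clip(x[idx], -s, s) / s * bnt)
        return out
    s = 1e-8 if scale == 0.0 else scale
    return np.round(np.clip(x, -s, s) / s * bnt)

# Per-tensor check: a length-1 scale list now behaves like a plain scalar scale.
w = np.random.uniform(-1, 1, (4, 3)).astype('float32')
assert np.allclose(quant_weight(w, [0.5]), quant_weight(w, 0.5))
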
6 changes: 3 additions & 3 deletions python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py
@@ -68,7 +68,8 @@ def func_qat(self):
imperative_qat = ImperativeQuantAware(
weight_quantize_type=self.weight_quantize_type,
activation_quantize_type=self.activation_quantize_type,
- fuse_conv_bn=self.fuse_conv_bn)
+ fuse_conv_bn=self.fuse_conv_bn,
+ onnx_format=self.onnx_format)

with fluid.dygraph.guard():
# For CI coverage
@@ -187,8 +188,7 @@ def func_qat(self):
input_spec=[
paddle.static.InputSpec(shape=[None, 1, 28, 28],
dtype='float32')
- ],
- onnx_format=self.onnx_format)
+ ])
print('Quantized model saved in %s' % tmpdir)

if core.is_compiled_with_cuda():