From afdb779c87661d35f496d0e8c029a588fcd1cf86 Mon Sep 17 00:00:00 2001 From: Majid Dadashi Date: Wed, 4 Dec 2024 13:16:12 -0800 Subject: [PATCH] Add a flag-protected pass to lower fake_quant annotations. LowerQuantAnnotationsPass is added, which lowers quant.fake_quant composites to a pair of tfl.Quantize-tfl.Dequantize ops that are later consumed by the converter quantization passes. PiperOrigin-RevId: 702836166 --- tflite/python/convert.py | 5 +++++ tflite/python/lite.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/tflite/python/convert.py b/tflite/python/convert.py index 973963d6..ea8b7b61 100644 --- a/tflite/python/convert.py +++ b/tflite/python/convert.py @@ -447,6 +447,7 @@ def build_conversion_flags( use_buffer_offset=False, reduce_type_precision=False, qdq_conversion_mode=None, + strict_qdq_mode=False, disable_per_channel_quantization_for_dense_layers=False, enable_composite_direct_lowering=False, model_origin_framework=lite_constants.UNSET, @@ -578,6 +579,9 @@ def build_conversion_flags( This could have side effects e.g. reduced flatbuffer size. qdq_conversion_mode: If set, assume input model is a quantized model represented with QDQ ops and convert to quantized kernels. + strict_qdq_mode: If set, adheres to the QDQ annotations added by the + framework when possible rather than quantizing any op that is possible to + quantize. disable_per_channel_quantization_for_dense_layers: If set, disables per channel end enables per tensor integer quantization for weights in Dense layers. The flag works only for integer quantized model. 
@@ -706,6 +710,7 @@ def build_conversion_flags( conversion_flags.reduce_type_precision = reduce_type_precision if qdq_conversion_mode is not None: conversion_flags.qdq_conversion_mode = qdq_conversion_mode + conversion_flags.strict_qdq_mode = strict_qdq_mode conversion_flags.disable_per_channel_quantization_for_dense_layers = ( disable_per_channel_quantization_for_dense_layers ) diff --git a/tflite/python/lite.py b/tflite/python/lite.py index eb395f58..cde87f0d 100644 --- a/tflite/python/lite.py +++ b/tflite/python/lite.py @@ -680,6 +680,7 @@ def __init__(self): self._experimental_enable_composite_direct_lowering = False self.model_origin_framework = constants.UNSET self.canonicalizing_inf_as_min_max_float = True + self._experimental_strict_qdq = False # Debug parameters self.ir_dump_dir = None @@ -837,6 +838,7 @@ def _get_base_converter_args(self): self.experimental_stablehlo_quantizer_config ), "qdq_conversion_mode": self._experimental_qdq_conversion_mode, + "strict_qdq_mode": self._experimental_strict_qdq, "disable_per_channel_quantization_for_dense_layers": ( self._experimental_disable_per_channel_quantization_for_dense_layers ),