From afdb779c87661d35f496d0e8c029a588fcd1cf86 Mon Sep 17 00:00:00 2001
From: Majid Dadashi <majiddadashi@google.com>
Date: Wed, 4 Dec 2024 13:16:12 -0800
Subject: [PATCH] Add a flag-protected pass to lower fake_quant annotations.

Adds LowerQuantAnnotationsPass, which lowers quant.fake_quant composites to pairs of tfl.Quantize/tfl.Dequantize ops that are later consumed by the converter quantization passes.

PiperOrigin-RevId: 702836166
---
 tflite/python/convert.py | 5 +++++
 tflite/python/lite.py    | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/tflite/python/convert.py b/tflite/python/convert.py
index 973963d6..ea8b7b61 100644
--- a/tflite/python/convert.py
+++ b/tflite/python/convert.py
@@ -447,6 +447,7 @@ def build_conversion_flags(
     use_buffer_offset=False,
     reduce_type_precision=False,
     qdq_conversion_mode=None,
+    strict_qdq_mode=False,
     disable_per_channel_quantization_for_dense_layers=False,
     enable_composite_direct_lowering=False,
     model_origin_framework=lite_constants.UNSET,
@@ -578,6 +579,9 @@ def build_conversion_flags(
       This could have side effects e.g. reduced flatbuffer size.
     qdq_conversion_mode: If set, assume input model is a quantized model
       represented with QDQ ops and convert to quantized kernels.
+    strict_qdq_mode: If set, adheres to the QDQ annotations added by the
+      framework when possible rather than quantizing any op that is possible to
+      quantize.
     disable_per_channel_quantization_for_dense_layers: If set, disables per
       channel end enables per tensor integer quantization for weights in Dense
       layers. The flag works only for integer quantized model.
@@ -706,6 +710,7 @@ def build_conversion_flags(
     conversion_flags.reduce_type_precision = reduce_type_precision
   if qdq_conversion_mode is not None:
     conversion_flags.qdq_conversion_mode = qdq_conversion_mode
+  conversion_flags.strict_qdq_mode = strict_qdq_mode
   conversion_flags.disable_per_channel_quantization_for_dense_layers = (
       disable_per_channel_quantization_for_dense_layers
   )
diff --git a/tflite/python/lite.py b/tflite/python/lite.py
index eb395f58..cde87f0d 100644
--- a/tflite/python/lite.py
+++ b/tflite/python/lite.py
@@ -680,6 +680,7 @@ def __init__(self):
     self._experimental_enable_composite_direct_lowering = False
     self.model_origin_framework = constants.UNSET
     self.canonicalizing_inf_as_min_max_float = True
+    self._experimental_strict_qdq = False
 
     # Debug parameters
     self.ir_dump_dir = None
@@ -837,6 +838,7 @@ def _get_base_converter_args(self):
             self.experimental_stablehlo_quantizer_config
         ),
         "qdq_conversion_mode": self._experimental_qdq_conversion_mode,
+        "strict_qdq_mode": self._experimental_strict_qdq,
         "disable_per_channel_quantization_for_dense_layers": (
             self._experimental_disable_per_channel_quantization_for_dense_layers
         ),