diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0ef3739dd..2a1bf5a0f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -114,7 +114,7 @@ test_py39_tf2_intel-1: PYTHON: "3.9" TEST_PACKAGE: coremltools.converters.mil.frontend.tensorflow WHEEL_PATH: build/dist/*cp39*10_15* - REQUIREMENTS: reqs/test.pip + REQUIREMENTS: reqs/test_tf2.pip test_py39_tf2_intel-2: <<: *test_macos_pkg @@ -126,7 +126,7 @@ test_py39_tf2_intel-2: PYTHON: "3.9" TEST_PACKAGE: coremltools.converters.mil.frontend.tensorflow2 WHEEL_PATH: build/dist/*cp39*10_15* - REQUIREMENTS: reqs/test.pip + REQUIREMENTS: reqs/test_tf2.pip test_py39_mil_intel: <<: *test_macos_pkg @@ -174,7 +174,7 @@ test_py39_milproto_intel: WHEEL_PATH: build/dist/*cp39*10_15* TEST_PACKAGE: coremltools.converters.mil.frontend.milproto PYTHON: "3.9" - REQUIREMENTS: reqs/test.pip + REQUIREMENTS: reqs/test_tf2.pip @@ -212,7 +212,7 @@ test_py310_tf2-1: PYTHON: "3.10" TEST_PACKAGE: coremltools.converters.mil.frontend.tensorflow WHEEL_PATH: build/dist/*cp310*11* - REQUIREMENTS: reqs/test.pip + REQUIREMENTS: reqs/test_tf2.pip test_py310_tf2-2: <<: *test_macos_pkg @@ -224,7 +224,7 @@ test_py310_tf2-2: PYTHON: "3.10" TEST_PACKAGE: coremltools.converters.mil.frontend.tensorflow2 WHEEL_PATH: build/dist/*cp310*11* - REQUIREMENTS: reqs/test.pip + REQUIREMENTS: reqs/test_tf2.pip test_py310_mil: <<: *test_macos_pkg @@ -272,7 +272,7 @@ test_py310_milproto: PYTHON: "3.10" TEST_PACKAGE: coremltools.converters.mil.frontend.milproto WHEEL_PATH: build/dist/*cp310*11* - REQUIREMENTS: reqs/test.pip + REQUIREMENTS: reqs/test_tf2.pip diff --git a/coremlpython/CoreMLPython.h b/coremlpython/CoreMLPython.h index 6dbfd5cf1..320ef6f6e 100644 --- a/coremlpython/CoreMLPython.h +++ b/coremlpython/CoreMLPython.h @@ -3,6 +3,8 @@ // Use of this source code is governed by a BSD-3-clause license that can be // found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +// Disable a few warnings and include pybind first, then re-enable warnings #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wexit-time-destructors" #pragma clang diagnostic ignored "-Wdocumentation" @@ -28,11 +30,13 @@ namespace CoreML { static py::bytes autoSetSpecificationVersion(const py::bytes& modelBytes); static py::str compileModel(const std::string& urlStr); static int32_t maximumSupportedSpecificationVersion(); + static void setComputeUnit(MLModelConfiguration *configuration, const std::string& computeUnits); Model(const Model&) = delete; Model& operator=(const Model&) = delete; ~Model(); explicit Model(const std::string& urlStr, const std::string& computeUnits); + explicit Model(MLModel* m_model, NSURL* compiledUrl, bool deleteCompiledModelOnExit); py::dict predict(const py::dict& input) const; py::list batchPredict(const py::list& batch) const; diff --git a/coremlpython/CoreMLPython.mm b/coremlpython/CoreMLPython.mm index e01374d34..7f65f3af1 100644 --- a/coremlpython/CoreMLPython.mm +++ b/coremlpython/CoreMLPython.mm @@ -33,18 +33,6 @@ bool usingMacOS13OrHigher() { return (NSProtocolFromString(@"MLProgram") != nil); } -bool isCompiledModelPath(const std::string& path) { - const std::string fileExtension = ".mlmodelc"; - - size_t start = path.length() - fileExtension.length(); - if (path.back() == '/') { - start--; - } - const std::string match = path.substr(start, fileExtension.length()); - - return (match == fileExtension); -} - Model::~Model() { @autoreleasepool { NSFileManager *fileManager = [NSFileManager defaultManager]; @@ -58,7 +46,7 @@ bool 
isCompiledModelPath(const std::string& path) { @autoreleasepool { NSError *error = nil; - if (! isCompiledModelPath(urlStr)) { + if (! Utils::isCompiledModelPath(urlStr)) { // Compile the model NSURL *specUrl = Utils::stringToNSURL(urlStr); @@ -89,24 +77,8 @@ bool isCompiledModelPath(const std::string& path) { compiledUrl = Utils::stringToNSURL(urlStr); } - // Set compute unit MLModelConfiguration *configuration = [MLModelConfiguration new]; - if (computeUnits == "CPU_ONLY") { - configuration.computeUnits = MLComputeUnitsCPUOnly; - } else if (computeUnits == "CPU_AND_GPU") { - configuration.computeUnits = MLComputeUnitsCPUAndGPU; - } else if (computeUnits == "CPU_AND_NE") { - if (usingMacOS13OrHigher()) { -#if BUILT_WITH_MACOS13_SDK - configuration.computeUnits = MLComputeUnitsCPUAndNeuralEngine; -#endif // BUILT_WITH_MACOS13_SDK - } else { - throw std::runtime_error("CPU_AND_NE is only available on macOS >= 13.0"); - } - } else { - assert(computeUnits == "ALL"); - configuration.computeUnits = MLComputeUnitsAll; - } + setComputeUnit(configuration, computeUnits); // Create MLModel m_model = [MLModel modelWithContentsOfURL:compiledUrl configuration:configuration error:&error]; @@ -114,6 +86,14 @@ bool isCompiledModelPath(const std::string& path) { } } + +Model::Model(MLModel* mlModel, NSURL* compiledUrl, bool deleteCompiledModelOnExit) + : m_model(mlModel), + compiledUrl(compiledUrl), + m_deleteCompiledModelOnExit(deleteCompiledModelOnExit) +{ +} + py::dict Model::predict(const py::dict& input) const { @autoreleasepool { NSError *error = nil; @@ -127,6 +107,26 @@ bool isCompiledModelPath(const std::string& path) { } +void Model::setComputeUnit(MLModelConfiguration *configuration, const std::string& computeUnits) { + if (computeUnits == "CPU_ONLY") { + configuration.computeUnits = MLComputeUnitsCPUOnly; + } else if (computeUnits == "CPU_AND_GPU") { + configuration.computeUnits = MLComputeUnitsCPUAndGPU; + } else if (computeUnits == "CPU_AND_NE") { + if (usingMacOS13OrHigher()) { +#if BUILT_WITH_MACOS13_SDK + configuration.computeUnits = MLComputeUnitsCPUAndNeuralEngine; +#endif // BUILT_WITH_MACOS13_SDK + } else { + throw std::runtime_error("CPU_AND_NE is only available on macOS >= 13.0"); + } + } else { + assert(computeUnits == "ALL"); + configuration.computeUnits = MLComputeUnitsAll; + } +} + + py::list Model::batchPredict(const py::list& batch) const { @autoreleasepool { NSError* error = nil; @@ -156,6 +156,9 @@ bool isCompiledModelPath(const std::string& path) { py::str Model::getCompiledModelPath() const { + if (this->compiledUrl == nil) { + return nil; + } return [this->compiledUrl.path UTF8String]; } diff --git a/coremlpython/CoreMLPythonUtils.h b/coremlpython/CoreMLPythonUtils.h index a970746df..dd326e6f7 100644 --- a/coremlpython/CoreMLPythonUtils.h +++ b/coremlpython/CoreMLPythonUtils.h @@ -23,6 +23,7 @@ namespace CoreML { namespace Python { namespace Utils { + bool isCompiledModelPath(const std::string& path); NSURL * stringToNSURL(const std::string& str); void handleError(NSError *error); diff --git a/coremlpython/CoreMLPythonUtils.mm b/coremlpython/CoreMLPythonUtils.mm index edfc2de1e..1186182af 100644 --- a/coremlpython/CoreMLPythonUtils.mm +++ b/coremlpython/CoreMLPythonUtils.mm @@ -29,6 +29,18 @@ using namespace CoreML::Python; +bool Utils::isCompiledModelPath(const std::string& path) { + const std::string fileExtension = ".mlmodelc"; + + size_t start = path.length() - fileExtension.length(); + if (path.back() == '/') { + start--; + } + const std::string match = 
path.substr(start, fileExtension.length()); + + return (match == fileExtension); +} + NSURL * Utils::stringToNSURL(const std::string& str) { NSString *nsstr = [NSString stringWithUTF8String:str.c_str()]; return [NSURL fileURLWithPath:nsstr]; diff --git a/coremltools/__init__.py b/coremltools/__init__.py index 84821cea1..30130e1ba 100644 --- a/coremltools/__init__.py +++ b/coremltools/__init__.py @@ -21,6 +21,7 @@ For more information: http://developer.apple.com/documentation/coreml """ + from enum import Enum as _Enum from logging import getLogger as _getLogger @@ -90,15 +91,14 @@ class ComputeUnit(_Enum): # expose sub packages as directories from . import converters, models, optimize, proto - # expose unified converter in coremltools package level from .converters import ClassifierConfig from .converters import ColorLayout as colorlayout from .converters import EnumeratedShapes, ImageType, RangeDim, Shape, TensorType, convert from .converters.mil._deployment_compatibility import AvailableTarget as target from .converters.mil.mil.passes.defs import quantization as transform -from .converters.mil.mil.passes.pass_pipeline import PassPipeline from .converters.mil.mil.passes.defs.quantization import ComputePrecision as precision +from .converters.mil.mil.passes.pass_pipeline import PassPipeline from .models import utils from .models.ml_program import compression_utils diff --git a/coremltools/_deps/__init__.py b/coremltools/_deps/__init__.py index 7afd4d05f..bc98849fc 100644 --- a/coremltools/_deps/__init__.py +++ b/coremltools/_deps/__init__.py @@ -154,7 +154,7 @@ def __get_sklearn_version(version): # --------------------------------------------------------------------------------------- _HAS_TORCH = True -_TORCH_MAX_VERSION = "2.1.0" +_TORCH_MAX_VERSION = "2.2.0" _HAS_TORCH_EXPORT_API = False try: import torch diff --git a/coremltools/converters/mil/backend/mil/helper.py b/coremltools/converters/mil/backend/mil/helper.py index 9a88b4fc9..d6c7cd66a 100644 --- a/coremltools/converters/mil/backend/mil/helper.py +++ b/coremltools/converters/mil/backend/mil/helper.py @@ -3,21 +3,11 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -import os import numpy as np -import coremltools.proto.FeatureTypes_pb2 as ft -import coremltools.proto.MIL_pb2 as pm +from coremltools import proto from coremltools.converters.mil.mil import types -from coremltools.converters.mil.mil.types import ( - BUILTIN_TO_PROTO_TYPES, - builtin_to_string, - numpy_type_to_builtin_type, - type_to_builtin_type, -) -from coremltools.converters.mil.mil.types.type_mapping import np_val_to_py_type -from coremltools.models.utils import _WEIGHTS_DIR_NAME, _WEIGHTS_FILE_NAME # For immediate values, those types are stored in bytes (MIL parser reads those types from bytes). IMMEDIATE_VALUE_TYPES_IN_BYTES = (types.fp16, types.int8, types.uint8, types.uint32) @@ -25,9 +15,9 @@ def create_valuetype_scalar(data_type): """ - Return pm.ValueType with DataType set + Return proto.MIL_pb2.ValueType with DataType set """ - v_type = pm.ValueType() + v_type = proto.MIL_pb2.ValueType() update_tensortype(v_type.tensorType, (), data_type) return v_type @@ -45,18 +35,18 @@ def update_listtype(l_type, length, elem_shape, dtype): def create_valuetype_list(length, elem_shape, dtype): """ - Return pm.ValueType with List (ListType) set. + Return proto.MIL_pb2.ValueType with List (ListType) set. 
length: length of list (int) """ - v_type = pm.ValueType() + v_type = proto.MIL_pb2.ValueType() update_listtype(v_type.listType, length, elem_shape, dtype) return v_type def create_valuetype_dict(key_type, value_type): """ - Return pm.ValueType with dict (dictionaryType) set + Return proto.MIL_pb2.ValueType with dict (dictionaryType) set """ - v_type = pm.ValueType() + v_type = proto.MIL_pb2.ValueType() v_type.dictionaryType.keyType.CopyFrom(types_to_proto(key_type)) v_type.dictionaryType.valueType.CopyFrom(types_to_proto(value_type)) return v_type @@ -64,10 +54,10 @@ def create_valuetype_dict(key_type, value_type): def create_valuetype_tensor(shape, data_type): """ - Return pm.ValueType with tensor (TensorType) set. + Return proto.MIL_pb2.ValueType with tensor (TensorType) set. shape: list of ints """ - v_type = pm.ValueType() + v_type = proto.MIL_pb2.ValueType() update_tensortype(v_type.tensorType, shape, data_type) return v_type @@ -123,7 +113,10 @@ def _tensor_field_by_type(tensor_val, builtin_type): return tensor_val.bytes.values else: raise TypeError( - "Unsupported float dtype for MIL proto serialization: {}".format(builtin_to_string(builtin_type))) + "Unsupported float dtype for MIL proto serialization: {}".format( + types.builtin_to_string(builtin_type) + ) + ) elif builtin_type == types.str: return tensor_val.strings.values else: @@ -147,7 +140,11 @@ def _set_empty_tensor_field_by_type(tensor_val, builtin_type): elif (builtin_type == types.fp16): tensor_val.bytes.SetInParent() else: - raise TypeError("Unsupported float dtype for MIL proto serialization: {}".format(builtin_to_string(builtin_type))) + raise TypeError( + "Unsupported float dtype for MIL proto serialization: {}".format( + types.builtin_to_string(builtin_type) + ) + ) elif builtin_type == types.str: tensor_val.strings.SetInParent() else: @@ -157,10 +154,10 @@ def create_tensor_value(np_tensor): """ Return TensorValue. 
""" - builtin_type = numpy_type_to_builtin_type(np_tensor.dtype) + builtin_type = types.numpy_type_to_builtin_type(np_tensor.dtype) value_type = create_valuetype_tensor(np_tensor.shape, types_to_proto_primitive(builtin_type)) - val = pm.Value(type=value_type) + val = proto.MIL_pb2.Value(type=value_type) t_val = val.immediateValue.tensor # Copy the tensor values from the input tensor @@ -171,10 +168,10 @@ def create_tensor_value(np_tensor): for x in np.nditer(np_tensor): t_field.append(x.encode("utf-8")) elif builtin_type in IMMEDIATE_VALUE_TYPES_IN_BYTES: - val.immediateValue.tensor.bytes.values = np_val_to_py_type(np_tensor) + val.immediateValue.tensor.bytes.values = types.type_mapping.np_val_to_py_type(np_tensor) else: for x in np_tensor.flatten(): - t_field.append(np_val_to_py_type(x)) + t_field.append(types.type_mapping.np_val_to_py_type(x)) else: # This is an "empty" tensor (tensor with a dimension being size 0) _set_empty_tensor_field_by_type(t_val, builtin_type) return val @@ -185,20 +182,20 @@ def create_scalar_value(py_scalar): Return TensorValue (since there's no ScalarValue) """ # Create the "scalar" (rank 0) tensor - builtin_type = type_to_builtin_type(type(py_scalar)) + builtin_type = types.type_to_builtin_type(type(py_scalar)) value_type = create_valuetype_scalar(types_to_proto_primitive(builtin_type)) - val = pm.Value(type=value_type) + val = proto.MIL_pb2.Value(type=value_type) t_val = val.immediateValue.tensor # Set the tensor value t_field = _tensor_field_by_type(t_val, builtin_type) if builtin_type in IMMEDIATE_VALUE_TYPES_IN_BYTES: # Serialize to bytes because MIL read them from the "bytes" field in TensorValue. - val.immediateValue.tensor.bytes.values = np_val_to_py_type(py_scalar) + val.immediateValue.tensor.bytes.values = types.type_mapping.np_val_to_py_type(py_scalar) else: if builtin_type == types.str: py_scalar = py_scalar.encode("utf-8") - t_field.append(np_val_to_py_type(py_scalar)) + t_field.append(types.type_mapping.np_val_to_py_type(py_scalar)) return val @@ -207,7 +204,7 @@ def create_tuple_value(py_tuple): """ Return type of Tuple """ - tp_val = pm.TupleValue() + tp_val = proto.MIL_pb2.TupleValue() for t in py_tuple: item_val = tp_val.values.add() item_type = item_val.type # ValueType @@ -227,11 +224,11 @@ def create_list_scalarvalue(py_list, np_type): """ Return a Value of type List, which holds scalar values """ - builtin_type = numpy_type_to_builtin_type(np_type) + builtin_type = types.numpy_type_to_builtin_type(np_type) value_type = create_valuetype_list(length=len(py_list), elem_shape=(), dtype=types_to_proto_primitive(builtin_type)) - val = pm.Value(type=value_type) + val = proto.MIL_pb2.Value(type=value_type) list_val = val.immediateValue.list for v in py_list: @@ -244,15 +241,15 @@ def create_file_value_tensor(file_name, offset, dim, data_type): """ Create a Value Type to store File Value """ - val = pm.Value( - blobFileValue=pm.Value.BlobFileValue(fileName=file_name, offset=offset), + val = proto.MIL_pb2.Value( + blobFileValue=proto.MIL_pb2.Value.BlobFileValue(fileName=file_name, offset=offset), type=create_valuetype_tensor(dim, data_type), ) return val def types_to_proto_primitive(valuetype): - if valuetype not in BUILTIN_TO_PROTO_TYPES: + if valuetype not in types.BUILTIN_TO_PROTO_TYPES: additional_error_msg = "" if valuetype in (types.complex64, types.complex128): additional_error_msg = ( @@ -262,7 +259,7 @@ def types_to_proto_primitive(valuetype): raise ValueError( f"Unknown map from SSA type {valuetype} to Proto type. 
{additional_error_msg}" ) - return BUILTIN_TO_PROTO_TYPES[valuetype] + return types.BUILTIN_TO_PROTO_TYPES[valuetype] def types_to_proto(valuetype): @@ -270,7 +267,7 @@ def types_to_proto(valuetype): primitive = types_to_proto_primitive(valuetype.get_primitive()) return create_valuetype_tensor(valuetype.get_shape(), primitive) elif types.is_tuple(valuetype): - v_type = pm.ValueType() + v_type = proto.MIL_pb2.ValueType() t_type = v_type.tupleType for t in valuetype.T: new_v_type = t_type.types.add() @@ -321,17 +318,6 @@ def _get_offset_by_writing_data(output_var, blob_writer): return offset - -def create_file_value(output_var, blob_writer): - offset = _get_offset_by_writing_data(output_var, blob_writer) - - return create_file_value_tensor( - file_name=os.path.join(os.path.join('@model_path', _WEIGHTS_DIR_NAME), _WEIGHTS_FILE_NAME), - offset=offset, - dim=output_var.val.shape, - data_type=types_to_proto_primitive(output_var.sym_type.get_primitive()), - ) - def create_immediate_value(var): if types.is_tensor(var.sym_type): return create_tensor_value(var.val) @@ -347,13 +333,20 @@ def create_immediate_value(var): def cast_to_framework_io_dtype(var, is_output): if var.dtype == types.fp32: - return ft.ArrayFeatureType.ArrayDataType.FLOAT32 + return proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.FLOAT32 elif var.dtype == types.int32: - return ft.ArrayFeatureType.ArrayDataType.INT32 + return proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.INT32 elif var.dtype == types.fp16: - return ft.ArrayFeatureType.ArrayDataType.FLOAT16 + return proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.FLOAT16 else: ioname = "Output " if is_output else "Input " ioname2 = "outputs" if is_output else "inputs" - raise NotImplementedError(ioname + var.name + " has data type " + builtin_to_string(var.dtype) + \ - ". ML Program models only support fp32 and int32 " + ioname2 + ".") + raise NotImplementedError( + ioname + + var.name + + " has data type " + + types.builtin_to_string(var.dtype) + + ". ML Program models only support fp32 and int32 " + + ioname2 + + "." 
+ ) diff --git a/coremltools/converters/mil/backend/mil/load.py b/coremltools/converters/mil/backend/mil/load.py index 4d070c708..b57b590df 100644 --- a/coremltools/converters/mil/backend/mil/load.py +++ b/coremltools/converters/mil/backend/mil/load.py @@ -5,39 +5,46 @@ import os import warnings -from typing import Optional +from collections import OrderedDict +from typing import Any, Dict, List, Optional import numpy as np -from coremltools import _OPSET, _SPECIFICATION_VERSION_IOS_15 +from coremltools import _OPSET, _SPECIFICATION_VERSION_IOS_15, _SPECIFICATION_VERSION_IOS_17 from coremltools import _logger as logger +from coremltools import proto +from coremltools.converters.mil import mil from coremltools.converters.mil.backend.backend_helper import _get_probability_var_for_classifier +from coremltools.converters.mil.backend.mil import helper from coremltools.converters.mil.backend.mil.helper import ( cast_to_framework_io_dtype, - create_file_value, + create_file_value_tensor, create_immediate_value, create_list_scalarvalue, create_scalar_value, types_to_proto, + types_to_proto_primitive, ) from coremltools.converters.mil.backend.nn.load import _set_optional_inputs -from coremltools.converters.mil.input_types import EnumeratedShapes, ImageType, RangeDim, TensorType +from coremltools.converters.mil.input_types import ( + ClassifierConfig, + EnumeratedShapes, + ImageType, + RangeDim, + TensorType, +) +from coremltools.converters.mil.mil import Block from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Function, Program, mil_list, types +from coremltools.converters.mil.mil import Function, Operation, Program, Var, mil_list, types from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry +from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource from coremltools.converters.mil.mil.types.symbolic import any_symbolic, any_variadic, is_symbolic +from coremltools.models.neural_network import flexible_shape_utils from coremltools.models.neural_network.flexible_shape_utils import ( NeuralNetworkImageSize, NeuralNetworkImageSizeRange, - add_enumerated_image_sizes, - add_multiarray_ndshape_enumeration, - set_multiarray_ndshape_range, - update_image_size_range, ) -from coremltools.models.utils import _WEIGHTS_FILE_NAME -from coremltools.proto import FeatureTypes_pb2 as ft -from coremltools.proto import MIL_pb2 as pm -from coremltools.proto import Model_pb2 as ml +from coremltools.models.utils import _WEIGHTS_DIR_NAME, _WEIGHTS_FILE_NAME from ..backend_helper import _get_colorspace_enum, _validate_image_input_output_shapes @@ -56,174 +63,304 @@ def should_use_weight_file(val): and val.dtype in ['float16', 'float32', 'uint8', 'int8'] ) +class MILProtoExporter: + """ + An utility class to export a pymil program to milproto. 
+ """ -def translate_const(op, blob_writer): - output_var = op.outputs[0] - - if should_use_weight_file(output_var.val): - value = create_file_value(output_var, blob_writer) - else: - value = create_immediate_value(output_var) - - return pm.Operation( - type="const", - attributes={"name": create_scalar_value(op.name), "val": value}, - outputs=[ - pm.NamedValueType( - name=output_var.name, type=types_to_proto(output_var.sym_type) - ) - ], - ) + def __init__( + self, + prog: Program, + weights_dir: str, + ): + self.prog = prog + self.weights_dir = weights_dir + self.blob_writers = {} + self.prog.validate(check_essential_scope=True) + + def translate_program_attributes(self) -> Dict[str, Any]: + """ + Get the program attributes which need to be exported to mil proto. + """ + return {} + + def get_weight_path(self, op: Operation) -> str: + """ + Get the weight path for a constant operation. + By default, the weight is saved in {weight_dir}/weight.bin + """ + assert ( + op.op_type == "const" + ), f"Expected op (op.name) be a const op. Got op_type of {op.op_type}." + return os.path.join(self.weights_dir, _WEIGHTS_FILE_NAME) + + def get_blob_writer(self, weight_path: str) -> BlobWriter: + """ + Get a blob writer given a weight_path. + """ + if weight_path not in self.blob_writers: + self.blob_writers[weight_path] = BlobWriter(weight_path) + return self.blob_writers[weight_path] + + def create_file_value(self, var: Var) -> proto.MIL_pb2.Value: + """ + Returns the mil proto file value of a var. + """ + weight_path = self.get_weight_path(var.op) + blob_writer = self.get_blob_writer(weight_path) + offset = helper._get_offset_by_writing_data(var, blob_writer) + weight_file_name = os.path.basename(weight_path) + + return create_file_value_tensor( + file_name=os.path.join( + os.path.join("@model_path", _WEIGHTS_DIR_NAME), weight_file_name + ), + offset=offset, + dim=var.val.shape, + data_type=types_to_proto_primitive(var.sym_type.get_primitive()), + ) + def get_milproto_value(self, var: Var) -> proto.MIL_pb2.Value: + """ + Translate a pymil Var into milproto value. + """ + if should_use_weight_file(var.val): + return self.create_file_value(var) + else: + return create_immediate_value(var) + + @staticmethod + def _get_input_dict(op: Operation) -> Dict[str, Any]: + """ + Given an op, returns a dict that maps the param name into the corresponding Var. + """ + return op.inputs + + @staticmethod + def _get_attr_dict(op: Operation) -> Dict[str, Any]: + """ + Return the initial attribute dict for an op. + """ + return {"name": create_scalar_value(op.name)} + + def translate_const(self, op: Operation) -> proto.MIL_pb2.Operation: + """ + Translate constant operation. + """ + if len(op.outputs) != 1: + raise AssertionError(f"const {op.name} must have 1 output, but got {len(op.outputs)}") + + output_var = op.outputs[0] + value = self.get_milproto_value(output_var) + + return proto.MIL_pb2.Operation( + type="const", + attributes={"name": create_scalar_value(op.name), "val": value}, + outputs=[ + proto.MIL_pb2.NamedValueType( + name=output_var.name, type=types_to_proto(output_var.sym_type) + ) + ], + ) -def translate_constexpr(op, blob_writer): + def translate_constexpr(self, op: Operation) -> proto.MIL_pb2.Operation: + """ + Translate constexpr operation. 
+ """ + inputs = {} + attributes = {"name": create_scalar_value(op.name)} - def get_value(var): - if should_use_weight_file(var.val): - value = create_file_value(var, blob_writer) + if op.opset_version <= _SPECIFICATION_VERSION_IOS_17: + attributes.update( + {param_name: self.get_milproto_value(var) for param_name, var in op.inputs.items()} + ) else: - value = create_immediate_value(var) + for param_name, var in op.inputs.items(): + if var.op.op_type.startswith("constexpr_"): + arguments = [proto.MIL_pb2.Argument.Binding(name=var.name)] + else: + arguments = [proto.MIL_pb2.Argument.Binding(value=self.get_milproto_value(var))] + args = proto.MIL_pb2.Argument() + args.arguments.extend(arguments) + inputs[param_name] = args + + return proto.MIL_pb2.Operation( + type=op.op_type, + inputs=inputs, + attributes=attributes, + outputs=[ + proto.MIL_pb2.NamedValueType( + name=output_var.name, type=types_to_proto(output_var.sym_type) + ) + for output_var in op.outputs + ], + ) - return value + def translate_generic_op( + self, op: Operation, literal_params: Optional[List[str]] = None + ) -> proto.MIL_pb2.Operation: + """ + Translate a generic pymil Operation. + """ + if literal_params is None: + literal_params = [] - output_var = op.outputs[0] + inputs = {} - attributes = {"name": create_scalar_value(op.name)} - attributes.update({k: get_value(v) for k, v in op.inputs.items()}) + for param_name, vars in self._get_input_dict(op).items(): + if param_name.startswith("_"): + continue + if not isinstance(vars, (list, tuple)): + vars = [vars] + + arguments = [] + for _var in vars: + binding = proto.MIL_pb2.Argument.Binding() + # use const value literals if requested + if param_name in literal_params: + binding.value.CopyFrom(create_immediate_value(_var)) + else: + binding.name = _var.name + arguments.append(binding) + + args = proto.MIL_pb2.Argument() + args.arguments.extend(arguments) + inputs[param_name] = args + + outputs = [ + proto.MIL_pb2.NamedValueType(name=v.name, type=types_to_proto(v.sym_type)) + for v in op.outputs + ] + blocks = None + if len(op.blocks) > 0: + blocks = [self.create_block(b) for b in op.blocks] + + op_type = op.op_type + attr_dict = self._get_attr_dict(op) + if op.op_type in SSAOpRegistry.custom_ops: + op_type = "custom_layer" + class_name = op.bindings.get("class_name", op.name) + input_order = op.bindings.get("input_order", []) + parameters = op.bindings.get("parameters", []) + weights = op.bindings.get("weights", []) + description = op.bindings.get("description", "") + + attr_dict["class_name"] = create_scalar_value(class_name) + attr_dict["input_order"] = create_list_scalarvalue(input_order, str) + attr_dict["parameters"] = create_list_scalarvalue(parameters, str) + attr_dict["weights"] = create_list_scalarvalue(weights, str) + attr_dict["description"] = create_scalar_value(description) + + return proto.MIL_pb2.Operation( + type=op_type, + blocks=blocks, + inputs=inputs, + attributes=attr_dict, + outputs=outputs, + ) - return pm.Operation( - type=op.op_type, - attributes=attributes, - outputs=[ - pm.NamedValueType( - name=output_var.name, type=types_to_proto(output_var.sym_type) + def create_block(self, block: Block) -> proto.MIL_pb2.Block: + """ + Translate pymil Block. 
+ """ + def feeds_to_only_constexprs(op: Operation) -> bool: + return ( + (op.op_type == "const") + and len(op.outputs[0].child_ops) > 0 + and all( + (child_op.op_type.startswith("constexpr_")) + for child_op in op.outputs[0].child_ops + ) ) - ], - ) + proto_ops = [] + + # Find the const op that generates classify's "label" / "class" string vec. + classify_const_classes_op = None + if len(block.operations) > 0: + # Classify is always the last operation in the block. + op = block.operations[-1] + op_cls_name = type(op).__name__ + if op_cls_name == "classify": + classes_var = op.inputs["classes"] + classify_const_classes_op = classes_var.op + if len(classes_var.child_ops) != 1: + raise ValueError( + "Classify's labels/classes should be input to only 1 op (classify)." + ) -def translate_generic_op(op, parameters, blob_writer, literal_params=[]): - inputs = {} - for param_name, vars in op.inputs.items(): - if param_name.startswith("_"): - continue - if not isinstance(vars, (list, tuple)): - vars = [vars] - - arguments = [] - for _var in vars: - binding = pm.Argument.Binding() - # use const value literals if requested - if param_name in literal_params: - binding.value.CopyFrom(create_immediate_value(_var)) + for op in block.operations: + op_cls_name = type(op).__name__ + if op_cls_name == "const": + if feeds_to_only_constexprs(op): + continue + # Do not serialize the const op that creates the var bound to the classifier's "classes" param. + # The variable's value will be bound directly to classify's "classes" param instead. + if op != classify_const_classes_op: + proto_ops.append(self.translate_const(op)) + elif op_cls_name.startswith("constexpr_"): + proto_ops.append(self.translate_constexpr(op)) + elif op_cls_name == "classify": + # Classify's "classes" param should be serialized as a value literal bound + # directly to the param, rather than as a const-generated variable. 
+ proto_ops.append(self.translate_generic_op(op, ["classes"])) + elif op_cls_name == "reshape_like": + # The reshape_like should also be able to take value from a const op + # This is a workaround solution + # rdar://98689808 (Reshape_like should also accept const value from non literal input) + literal_params = ["begins", "ends", "end_masks"] + proto_ops.append(self.translate_generic_op(op, literal_params)) else: - binding.name = _var.name - arguments.append(binding) - - args = pm.Argument() - args.arguments.extend(arguments) - inputs[param_name] = args - - outputs = [ - pm.NamedValueType(name=v.name, type=types_to_proto(v.sym_type)) - for v in op.outputs - ] - blocks = None - if len(op.blocks) > 0: - blocks = [create_block(b, parameters, blob_writer) for b in op.blocks] - - op_type = op.op_type - attr_dict = {} - if op.op_type in SSAOpRegistry.custom_ops: - op_type = "custom_layer" - class_name = op.bindings.get("class_name", op.name) - input_order = op.bindings.get("input_order", []) - parameters = op.bindings.get("parameters", []) - weights = op.bindings.get("weights", []) - description = op.bindings.get("description", "") - - attr_dict["name"] = create_scalar_value(op.name) - attr_dict["class_name"] = create_scalar_value(class_name) - attr_dict["input_order"] = create_list_scalarvalue(input_order, str) - attr_dict["parameters"] = create_list_scalarvalue(parameters, str) - attr_dict["weights"] = create_list_scalarvalue(weights, str) - attr_dict["description"] = create_scalar_value(description) - - attr_dict["name"] = create_scalar_value(op.name) - - return pm.Operation( - type=op_type, - blocks=blocks, - inputs=inputs, - attributes=attr_dict, - outputs=outputs, - ) + proto_ops.append(self.translate_generic_op(op)) + + inputs = [] + if not isinstance(block, Function): + # Function is subclass of Block, but function's block has no input, + # and hence skipping reading the block inputs. + for var in block.inputs: + proto_type = types_to_proto(var.sym_type) + inputs.append(proto.MIL_pb2.NamedValueType(name=var.name, type=proto_type)) + output_names = [v.name for v in block.outputs] + return proto.MIL_pb2.Block(inputs=inputs, outputs=output_names, operations=proto_ops) + + def convert_function(self, function: Function, opset: str) -> proto.MIL_pb2.Function: + """ + Translate pymil Function. + """ + block = self.create_block(function) + + inputs = [] + for name, var in function.inputs.items(): + proto_type = types_to_proto(var.sym_type) + inputs.append(proto.MIL_pb2.NamedValueType(name=name, type=proto_type)) -def create_block(block, parameters, blob_writer): - - def feeds_to_only_constexprs(op): - return (op.op_type == 'const') \ - and len(op.outputs[0].child_ops) > 0 \ - and all((child_op.op_type.startswith("constexpr_")) for child_op in op.outputs[0].child_ops) - - proto_ops = [] - - # Find the const op that generates classify's "label" / "class" string vec. - classify_const_classes_op = None - if len(block.operations) > 0: - # Classify is always the last operation in the block. 
- op = block.operations[-1] - op_cls_name = type(op).__name__ - if (op_cls_name == "classify"): - classes_var = op.inputs["classes"] - classify_const_classes_op = classes_var.op - if (len(classes_var.child_ops) != 1): - raise ValueError("Classify's labels/classes should be input to only 1 op (classify).") - - for op in block.operations: - op_cls_name = type(op).__name__ - if op_cls_name == "const": - if feeds_to_only_constexprs(op): - continue - # Do not serialize the const op that creates the var bound to the classifier's "classes" param. - # The variable's value will be bound directly to classify's "classes" param instead. - if op != classify_const_classes_op: - proto_ops.append(translate_const(op, blob_writer)) - elif op_cls_name.startswith("constexpr_"): - proto_ops.append(translate_constexpr(op, blob_writer)) - elif op_cls_name == "classify": - # Classify's "classes" param should be serialized as a value literal bound - # directly to the param, rather than as a const-generated variable. - proto_ops.append(translate_generic_op(op, parameters, blob_writer, ["classes"])) - elif op_cls_name == "reshape_like": - # The reshape_like should also be able to take value from a const op - # This is a workaround solution - # rdar://98689808 (Reshape_like should also accept const value from non literal input) - literal_params = ["begins", "ends", "end_masks"] - proto_ops.append(translate_generic_op(op, parameters, blob_writer, literal_params)) - else: - proto_ops.append(translate_generic_op(op, parameters, blob_writer)) + return proto.MIL_pb2.Function( + inputs=inputs, opset=opset, block_specializations={opset: block} + ) - inputs = [] - if not isinstance(block, Function): - # Function is subclass of Block, but function's block has no input, - # and hence skipping reading the block inputs. - for var in block.inputs: - proto_type = types_to_proto(var.sym_type) - inputs.append(pm.NamedValueType(name=var.name, type=proto_type)) - output_names = [v.name for v in block.outputs] - return pm.Block(inputs=inputs, outputs=output_names, operations=proto_ops) + def export( + self, specification_version: Optional[str] = _SPECIFICATION_VERSION_IOS_15 + ) -> proto.MIL_pb2.Program: + """ + Export a pymil program into mil proto with the given specification version. + """ + if BlobWriter is None: + raise RuntimeError("BlobWriter not loaded") + function_protos = {} + for func_name, func in self.prog.functions.items(): + function_protos[func_name] = self.convert_function(func, _OPSET[specification_version]) -def convert_function(function, parameters, blob_writer, opset): - block = create_block(function, parameters, blob_writer) + kwargs = { + "version": 1, + "functions": function_protos, + } - inputs = [] - for name, var in function.inputs.items(): - proto_type = types_to_proto(var.sym_type) - inputs.append(pm.NamedValueType(name=name, type=proto_type)) + prog_attributes = self.translate_program_attributes() + if len(prog_attributes) > 0: + kwargs["attributes"] = prog_attributes - return pm.Function(inputs=inputs, opset=opset, block_specializations={opset: block}) + return proto.MIL_pb2.Program(**kwargs) # Add a classify op to the output. 
# Replaces the original probabilities output (in the containing MIL block) @@ -237,6 +374,8 @@ def remove_output(block, prob_var): for i in range(len(block.outputs)): if block.outputs[i] is prob_var: block.outputs.pop(i) + if block in prob_var.consuming_blocks: + prob_var.consuming_blocks.remove(block) break block = prog.functions["main"] @@ -258,342 +397,461 @@ def remove_output(block, prob_var): raise ValueError(message) probability_var = _get_probability_var_for_classifier(prog, classifier_config) + original_probability_var = probability_var # add the classify op now - with block: - # cast the int label to np.int64 - if isinstance(classes[0], int): - classes = [np.int64(x) for x in classes] - classes_var = mb.const(val=mil_list(classes)) - if probability_var.dtype != types.fp32: + # we consider this step as a scope of coremltools graph pass + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["add_classify_op"])): + with block: + # cast the int label to np.int64 + if isinstance(classes[0], int): + classes = [np.int64(x) for x in classes] + classes_var = mb.const(val=mil_list(classes)) + if probability_var.dtype != types.fp32: + remove_output(block, probability_var) + probability_var = mb.cast( + x=probability_var, dtype="fp32", name=probability_var.name + "_cast_to_fp32" + ) + out = mb.classify(probabilities=probability_var, classes=classes_var) + + predicted_feature_name = ( + "classLabel" + if classifier_config.predicted_feature_name is None + else classifier_config.predicted_feature_name + ) + out[0].name = predicted_feature_name + out[1].name = predicted_feature_name + "_probs" + + # Remove probabilities from block outputs, replace with classify's outputs remove_output(block, probability_var) - probability_var = mb.cast(x=probability_var, dtype="fp32", name=probability_var.name + "_cast_to_fp32") - out = mb.classify(probabilities=probability_var, - classes=classes_var - ) + block.outputs[:0] = out + out[0].consuming_blocks.append(block) + out[1].consuming_blocks.append(block) - predicted_feature_name = "classLabel" if classifier_config.predicted_feature_name is None \ - else classifier_config.predicted_feature_name - out[0].name = predicted_feature_name - out[1].name = predicted_feature_name + "_probs" + # The new classifier op should have scope information + Block._copy_scope_info(original_probability_var, out[0]) - # Remove probabilities from block outputs, replace with classify's outputs - remove_output(block, probability_var) - block.outputs[:0] = out - return out[0].name, out[1].name + return out[0].name, out[1].name -def _pymil_to_milproto( - prog: Program, - weights_dir: str, - specification_version: Optional[int] = _SPECIFICATION_VERSION_IOS_15, -) -> pm.Program: +class CoreMLProtoExporter: """ - Convert a pymil program into mil proto. + An utility class to export a pymil program to coreml model. 
""" - if BlobWriter is None: - raise RuntimeError("BlobWriter not loaded") - - weight_path = os.path.join(weights_dir, _WEIGHTS_FILE_NAME) - blob_writer = BlobWriter(weight_path) - - opset = _OPSET[specification_version] - - function_protos = {} - for func_name, func in prog.functions.items(): - function_protos[func_name] = convert_function(func, prog.parameters, blob_writer, opset) - proto = pm.Program( - version=1, - functions=function_protos, - ) - return proto - - -def load( - prog: Program, - weights_dir: str, - resume_on_errors: Optional[bool] = False, - specification_version: Optional[int] = _SPECIFICATION_VERSION_IOS_15, - **kwargs, -): - if "main" not in prog.functions: - raise ValueError("main function not found in program") - - # if user has specified "ClassifierConfig", then add the "classify" op to the prog - classifier_config = kwargs.get("classifier_config", None) - predicted_feature_name = None - predicted_probabilities_name = None - if classifier_config is not None: - predicted_feature_name, predicted_probabilities_name = _add_classify_op( - prog, classifier_config - ) - - # convert pymil program into mil proto - proto = _pymil_to_milproto(prog, weights_dir, specification_version) - - input_types = prog.main_input_types - output_types = prog.main_output_types - - desc = kwargs.get("model_description", None) - if desc and not isinstance(desc, ml.ModelDescription): - raise ValueError("Invalid model descriptor") - - if desc: - if classifier_config is not None: - raise AssertionError("Both model_description and classifier_config can't be provided") - model = ml.Model(description=desc, specificationVersion=specification_version) - model.mlProgram.CopyFrom(proto) - return model + _DEFAULT_FUNCTION_NAME = "main" + + def __init__( + self, + prog: mil.Program, + mil_proto: proto.MIL_pb2.Program, + predicted_feature_name: str, + predicted_probabilities_name: str, + classifier_config: ClassifierConfig, + convert_to: str, + convert_from: str, + ): + self.prog = prog + self.mil_proto = mil_proto + self.predicted_feature_name = predicted_feature_name + self.predicted_probabilities_name = predicted_probabilities_name + self.classifier_config = classifier_config + self.convert_to = convert_to + self.convert_from = convert_from + self.prog.validate(check_essential_scope=True) + + @staticmethod + def get_additional_kwargs(kwargs: Dict[str, Any]) -> Dict[str, Any]: + """ + Get additional coreml proto related kwargs. + """ + return {} + + def get_func_input(self, func: mil.Function) -> List[proto.Model_pb2.FeatureDescription]: + """ + Utils to get function input feature description. + """ + input_types = func.input_types + + input_features = [] + image_input_names = {} # these are the model inputs marked as image by the user + input_shape_map = {} + + for input_type in input_types: + if isinstance(input_type, ImageType): + image_input_names[input_type.name] = input_type + # error checking for input(s) marked as images + if input_type.name not in list(func.inputs.keys()): + raise ValueError( + f"Provided image input '{input_type.name}' is not one of the inputs of the MIL program" + ) + if input_type.name is None: + raise ValueError( + 'Fail to auto-determine the input name. Please specify the "name" ' + 'parameter when use "inputs" in ct.convert().' 
+ ) + input_shape_map[input_type.name] = input_type - input_features = [] - output_features = [] - symbolic_inputs = [] - image_input_names = {} # these are the model inputs marked as image by the user - input_shape_map = {} + for name, var in func.inputs.items(): + input_feature_type = proto.FeatureTypes_pb2.FeatureType() + is_input_shape_symbolic = False - for input_type in input_types: - if isinstance(input_type, ImageType): - image_input_names[input_type.name] = input_type # error checking for input(s) marked as images - if input_type.name not in list(prog.functions["main"].inputs.keys()): + # an image input must be of type tensor in program proto + # (since an image type does not exist in MIL program) + if name in image_input_names and not types.is_tensor(var.sym_type): raise ValueError( - f"Provided image input '{input_type.name}' is not one of the inputs of the MIL program" + "For the image input, '{}', its type in the MIL program must be tensor. " + "Instead it is {}.".format(name, var.sym_type.__type_info__()) ) - if input_type.name is None: - raise ValueError( - 'Fail to auto-determine the input name. Please specify the "name" ' - 'parameter when use "inputs" in ct.convert().' - ) - input_shape_map[input_type.name] = input_type - - for name, var in prog.functions["main"].inputs.items(): - input_feature_type = ft.FeatureType() - - # error checking for input(s) marked as images - # an image input must be of type tensor in program proto - # (since an image type does not exist in MIL program) - if name in image_input_names and \ - not types.is_tensor(var.sym_type): - raise ValueError("For the image input, '{}', its type in the MIL program must be tensor. " - "Instead it is {}.".format(name, var.sym_type.__type_info__())) - - if types.is_tensor(var.sym_type): - shape = var.sym_type.get_shape() - if any_variadic(shape): - raise ValueError("Variable rank model inputs are not supported!") - if any_symbolic(shape): - symbolic_inputs.append(name) - # We extract the default input shape given by user first - if name in input_shape_map: - shape = input_shape_map[name].shape.default - else: - logger.warning("Input shape not fully specified by enumerated shapes or range dim! 1 will be used for dimension not specified instead.") - # If no input shape is provided (ex. auto conversion of -1 in Tensorflow) - shape = [1 if is_symbolic(d) else d for d in shape] - if name not in image_input_names: - # make a feature type of Type "multiArrayType" - array_type = ft.ArrayFeatureType(shape=shape, dataType=cast_to_framework_io_dtype(var, False)) - input_feature_type.multiArrayType.CopyFrom(array_type) - else: - # make a feature type of Type "imageType" - input_type = image_input_names[name] - _validate_image_input_output_shapes(input_type.color_layout, shape, name, is_input=True) - if not input_type.channel_first: - raise ValueError("Image input, '{}', must be in the channel_first format". 
- format(name)) - clr_space = _get_colorspace_enum(input_type.color_layout) - image_type = ft.ImageFeatureType(width=shape[-1], - height=shape[-2], - colorSpace=clr_space) - input_feature_type.imageType.CopyFrom(image_type) - - input_features.append( - ml.FeatureDescription(name=name, type=input_feature_type) - ) - elif types.is_scalar(var.sym_type): - array_type = ft.ArrayFeatureType(shape=[1], dataType=cast_to_framework_io_dtype(var, False)) - input_feature_type.multiArrayType.CopyFrom(array_type) - input_features.append(ml.FeatureDescription(name=var.name, type=input_feature_type)) - else: - raise NotImplementedError() - - if output_types is not None and classifier_config is None: - assert len(output_types) == len(prog.functions["main"].outputs), \ - "number of mil program outputs do not match the number of outputs provided by the user" - - for i, var in enumerate(prog.functions["main"].outputs): - output_feature_type = ft.FeatureType() - if types.is_tensor(var.sym_type) or types.is_primitive(var.sym_type): - if output_types is not None and isinstance(output_types[i], ImageType): - if not types.is_tensor(var.sym_type): - raise ValueError("Image output, '{}', is a scalar, but it should be a tensor of rank 4".format( - var.name)) + if types.is_tensor(var.sym_type): shape = var.sym_type.get_shape() if any_variadic(shape): - raise ValueError("Variable rank model outputs, that are ImageTypes, are not supported") - if any([is_symbolic(d) for d in shape]): - raise NotImplementedError("Image output '{}' has symbolic dimensions in its shape". - format(var.name)) - _validate_image_input_output_shapes(output_types[i].color_layout, shape, var.name, is_input=False) - clr_space = _get_colorspace_enum(output_types[i].color_layout) - image_type = ft.ImageFeatureType(width=shape[-1], - height=shape[-2], - colorSpace=clr_space) - output_feature_type.imageType.CopyFrom(image_type) - output_features.append( - ml.FeatureDescription(name=var.name, type=output_feature_type) - ) - else: - dataType = None - if classifier_config is None or var.name != predicted_feature_name: - # Not a classifier output, make sure model output type matches with ML Program type. - dataType = cast_to_framework_io_dtype(var, True) + raise ValueError("Variable rank model inputs are not supported!") + if any_symbolic(shape): + is_input_shape_symbolic = True + # We extract the default input shape given by user first + if name in input_shape_map: + shape = input_shape_map[name].shape.default + else: + logger.warning( + "Input shape not fully specified by enumerated shapes or range dim! 1 will be used for dimension not specified instead." + ) + # If no input shape is provided (ex. auto conversion of -1 in Tensorflow) + shape = [1 if is_symbolic(d) else d for d in shape] + + if name not in image_input_names: + # make a feature type of Type "multiArrayType" + array_type = proto.FeatureTypes_pb2.ArrayFeatureType( + shape=shape, dataType=cast_to_framework_io_dtype(var, False) + ) + input_feature_type.multiArrayType.CopyFrom(array_type) else: - # Classifier outputs are set up separately, so default to fp32 for now. 
- dataType = ft.ArrayFeatureType.ArrayDataType.FLOAT32 + # make a feature type of Type "imageType" + input_type = image_input_names[name] + _validate_image_input_output_shapes( + input_type.color_layout, shape, name, is_input=True + ) + if not input_type.channel_first: + raise ValueError( + "Image input, '{}', must be in the channel_first format".format(name) + ) + clr_space = _get_colorspace_enum(input_type.color_layout) + image_type = proto.FeatureTypes_pb2.ImageFeatureType( + width=shape[-1], height=shape[-2], colorSpace=clr_space + ) + input_feature_type.imageType.CopyFrom(image_type) - output_shape = ( - None - if any_symbolic(var.shape) or types.is_primitive(var.sym_type) - else var.shape + input_features.append( + proto.Model_pb2.FeatureDescription(name=name, type=input_feature_type) + ) + elif types.is_scalar(var.sym_type): + array_type = proto.FeatureTypes_pb2.ArrayFeatureType( + shape=[1], dataType=cast_to_framework_io_dtype(var, False) + ) + input_feature_type.multiArrayType.CopyFrom(array_type) + input_features.append( + proto.Model_pb2.FeatureDescription(name=var.name, type=input_feature_type) ) - array_type = ft.ArrayFeatureType(shape=output_shape, dataType=dataType) - output_feature_type.multiArrayType.CopyFrom(array_type) - output_features.append(ml.FeatureDescription(name=var.name, type=output_feature_type)) - elif (types.is_dict(var.sym_type)): - output_feature_type.dictionaryType.MergeFromString(b"") - keytype, valtype = var.sym_type.T - if types.is_str(keytype): - output_feature_type.dictionaryType.stringKeyType.MergeFromString(b"") - elif (keytype == types.int64): - output_feature_type.dictionaryType.int64KeyType.MergeFromString(b"") else: - raise ValueError("Dictionary key type not supported.") - output_features.append(ml.FeatureDescription(name=var.name, type=output_feature_type)) - else: - raise NotImplementedError() + raise NotImplementedError(f"Unsupported input type {var.sym_type}.") - # Model description - desc = ml.ModelDescription(input=input_features, output=output_features) - if classifier_config is not None: - desc.predictedFeatureName = predicted_feature_name - desc.predictedProbabilitiesName = predicted_probabilities_name - - # Manually edit output type of predictedFeatureName. - # It doesn't use MLMultiArray and really uses a "primitive" type. 
- for output in desc.output: - if output.name == predicted_feature_name: - if type(classifier_config.class_labels[0]) == int: - output.type.int64Type.MergeFromString(b"") - else: - output.type.stringType.MergeFromString(b"") - break - - # Create ML Model - model = ml.Model(description=desc, specificationVersion=specification_version) - model.mlProgram.CopyFrom(proto) + if not is_input_shape_symbolic: + continue - # Set symbolic shapes - default_lower_bound = 1 - default_upper_bound = ( - default_lower_bound + 1 if kwargs.get("convert_to", None) == "mlprogram" else -1 - ) - default_bound_used = False - for input_name in symbolic_inputs: - input_type = input_shape_map.get(input_name, None) - - if isinstance(input_type, ImageType): - if isinstance(input_type.shape, EnumeratedShapes): - enumerated_shapes = [] - for s in input_type.shape.shapes: - enumerated_shapes.append( - NeuralNetworkImageSize( - height=s.shape[-2], width=s.shape[-1] + # Set symbolic shapes + default_lower_bound = 1 + default_upper_bound = default_lower_bound + 1 if self.convert_to == "mlprogram" else -1 + default_bound_used = False + input_type = input_shape_map.get(name, None) + + if isinstance(input_type, ImageType): + if isinstance(input_type.shape, EnumeratedShapes): + enumerated_shapes = [] + for s in input_type.shape.shapes: + enumerated_shapes.append( + NeuralNetworkImageSize(height=s.shape[-2], width=s.shape[-1]) ) + flexible_shape_utils._add_enumerated_image_sizes_for_feature( + input_features[-1], sizes=enumerated_shapes ) - add_enumerated_image_sizes( - model, input_name, sizes=enumerated_shapes - ) - else: - img_range = NeuralNetworkImageSizeRange() - H = input_type.shape.shape[-2] - W = input_type.shape.shape[-1] - - if isinstance(H, RangeDim): - img_range.add_height_range((H.lower_bound, H.upper_bound)) - elif is_symbolic(H): - img_range.add_height_range((default_lower_bound, default_upper_bound)) - default_bound_used = True - else: - img_range.add_height_range((H, H)) - if isinstance(W, RangeDim): - img_range.add_width_range((W.lower_bound, W.upper_bound)) - elif is_symbolic(W): - img_range.add_width_range((default_lower_bound, default_upper_bound)) - default_bound_used = True else: - img_range.add_width_range((W, W)) + img_range = NeuralNetworkImageSizeRange() + H = input_type.shape.shape[-2] + W = input_type.shape.shape[-1] + + if isinstance(H, RangeDim): + img_range.add_height_range((H.lower_bound, H.upper_bound)) + elif is_symbolic(H): + img_range.add_height_range((default_lower_bound, default_upper_bound)) + default_bound_used = True + else: + img_range.add_height_range((H, H)) + if isinstance(W, RangeDim): + img_range.add_width_range((W.lower_bound, W.upper_bound)) + elif is_symbolic(W): + img_range.add_width_range((default_lower_bound, default_upper_bound)) + default_bound_used = True + else: + img_range.add_width_range((W, W)) - update_image_size_range( - model, input_name, img_range - ) - elif isinstance(input_type, TensorType): - if isinstance(input_type.shape, EnumeratedShapes): - add_multiarray_ndshape_enumeration( - model, input_name, [tuple(s.shape) for s in input_type.shape.shapes] - ) - else: + flexible_shape_utils._update_image_size_range_for_feature( + input_features[-1], img_range + ) + elif isinstance(input_type, TensorType): + if isinstance(input_type.shape, EnumeratedShapes): + flexible_shape_utils._add_multiarray_ndshape_enumeration_for_feature( + input_features[-1], [tuple(s.shape) for s in input_type.shape.shapes] + ) + else: + lb = [] + ub = [] + for s in input_type.shape.shape: + 
if isinstance(s, RangeDim): + lb.append(s.lower_bound) + ub.append(s.upper_bound) + elif is_symbolic(s): + lb.append(default_lower_bound) + ub.append(default_upper_bound) + default_bound_used = True + else: + lb.append(s) + ub.append(s) + flexible_shape_utils._set_multiarray_ndshape_range_for_feature( + input_features[-1], lower_bounds=lb, upper_bounds=ub + ) + elif input_type is None: + sym_type = func.inputs[name].sym_type lb = [] ub = [] - for s in input_type.shape.shape: - if isinstance(s, RangeDim): - lb.append(s.lower_bound) - ub.append(s.upper_bound) - elif is_symbolic(s): + for s in sym_type.get_shape(): + if is_symbolic(s): lb.append(default_lower_bound) ub.append(default_upper_bound) default_bound_used = True else: lb.append(s) ub.append(s) - set_multiarray_ndshape_range( - model, input_name, lower_bounds=lb, upper_bounds=ub + flexible_shape_utils._set_multiarray_ndshape_range_for_feature( + input_features[-1], lower_bounds=lb, upper_bounds=ub + ) + + if default_bound_used and self.convert_to == "mlprogram": + warnings.warn( + "Some dimensions in the input shape are unknown, hence they are set to flexible ranges " + f"with lower bound and default value = {default_lower_bound}, and upper bound = " + f"{default_upper_bound}. To set different values for the default shape and upper bound, " + "please use the ct.RangeDim() method as described here: " + "https://coremltools.readme.io/docs/flexible-inputs#set-the-range-for-each-dimension.", + UserWarning, ) - elif input_type is None: - sym_type = prog.functions["main"].inputs[input_name].sym_type - lb = [] - ub = [] - for s in sym_type.get_shape(): - if is_symbolic(s): - lb.append(default_lower_bound) - ub.append(default_upper_bound) - default_bound_used = True + convert_from = self.convert_from + if convert_from is not None and convert_from.startswith("tensorflow"): + warnings.warn( + 'There is "None" dim in TF input placeholder. Please consider specifying ' + 'input shapes by using the "inputs" param in ct.convert().' + ) + + return input_features + + def get_func_output(self, func: mil.Function) -> List[proto.Model_pb2.FeatureDescription]: + """ + Utils to get function output feature description. 
+ """ + + output_types = func.output_types + output_features = [] + + if output_types is not None and self.classifier_config is None: + assert len(output_types) == len( + func.outputs + ), "number of mil program outputs do not match the number of outputs provided by the user" + + for i, var in enumerate(func.outputs): + output_feature_type = proto.FeatureTypes_pb2.FeatureType() + if types.is_tensor(var.sym_type) or types.is_primitive(var.sym_type): + if output_types is not None and isinstance(output_types[i], ImageType): + if not types.is_tensor(var.sym_type): + raise ValueError( + "Image output, '{}', is a scalar, but it should be a tensor of rank 4".format( + var.name + ) + ) + + clr_space = _get_colorspace_enum(output_types[i].color_layout) + + shape = var.sym_type.get_shape() + if any_variadic(shape): + raise ValueError( + "Variable rank model outputs, that are ImageTypes, are not supported" + ) + if any_symbolic(shape): + # For flexible shape output, we set the imageSizeRange to [1, -1], + # util this radar is fixed in CoreML: rdar://122895892 ([Bug] CoreML produce empty dictionary with image output with dynamic shape) + image_type = proto.FeatureTypes_pb2.ImageFeatureType( + width=1, height=1, colorSpace=clr_space + ) + image_type.imageSizeRange.widthRange.lowerBound = 1 + image_type.imageSizeRange.widthRange.upperBound = -1 + image_type.imageSizeRange.heightRange.lowerBound = 1 + image_type.imageSizeRange.heightRange.upperBound = -1 + else: + image_type = proto.FeatureTypes_pb2.ImageFeatureType( + width=shape[-1], height=shape[-2], colorSpace=clr_space + ) + _validate_image_input_output_shapes( + output_types[i].color_layout, shape, var.name, is_input=False + ) + + output_feature_type.imageType.CopyFrom(image_type) + output_features.append( + proto.Model_pb2.FeatureDescription(name=var.name, type=output_feature_type) + ) else: - lb.append(s) - ub.append(s) - set_multiarray_ndshape_range( - model, input_name, lower_bounds=lb, upper_bounds=ub - ) + dataType = None + if self.classifier_config is None or var.name != self.predicted_feature_name: + # Not a classifier output, make sure model output type matches with ML Program type. + dataType = cast_to_framework_io_dtype(var, True) + else: + # Classifier outputs are set up separately, so default to fp32 for now. + dataType = proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.FLOAT32 - if default_bound_used and kwargs.get("convert_to", None) == "mlprogram": - warnings.warn( - "Some dimensions in the input shape are unknown, hence they are set to flexible ranges " - f"with lower bound and default value = {default_lower_bound}, and upper bound = " - f"{default_upper_bound}. 
To set different values for the default shape and upper bound, " - "please use the ct.RangeDim() method as described here: " - "https://coremltools.readme.io/docs/flexible-inputs#set-the-range-for-each-dimension.", - UserWarning, + output_shape = ( + None + if any_symbolic(var.shape) or types.is_primitive(var.sym_type) + else var.shape + ) + array_type = proto.FeatureTypes_pb2.ArrayFeatureType( + shape=output_shape, dataType=dataType + ) + output_feature_type.multiArrayType.CopyFrom(array_type) + output_features.append( + proto.Model_pb2.FeatureDescription(name=var.name, type=output_feature_type) + ) + elif types.is_dict(var.sym_type): + output_feature_type.dictionaryType.MergeFromString(b"") + keytype, valtype = var.sym_type.T + if types.is_str(keytype): + output_feature_type.dictionaryType.stringKeyType.MergeFromString(b"") + elif keytype == types.int64: + output_feature_type.dictionaryType.int64KeyType.MergeFromString(b"") + else: + raise ValueError("Dictionary key type not supported.") + output_features.append( + proto.Model_pb2.FeatureDescription(name=var.name, type=output_feature_type) + ) + else: + raise NotImplementedError(f"Unsupported output type {var.sym_type}.") + + return output_features + + def get_coreml_model( + self, + input: Dict[str, List[proto.Model_pb2.FeatureDescription]], + output: Dict[str, List[proto.Model_pb2.FeatureDescription]], + specification_version: int, + ) -> proto.Model_pb2.Model: + """ + Utils to get a coreml model description. + """ + # Model description + input_features = input[self._DEFAULT_FUNCTION_NAME] + output_features = output[self._DEFAULT_FUNCTION_NAME] + desc = proto.Model_pb2.ModelDescription(input=input_features, output=output_features) + + if self.classifier_config is not None: + desc.predictedFeatureName = self.predicted_feature_name + desc.predictedProbabilitiesName = self.predicted_probabilities_name + + # Manually edit output type of predictedFeatureName. + # It doesn't use MLMultiArray and really uses a "primitive" type. + for output in desc.output: + if output.name == self.predicted_feature_name: + if type(self.classifier_config.class_labels[0]) == int: + output.type.int64Type.MergeFromString(b"") + else: + output.type.stringType.MergeFromString(b"") + break + + # Create ML Model + model = proto.Model_pb2.Model(description=desc, specificationVersion=specification_version) + model.mlProgram.CopyFrom(self.mil_proto) + + return model + + def export( + self, specification_version: Optional[int] = _SPECIFICATION_VERSION_IOS_15 + ) -> proto.Model_pb2.Model: + + # get functions input / output description + func_to_input = OrderedDict() + func_to_output = OrderedDict() + + for name, func in self.prog.functions.items(): + func_to_input[name] = self.get_func_input(func) + func_to_output[name] = self.get_func_output(func) + + # create a coreml model with I/O description and mil proto + model = self.get_coreml_model( + func_to_input, + func_to_output, + specification_version, ) - convert_from = kwargs.get("convert_from", None) - if convert_from is not None and convert_from.startswith("tensorflow"): - warnings.warn( - 'There is "None" dim in TF input placeholder. Please consider specifying ' - 'input shapes by using the "inputs" param in ct.convert().' 
- ) - # Set optional inputs - _set_optional_inputs(model, input_types) + # Set optional inputs for main function + _set_optional_inputs(model, self.prog.functions["main"].input_types) + + return model + - return model +def load( + prog: Program, + weights_dir: str, + resume_on_errors: Optional[bool] = False, + specification_version: Optional[int] = _SPECIFICATION_VERSION_IOS_15, + **kwargs, +) -> proto.Model_pb2.Model: + if "main" not in prog.functions: + raise ValueError("main function not found in program") + + # if user has specified "ClassifierConfig", then add the "classify" op to the prog + classifier_config = kwargs.get("classifier_config", None) + predicted_feature_name, predicted_probabilities_name = None, None + if classifier_config is not None: + predicted_feature_name, predicted_probabilities_name = _add_classify_op( + prog, classifier_config + ) + + # convert pymil program into mil proto + mil_proto_exporter = MILProtoExporter( + prog, + weights_dir, + ) + mil_proto = mil_proto_exporter.export(specification_version) + + # return the model provided by users + desc = kwargs.get("model_description", None) + if desc and not isinstance(desc, proto.Model_pb2.ModelDescription): + raise ValueError("Invalid model descriptor") + + if desc: + if classifier_config is not None: + raise AssertionError("Both model_description and classifier_config can't be provided") + model = proto.Model_pb2.Model(description=desc, specificationVersion=specification_version) + model.mlProgram.CopyFrom(mil_proto) + return model + + # create a CoreML model protobuf + exporter_kwargs = CoreMLProtoExporter.get_additional_kwargs(kwargs) + coreml_proto_exporter = CoreMLProtoExporter( + prog, + mil_proto, + predicted_feature_name, + predicted_probabilities_name, + classifier_config=kwargs.get("classifier_config", None), + convert_to=kwargs.get("convert_to", None), + convert_from=kwargs.get("convert_from", None), + **exporter_kwargs, + ) + return coreml_proto_exporter.export(specification_version) diff --git a/coremltools/converters/mil/backend/mil/passes/adjust_io_to_supported_types.py b/coremltools/converters/mil/backend/mil/passes/adjust_io_to_supported_types.py index df8d9349b..acbc729d8 100644 --- a/coremltools/converters/mil/backend/mil/passes/adjust_io_to_supported_types.py +++ b/coremltools/converters/mil/backend/mil/passes/adjust_io_to_supported_types.py @@ -7,6 +7,7 @@ from coremltools import _logger as logger from coremltools.converters.mil._deployment_compatibility import AvailableTarget as target +from coremltools.converters.mil.mil import Block from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import types as types from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass @@ -14,6 +15,7 @@ from coremltools.converters.mil.mil.passes.pass_registry import register_pass +# TODO: rdar://122845072 ([Infra] Refactor the transform_function_signatures, adjust_io_to_supported_types and update_output_dtypes using a shared graph pass) @register_pass(namespace="mil_backend") class adjust_io_to_supported_types(AbstractGraphPass): """ @@ -182,8 +184,10 @@ def _adjust_main_outputs(func): output_var_name = output_var.name output_var.set_name(f"{output_var_name}__pre__output__{target_dtype}__cast") + old_output_var = output_var output_var = mb.cast(x=output_var, dtype=target_dtype) output_var.set_name(output_var_name) + Block._copy_scope_info(old_output_var, output_var) new_outputs.append(output_var) func.set_outputs(new_outputs) diff --git 
a/coremltools/converters/mil/backend/mil/passes/fuse_activation_silu.py b/coremltools/converters/mil/backend/mil/passes/fuse_activation_silu.py index 5f9270df2..b473c45f1 100644 --- a/coremltools/converters/mil/backend/mil/passes/fuse_activation_silu.py +++ b/coremltools/converters/mil/backend/mil/passes/fuse_activation_silu.py @@ -43,8 +43,11 @@ def _try_to_transform(sigmoid_op, mul_op, block): @block_context_manager def _fuse_activation_silu_block(block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -54,11 +57,9 @@ def _fuse_activation_silu_block(block): mul_op = _match_pattern(op) if mul_op is not None: - fusion_status = _try_to_transform(op, mul_op, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status - return fusion_status + if _try_to_transform(op, mul_op, block): + fusion_occurred = True + return fusion_occurred @register_pass(namespace="mil_backend") diff --git a/coremltools/converters/mil/backend/mil/passes/fuse_pow2_sqrt.py b/coremltools/converters/mil/backend/mil/passes/fuse_pow2_sqrt.py index 45e17b77a..0f87c83fb 100644 --- a/coremltools/converters/mil/backend/mil/passes/fuse_pow2_sqrt.py +++ b/coremltools/converters/mil/backend/mil/passes/fuse_pow2_sqrt.py @@ -29,7 +29,7 @@ def _match_pattern(op): # if we have sqrt, check for pow(2) elif sqrt_op and child_ops[0].op_type == "pow" and child_ops[0].y.val == 2: pow_op = child_ops[0] - + # if we don't have both ops, fast fail if not pow_op or not sqrt_op: return None @@ -59,8 +59,10 @@ def _try_to_transform(op1, op2, block): @block_context_manager def _fuse_pow2_sqrt(block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue for b in op.blocks: block_changed = True while block_changed: @@ -70,11 +72,9 @@ def _fuse_pow2_sqrt(block): op2 = _match_pattern(op) if op2 is not None: - fusion_status = _try_to_transform(op, op2, block) - # has to break as the downstream iterator is affected. 
- if fusion_status: - return fusion_status - return fusion_status + if _try_to_transform(op, op2, block): + fusion_occurred = True + return fusion_occurred @register_pass(namespace="mil_backend") diff --git a/coremltools/converters/mil/backend/mil/passes/insert_image_preprocessing_op.py b/coremltools/converters/mil/backend/mil/passes/insert_image_preprocessing_op.py index b83a2b430..d9124f742 100644 --- a/coremltools/converters/mil/backend/mil/passes/insert_image_preprocessing_op.py +++ b/coremltools/converters/mil/backend/mil/passes/insert_image_preprocessing_op.py @@ -26,7 +26,7 @@ def apply(self, prog): @block_context_manager def _insert_image_preprocessing_ops(block, prog): - input_types = list(prog.main_input_types) + input_types = list(prog.functions["main"].input_types) for input_type in input_types: if isinstance(input_type, ImageType): diff --git a/coremltools/converters/mil/backend/mil/passes/sanitize_name_strings.py b/coremltools/converters/mil/backend/mil/passes/sanitize_name_strings.py index 9ec899091..b5704ceab 100644 --- a/coremltools/converters/mil/backend/mil/passes/sanitize_name_strings.py +++ b/coremltools/converters/mil/backend/mil/passes/sanitize_name_strings.py @@ -19,4 +19,6 @@ def apply(self, prog): for f in prog.functions.values(): sanitizer_vars = NameSanitizer(prefix="var_") sanitizer_ops = NameSanitizer(prefix="op_") - NameSanitizer.sanitize_block(f, sanitizer_vars, sanitizer_ops, prog.main_input_types) + NameSanitizer.sanitize_block( + f, sanitizer_vars, sanitizer_ops, prog.functions["main"].input_types + ) diff --git a/coremltools/converters/mil/backend/mil/passes/test_passes.py b/coremltools/converters/mil/backend/mil/passes/test_passes.py index e1b8c5de7..84b7cf5ca 100644 --- a/coremltools/converters/mil/backend/mil/passes/test_passes.py +++ b/coremltools/converters/mil/backend/mil/passes/test_passes.py @@ -387,8 +387,8 @@ def assert_block_inputs(prev_inputs, inputs): assert prev_inputs[i].name == inputs[i].name assert inputs[i].dtype == types.fp32 - subblocks = prog.functions['main'].operations[0].blocks - prev_subblocks = prev_prog.functions['main'].operations[0].blocks + subblocks = prog.functions["main"].operations[0].blocks + prev_subblocks = prev_prog.functions["main"].operations[0].blocks for i in range(0, len(subblocks)): assert_block_inputs(prev_subblocks[i].inputs, subblocks[i].inputs) @@ -482,10 +482,9 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 1, 20, 20], - color_layout="G", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType(name="x", shape=[1, 1, 20, 20], color_layout="G", channel_first=True), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -520,11 +519,11 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 1, 20, 20], - scale=2.0, - color_layout="G", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType( + name="x", shape=[1, 1, 20, 20], scale=2.0, color_layout="G", channel_first=True + ), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -561,11 +560,11 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 1, 20, 20], - bias=2.0, - color_layout="G", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType( + name="x", shape=[1, 1, 20, 20], bias=2.0, 
color_layout="G", channel_first=True + ), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -603,12 +602,16 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 1, 20, 20], - scale=2.0, - bias=2.0, - color_layout="G", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType( + name="x", + shape=[1, 1, 20, 20], + scale=2.0, + bias=2.0, + color_layout="G", + channel_first=True, + ), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -646,10 +649,9 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 3, 20, 20], - color_layout="RGB", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType(name="x", shape=[1, 3, 20, 20], color_layout="RGB", channel_first=True), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -685,12 +687,16 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 3, 20, 20], - scale=2.0, - bias=[1.0, 2.0, 3.0], - color_layout="RGB", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType( + name="x", + shape=[1, 3, 20, 20], + scale=2.0, + bias=[1.0, 2.0, 3.0], + color_layout="RGB", + channel_first=True, + ), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -728,10 +734,9 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 3, 20, 20], - color_layout="BGR", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType(name="x", shape=[1, 3, 20, 20], color_layout="BGR", channel_first=True), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -767,12 +772,16 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 3, 20, 20], - scale=2.0, - bias=[1.0, 2.0, 3.0], - color_layout="BGR", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType( + name="x", + shape=[1, 3, 20, 20], + scale=2.0, + bias=[1.0, 2.0, 3.0], + color_layout="BGR", + channel_first=True, + ), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -815,12 +824,16 @@ def prog(x): z = mb.add(x=y1, y=y2) return z - prog.main_input_types = (ct.ImageType(name='x', - shape=[1, 3, 20, 20], - scale=scale_type(2.0), - bias=np.array([1, 2, 3]).astype(bias_type), - color_layout="RGB", - channel_first=True),) + prog.functions["main"].input_types = ( + ct.ImageType( + name="x", + shape=[1, 3, 20, 20], + scale=scale_type(2.0), + bias=np.array([1, 2, 3]).astype(bias_type), + color_layout="RGB", + channel_first=True, + ), + ) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "mil_backend::insert_image_preprocessing_ops" @@ -1029,7 +1042,7 @@ def program(x): backend=("mlprogram", "fp32"), expected_output_shapes={block.outputs[0].name: tuple(x_shape)}, ) - + @pytest.mark.skipif(ct.utils._macos_version() < (12, 0), reason="mlprogram predict available only on macOS12+") def test_no_pow(self): x_shape = tuple(np.random.randint(low=1, high=4, size=5)) @@ -1051,7 +1064,7 @@ def program(x): backend=("mlprogram", "fp32"), 
expected_output_shapes={block.outputs[0].name: tuple(x_shape)}, ) - + @pytest.mark.skipif(ct.utils._macos_version() < (12, 0), reason="mlprogram predict available only on macOS12+") def test_no_sqrt(self): x_shape = tuple(np.random.randint(low=1, high=4, size=5)) @@ -1073,7 +1086,7 @@ def program(x): backend=("mlprogram", "fp32"), expected_output_shapes={block.outputs[0].name: tuple(x_shape)}, ) - + @pytest.mark.skipif(ct.utils._macos_version() < (12, 0), reason="mlprogram predict available only on macOS12+") @pytest.mark.parametrize( "reverse_order", itertools.product([True, False]), diff --git a/coremltools/converters/mil/backend/nn/load.py b/coremltools/converters/mil/backend/nn/load.py index 6825b6e63..8c825cb09 100644 --- a/coremltools/converters/mil/backend/nn/load.py +++ b/coremltools/converters/mil/backend/nn/load.py @@ -204,8 +204,8 @@ def load(prog, **kwargs): ) raise ValueError(msg.format(prog)) - input_types = prog.main_input_types - output_types = prog.main_output_types + input_types = prog.functions["main"].input_types + output_types = prog.functions["main"].output_types v1_inputs = [] symbolic_inputs = {} diff --git a/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py b/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py index 7105ea09d..993ad022e 100644 --- a/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py +++ b/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py @@ -9,7 +9,7 @@ def _commingle_loop_vars_block(block): - for op in list(block.operations): + for op in block.operations: for b in op.blocks: _commingle_loop_vars_block(b) @@ -23,7 +23,6 @@ def _commingle_loop_vars_block(block): anchor_op=None, old_var=vx_in, new_var=v_out, - no_check_var_visibility=True, ) # replace block inputs diff --git a/coremltools/converters/mil/backend/nn/passes/conv1d_decomposition.py b/coremltools/converters/mil/backend/nn/passes/conv1d_decomposition.py index 48c207c55..6a477a17d 100644 --- a/coremltools/converters/mil/backend/nn/passes/conv1d_decomposition.py +++ b/coremltools/converters/mil/backend/nn/passes/conv1d_decomposition.py @@ -39,7 +39,11 @@ def apply(self, prog): @block_context_manager def _decompose_conv1d_block(self, block: Block): def help_decompose_conv1d_block(block: Block) -> bool: + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -50,10 +54,9 @@ def help_decompose_conv1d_block(block: Block) -> bool: continue if self._try_apply_transform(op, block): - # has to break as the downstream iterator is affected - return True + fusion_occurred = True - return False + return fusion_occurred block_changed = True while block_changed: diff --git a/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py b/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py index 2effac4f3..a5398441f 100644 --- a/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py +++ b/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py @@ -3,6 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +from coremltools.converters.mil.mil import Block from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass from coremltools.converters.mil.mil.passes.pass_registry import register_pass 
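# Note on the fusion-pass rewrites above (fuse_activation_silu, fuse_pow2_sqrt,
# conv1d_decomposition): each pass moves from "return on the first match" to a
# single sweep that accumulates a flag and skips ops already detached by an
# earlier rewrite. A minimal sketch of that shared pattern follows; the
# _try_to_transform helper is a placeholder, and the real passes additionally
# wrap the sweep in @block_context_manager.

def _try_to_transform(op, block) -> bool:
    # Placeholder for the per-pass logic: match the local pattern rooted at
    # `op` and rewrite it in place, returning True if a rewrite happened.
    return False


def _fuse_example_block(block) -> bool:
    fusion_occurred = False
    for op in list(block.operations):
        # An op removed by a fusion applied earlier in this same sweep has
        # enclosing_block set to None; skip it instead of restarting the walk.
        if op.enclosing_block is None:
            continue
        # Recurse into child blocks (e.g. cond/while bodies) until stable.
        for b in op.blocks:
            block_changed = True
            while block_changed:
                block_changed = _fuse_example_block(b)
        # Accumulate the result rather than returning on the first match.
        if _try_to_transform(op, block):
            fusion_occurred = True
    return fusion_occurred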
@@ -15,6 +16,7 @@ def _handle_unused_inputs_func(f): for v in unused_inputs: # copy the input v_tmp = mb.identity(x=v, name=v.name + "_tmp") + Block._copy_scope_info(v, v_tmp) @register_pass(namespace="nn_backend") diff --git a/coremltools/converters/mil/converter.py b/coremltools/converters/mil/converter.py index 72f11769c..f9421b1ad 100644 --- a/coremltools/converters/mil/converter.py +++ b/coremltools/converters/mil/converter.py @@ -72,7 +72,7 @@ def __call__(self, model, *args, **kwargs): # natively supported by MIL ops (ex. Conv/Pool/etc.) if isinstance(inp, ImageType) and inputs[idx].channel_first is None: inputs[idx].channel_first = True - model.set_main_input_types(tuple(inputs)) + model.functions["main"].set_input_types(tuple(inputs)) return model @@ -236,10 +236,12 @@ def _mil_convert( compute_units=compute_units, ) - return modelClass(proto, - mil_program=mil_program, - skip_model_load=kwargs.get('skip_model_load', False), - compute_units=compute_units) + return modelClass( + proto, + mil_program=mil_program, + skip_model_load=kwargs.get("skip_model_load", False), + compute_units=compute_units, + ) def mil_convert_to_proto( diff --git a/coremltools/converters/mil/debugging_utils.py b/coremltools/converters/mil/debugging_utils.py index d6bc5d456..30282659b 100644 --- a/coremltools/converters/mil/debugging_utils.py +++ b/coremltools/converters/mil/debugging_utils.py @@ -23,11 +23,11 @@ def extract_submodel( ) -> MLModel: """ This utility function lets you extract a submodel from a Core ML model. - + For a NeuralNetwork model, the function extracts only in-memory Core ML models. You should always call this function to a model directly from ``ct.convert``. It is not allowed to load the model from disk and then call this API. - + For an ML program model, both cases (in-memory and from disk) are supported. Parameters @@ -37,14 +37,14 @@ def extract_submodel( outputs: list[str] A list of names of Vars, which are the outputs of the extracted submodel. - + inputs: list[str] (Optional) A list of names of Vars, which are the inputs of the extracted submodel. If not provided, the inputs from the original model are used. function_name: str (Optional) Name of the function where the subgraph is extracted. Default ``main``. - + Examples -------- @@ -54,7 +54,7 @@ def extract_submodel( >>> mlmodel = ct.convert(model, convert_to="neuralnetwork") >>> outputs = ["output_0", "output_1"] >>> submodel = extract_submodel(mlmodel, outputs) - + ML Program: >>> from coremltools.converters.mil.debugging_utils import extract_submodel @@ -75,15 +75,15 @@ def validate_inputs(func, input_vars): for op in func.operations: if op.op_type == "const": reachable_vars.add(op.outputs[0]) - + for op in func.operations: if all([x in reachable_vars for x in op.inputs.values()]): reachable_vars.update(op.outputs) - + for out in func.outputs: if out not in reachable_vars: raise ValueError(f"output {output} not reachable from inputs") - + @block_context_manager def replace_inputs(func, input_vars): func_inputs = {} @@ -94,13 +94,12 @@ def replace_inputs(func, input_vars): anchor_op=input.op, old_var=input, new_var=func_inputs[name].outputs[0], - no_check_var_visibility=True, ) func._input_dict = OrderedDict() for k, v in func_inputs.items(): v.set_name(k) func._input_dict[k] = v.outputs[0] - + if not isinstance(outputs, (list, tuple)): raise ValueError(f"outputs must be of type list/tuple. 
Got {type(outputs)}.") @@ -126,7 +125,7 @@ def replace_inputs(func, input_vars): ) else: program = model._mil_program - + # extract subgraph prog = copy.deepcopy(program) func = prog.functions[function_name] @@ -147,7 +146,7 @@ def replace_inputs(func, input_vars): # Clean up the graph PASS_REGISTRY["common::dead_code_elimination"](prog) - + # If the inputs are provided, we subtract the subgraph starting from them if inputs is not None: if not isinstance(inputs, (list, tuple)): @@ -169,8 +168,8 @@ def replace_inputs(func, input_vars): validate_inputs(func, input_vars) replace_inputs(func, input_vars) PASS_REGISTRY["common::dead_code_elimination"](prog) - + prog.skip_all_passes = True submodel = ct.convert(prog, convert_to=backend, compute_units=model.compute_unit) - + return submodel diff --git a/coremltools/converters/mil/experimental/passes/generic_pass_infrastructure.py b/coremltools/converters/mil/experimental/passes/generic_pass_infrastructure.py index 462a3b35f..cf8f8e416 100644 --- a/coremltools/converters/mil/experimental/passes/generic_pass_infrastructure.py +++ b/coremltools/converters/mil/experimental/passes/generic_pass_infrastructure.py @@ -7,7 +7,9 @@ import warnings from functools import partial +from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil.passes.helper import block_context_manager +from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource from ...mil.passes import pass_registry @@ -172,24 +174,26 @@ def _detect_pattern(program_op, ops_arrangement_root_var, block): @block_context_manager def _fuse_one_block(block, ops_arrangement, var_constraints, transform_pattern): - fusion_status = False + fusion_occurred = False for op in list(block.operations): for b in op.blocks: block_changed = True while block_changed: block_changed = _fuse_one_block(b, ops_arrangement, var_constraints, transform_pattern) - ops_arrangement_root_var = list(ops_arrangement.functions.values())[0].function_inputs[0] - fusion_status, pattern = _detect_pattern(op, ops_arrangement_root_var, block) + ops_arrangement_root_var = list( + list(ops_arrangement.functions.values())[0].inputs.values() + )[0] + fusion_occurred, pattern = _detect_pattern(op, ops_arrangement_root_var, block) - if fusion_status: - fusion_status &= var_constraints(pattern) + if fusion_occurred: + fusion_occurred &= var_constraints(pattern) - if fusion_status: + if fusion_occurred: transform_pattern(pattern) - return fusion_status + return fusion_occurred - return fusion_status + return fusion_occurred def fuse_all_blocks(ops_arrangement, var_constraints, transform_pattern, prog): @@ -208,9 +212,10 @@ def __call__(self, prog): if len(self.passes) == 0: raise ValueError("no pass functions associated with " + self.pass_name) - for one_pass in self.passes: - one_pass(prog) - prog.validate() + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=[self.pass_name])): + for one_pass in self.passes: + one_pass(prog) + prog.validate(check_essential_scope=True) def add(self, pass_function): self.passes.append(pass_function) diff --git a/coremltools/converters/mil/frontend/_utils.py b/coremltools/converters/mil/frontend/_utils.py index 3d7d8e168..dc7de2acc 100644 --- a/coremltools/converters/mil/frontend/_utils.py +++ b/coremltools/converters/mil/frontend/_utils.py @@ -2,14 +2,20 @@ # # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + import itertools +import 
math as math +from typing import List, Optional, Union -from typing import List, Optional +import numpy as _np from coremltools.converters.mil.input_types import InputType from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Var, types -from coremltools.converters.mil.mil.ops.defs._utils import parse_einsum_equation +from coremltools.converters.mil.mil import Operation, Var, types +from coremltools.converters.mil.mil.ops.defs._utils import ( + parse_einsum_equation, + promote_input_dtypes, +) from coremltools.converters.mil.mil.types.symbolic import any_symbolic, is_symbolic @@ -230,6 +236,31 @@ def get_output_names(outputs) -> Optional[List[str]]: return output_names +# This is a workaround in Core ML for topk with dynamic `k`: +# * Core ML topk supports only constant `k` +# * Luckily, Core ML gather supports dynamic `end`, so we workaround by argsort then gather +# This leads to a slightly different behaviour, though: top-k elements are always sorted +def dynamic_topk( + x: Var, k: Var, axis: int, ascending: Optional[bool] = False, name: Optional[str] = None +): + assert k.val is None, "Please use mb.topk directly if k is compile time known" + + indices = mb.argsort(x=x, axis=axis, ascending=ascending) + if name is None: + values = mb.gather_along_axis(x=x, indices=indices, axis=axis) + else: + values = mb.gather_along_axis(x=x, indices=indices, axis=axis, name=name) + + k_indices = mb.range_1d(end=k, start=0, step=1) + values = mb.gather(x=values, indices=k_indices, axis=axis) + if name is None: + indices = mb.gather(x=indices, indices=k_indices, axis=axis) + else: + indices = mb.gather(x=indices, indices=k_indices, axis=axis, name=name) + + return values, indices + + def solve_diagonal_einsum(parsed_vectors, vars): def solve_diagonal_einsum_one_step(parsed_vector, x): for i in range(len(parsed_vector)): @@ -436,3 +467,80 @@ def _concat_dims(dims, none_if_empty=False): else: ab = mb.transpose(x=ab, perm=get_perm_transpose_einsum(ab_reshaped_axes, out_axes), name=name) return ab + + +def _lower_scaled_dot_product_attention(q: Var, k: Var, v: Var, mask: Var, name: str) -> Var: + # scale the query input + embed_size = q.shape[-1] + if is_symbolic(embed_size): + raise ValueError( + "The embedding size, i.e. 
last dimension of the query tensor's shape," + " cannot be symbolic in the scaled_dot_product_attention op" + ) + multiplicative_scale_factor = 1 / math.sqrt(embed_size) + q, k, v, multiplicative_scale_factor = promote_input_dtypes( + [q, k, v, multiplicative_scale_factor] + ) + q = mb.mul(x=q, y=multiplicative_scale_factor) + + # multiply query and key input tensors + # shape of output: (target_seq, source_seq) or (B,...,target_seq, source_seq) + attn_weights = mb.matmul(x=q, y=k, transpose_y=True) + + # add mask if applicable + if mask is not None: + attn_weights = mb.add(x=attn_weights, y=mask) + + # do softmax + attn_weights_normalized = mb.softmax(x=attn_weights, axis=-1) + + # multiply attn_weights and value tensor + res = mb.matmul(x=attn_weights_normalized, y=v, name=name) + return res + + +def _construct_constexpr_affine_op( + quantized_weights: _np.ndarray, + zero_point: Optional[Union[Var, _np.ndarray, _np.generic]], + scale: Union[Var, _np.ndarray, _np.generic], + axis: Optional[Union[Var, int]] = None, + name: Optional[str] = None, + before_op: Optional[Operation] = None, +) -> Operation: + """Constructs the constexpr op to represent the dequantized weight from PyTorch's data.""" + # The constexpr_affine_dequantize op requires axis. + if axis is None: + # Infer the axis based on scale's shape. + non_single_dim = [dim for dim, dim_size in enumerate(scale.shape) if dim_size > 1] + if len(non_single_dim) > 2: + raise ValueError( + "The constexpr_affine_dequantize op doesn't support a scale that has " + "more than one non-single dimension. Got scale with shape " + f"{scale.shape}" + ) + # If non_single_dim is empty, it means it's per-tensor quantization, so just use a dummy axis. + axis = 0 if len(non_single_dim) == 0 else non_single_dim[0] + if isinstance(axis, int): + axis = _np.int32(axis) + + # The constexpr_affine_dequantize op requires zero_point. + if zero_point is None: + zero_point = _np.zeros_like(scale).astype(quantized_weights.dtype) + + # The constexpr_affine_dequantize op requires scale and zero_point to have rank 0 or 1. 
+ if isinstance(scale, (_np.ndarray, _np.generic)): + scale = _np.squeeze(scale) + if isinstance(zero_point, (_np.ndarray, _np.generic)): + zero_point = _np.squeeze(zero_point) + + kwargs = { + "quantized_data": quantized_weights, + "zero_point": zero_point, + "scale": scale, + "axis": axis, + } + if name is not None: + kwargs["name"] = name + if before_op is not None: + kwargs["before_op"] = before_op + return mb.constexpr_affine_dequantize(**kwargs) diff --git a/coremltools/converters/mil/frontend/milproto/load.py b/coremltools/converters/mil/frontend/milproto/load.py index e7ecb0110..b6e39e406 100644 --- a/coremltools/converters/mil/frontend/milproto/load.py +++ b/coremltools/converters/mil/frontend/milproto/load.py @@ -4,10 +4,13 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import os +from typing import Tuple import numpy as np from coremltools import _logger as logger +from coremltools import proto +from coremltools.converters.mil import mil from coremltools.converters.mil._deployment_compatibility import AvailableTarget as _target from coremltools.converters.mil.backend.mil import helper from coremltools.converters.mil.mil import Block @@ -16,7 +19,6 @@ Function, ListVar, Placeholder, - Program, TupleInputType, Var, mil_list, @@ -24,8 +26,6 @@ ) from coremltools.converters.mil.mil.block import curr_block from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry as _SSAOpRegistry -from coremltools.proto import MIL_pb2 as pm -from coremltools.proto import Model_pb2 as ml from .helper import proto_to_types @@ -63,7 +63,7 @@ def get_var_from_name(self, name): def _load_tensorvalue(tensorvalue_spec): - if not isinstance(tensorvalue_spec, pm.TensorValue): + if not isinstance(tensorvalue_spec, proto.MIL_pb2.TensorValue): raise TypeError("Invalid TensorValue spec object") if tensorvalue_spec.WhichOneof("value") == "floats": @@ -85,7 +85,7 @@ def _load_tensorvalue(tensorvalue_spec): def _load_immediate_value(immediatevalue_spec): - if not isinstance(immediatevalue_spec, pm.Value.ImmediateValue): + if not isinstance(immediatevalue_spec, proto.MIL_pb2.Value.ImmediateValue): raise TypeError("Invalid ImmedidateValue spec object") if immediatevalue_spec.WhichOneof("value") == "tensor": @@ -101,7 +101,7 @@ def _load_immediate_value(immediatevalue_spec): def _load_file_value(context, filevalue_spec, dtype): if BlobReader is None: raise RuntimeError("BlobReader not loaded") - if not isinstance(filevalue_spec, pm.Value.BlobFileValue): + if not isinstance(filevalue_spec, proto.MIL_pb2.Value.BlobFileValue): raise TypeError("Invalid BlobFileValue spec object") filename = os.path.join(context.weights_dir, filevalue_spec.fileName.split("/")[-1]) @@ -132,13 +132,18 @@ def _load_file_value(context, filevalue_spec, dtype): return np_value +def _restore_np_from_bytes_value(value: bytes, dtype: types, shape: Tuple[int]) -> np.ndarray: + return np.frombuffer(value, types.nptype_from_builtin(dtype)).reshape(shape) + + def _load_value(context, value_spec): - if not isinstance(value_spec, pm.Value): + if not isinstance(value_spec, proto.MIL_pb2.Value): raise TypeError("Invalid Value spec object") if value_spec.docString: raise ValueError("Docstring would get lost in the process.") + value_spec_type = value_spec.type.WhichOneof("type") if value_spec.type.WhichOneof("type") == "tensorType": valuetype = proto_to_types(value_spec.type) @@ -152,16 +157,21 @@ def _load_value(context, value_spec): else: value = _load_file_value(context, value_spec.blobFileValue, dtype) 
+ target_np_dtype = types.nptype_from_builtin(dtype) if dtype in helper.IMMEDIATE_VALUE_TYPES_IN_BYTES: - value = np.frombuffer(value, types.nptype_from_builtin(dtype)).reshape( - shape - ) + value = _restore_np_from_bytes_value(value, dtype, shape).astype(target_np_dtype) elif dtype == types.str and shape == (): value = str(value[0]) - elif dtype in (types.fp32, types.str, types.bool, types.int32, types.int64): - value = ( - np.array(value).astype(types.nptype_from_builtin(dtype)).reshape(shape) - ) + elif dtype in ( + types.fp32, + types.str, + types.bool, + types.int16, + types.uint16, + types.int32, + types.int64, + ): + value = np.array(value).astype(target_np_dtype).reshape(shape) else: raise ValueError("Invalid dtype for tensor value") else: @@ -178,7 +188,7 @@ def _create_var_from_spec(spec): This helper function is used for creating PyMIL Var/ListVar from the proto spec. Mainly used for the construction of the control flow ops. """ - assert isinstance(spec, pm.NamedValueType) + assert isinstance(spec, proto.MIL_pb2.NamedValueType) sym_type = proto_to_types(spec.type) name = spec.name if types.is_list(sym_type): @@ -255,20 +265,44 @@ def _dummy_false_fn(*loop_vars): def _load_const_op(context, op_spec): inputs = {k: _load_value(context, v) for k, v in op_spec.attributes.items()} - pymil_var = getattr(mb, op_spec.type)(**inputs) - context.register_var_with_name(op_spec.outputs[0].name, pymil_var) + if len(op_spec.inputs) > 0: + for param_name, argument in op_spec.inputs.items(): + vars = [] + for binding in argument.arguments: + binding_type = binding.WhichOneof("binding") + if binding_type == "name": + vars.append(context.get_var_from_name(binding.name)) + elif binding_type == "value": + vars.append(_load_value(context, binding.value)) + else: + raise ValueError(f"Invalid binding_type {binding_type}") + if len(vars) == 1: + inputs[param_name] = vars[0] + else: + inputs[param_name] = vars + + output_var = getattr(mb, op_spec.type)(**inputs) + + if not isinstance(output_var, (tuple, list)): + output_var = [output_var] + if len(output_var) != len(op_spec.outputs): + raise AssertionError( + "Mismatch between number of outputs in operation specification vs PyMIL outputs" + ) + for spec, var in zip(op_spec.outputs, output_var): + context.register_var_with_name(spec.name, var) -def _load_operation(context, op_spec): - if not isinstance(op_spec, pm.Operation): +def _load_operation(context: TranscriptionContext, op_spec: proto.MIL_pb2.Operation): + if not isinstance(op_spec, proto.MIL_pb2.Operation): raise TypeError("Invalid Operation spec object") op_type = op_spec.type if op_type == "const" or "constexpr_" in op_type: if op_spec.blocks: raise ValueError("const / constexpr operation can't have any block") - if op_spec.inputs: - raise ValueError("const / constexpr operation can't have any input") + if op_type == "const" and op_spec.inputs: + raise ValueError("const operation can't have any input") _load_const_op(context, op_spec) else: @@ -363,7 +397,7 @@ def _load_operation(context, op_spec): def _load_block(context, block_spec): - if not isinstance(block_spec, pm.Block): + if not isinstance(block_spec, proto.MIL_pb2.Block): raise TypeError("Invalid Block spec object") if block_spec.attributes: @@ -383,7 +417,7 @@ def _load_block(context, block_spec): def _load_function(context, func_spec, spec_version): - if not isinstance(func_spec, pm.Function): + if not isinstance(func_spec, proto.MIL_pb2.Function): raise TypeError("Invalid Function spec object") if func_spec.attributes: @@ -415,7 
+449,7 @@ def load_mil_proto(program_spec, specification_version, file_weights_dir=""): """ Load in-memory Proto specification of MILSpec.Program(.Proto) object to PyMIL """ - if not isinstance(program_spec, pm.Program): + if not isinstance(program_spec, proto.MIL_pb2.Program): raise TypeError("Invalid Program spec object") if program_spec.docString: @@ -425,7 +459,7 @@ def load_mil_proto(program_spec, specification_version, file_weights_dir=""): raise ValueError("Invalid program version") context = TranscriptionContext(file_weights_dir) - pymil_program = Program() + pymil_program = mil.Program() for func_name, func_spec in program_spec.functions.items(): pymil_program.add_function( func_name, _load_function(context, func_spec, specification_version) @@ -433,7 +467,7 @@ def load_mil_proto(program_spec, specification_version, file_weights_dir=""): for attr_name, attr_spec in program_spec.attributes.items(): if attr_name not in ("buildInfo",): - raise ValueError("Invalid attribute for program") + raise ValueError(f"Invalid attribute {attr_name} for program") return pymil_program @@ -444,7 +478,7 @@ def load(model_spec, specification_version, file_weights_dir="", **kwargs): Set force_spec_version to force override the spec version. """ - if not isinstance(model_spec, ml.Model): + if not isinstance(model_spec, proto.Model_pb2.Model): raise TypeError("Invalid Model sepc object") if specification_version < model_spec.specificationVersion: diff --git a/coremltools/converters/mil/frontend/milproto/test_load.py b/coremltools/converters/mil/frontend/milproto/test_load.py index 69a90e8ca..9e3e10c1b 100644 --- a/coremltools/converters/mil/frontend/milproto/test_load.py +++ b/coremltools/converters/mil/frontend/milproto/test_load.py @@ -150,6 +150,26 @@ def prog(x): assert op_names == new_op_names + def test_mil_uint16(self): + @mb.program( + input_specs=[mb.TensorSpec(shape=(2, 2, 3))], + opset_version=ct.target.iOS17, + ) + def prog(x): + indices = np.array([[[1, 0], [0, 1]], [[1, 0], [0, 0]]], dtype=np.uint16) + res = mb.gather(x=x, indices=indices, axis=2, batch_dims=2) + return res + + mlmodel = ct.convert( + prog, + convert_to="mlprogram", + compute_units=ct.ComputeUnit.CPU_ONLY, + minimum_deployment_target=ct.target.iOS17, + ) + loaded_pymil_prog = get_pymil_prog_from_mlmodel(mlmodel) + assert get_op_types_in_program(loaded_pymil_prog) == get_op_types_in_program(prog) + + @pytest.mark.skipif(ct.utils._macos_version() < (12, 0), reason="mlprogram predict available only on macOS12+") class TestE2ENumericalCorrectness: @pytest.mark.skipif(not _HAS_TORCH, reason="requires torch") diff --git a/coremltools/converters/mil/frontend/tensorflow/converter.py b/coremltools/converters/mil/frontend/tensorflow/converter.py index 8c89f0035..e9131c201 100644 --- a/coremltools/converters/mil/frontend/tensorflow/converter.py +++ b/coremltools/converters/mil/frontend/tensorflow/converter.py @@ -5,12 +5,13 @@ from coremltools import _logger as logger from coremltools.converters._profile_utils import _profile +from coremltools.converters.mil import mil from coremltools.converters.mil._deployment_compatibility import AvailableTarget as _target from coremltools.converters.mil.input_types import ImageType, InputType, RangeDim from coremltools.converters.mil.input_types import Shape as InputShape from coremltools.converters.mil.input_types import TensorType, _get_shaping_class from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Function, Program, get_new_symbol, types 
+from coremltools.converters.mil.mil import Function, get_new_symbol, types from coremltools.converters.mil.mil.types.symbolic import is_symbolic from coremltools.converters.mil.mil.var import Var @@ -407,7 +408,6 @@ def convert_main_graph(self, prog, graph): func_inputs[input_type.name] = mb.placeholder( input_type.shape.symbolic_shape, dtype=dtype ) - prog.set_main_input_types(self.inputs) with Function(func_inputs, opset_version=self.opset_version) as ssa_func: # Get the input Var @@ -421,6 +421,8 @@ def convert_main_graph(self, prog, graph): outputs = convert_graph(self.context, graph, self.output_names) ssa_func.set_outputs(outputs) prog.add_function("main", ssa_func) + prog.functions["main"].set_input_types(self.inputs) + # check duplicate output # Note: sometimes two outputs are pointing to the same Var, we should # create mb.identity for those cases @@ -506,11 +508,11 @@ def convert_main_graph(self, prog, graph): main_output_types.append(TensorType(name=val.name, dtype=dtype)) self.main_output_types = main_output_types - prog.set_main_output_types(self.main_output_types) + prog.functions["main"].set_output_types(self.main_output_types) @_profile def convert(self): - prog = Program() + prog = mil.Program() if len(self.graph_stack) == 0: raise ValueError("At least one TF function must be present") if self.graph_stack[0] != "main": diff --git a/coremltools/converters/mil/frontend/tensorflow/ops.py b/coremltools/converters/mil/frontend/tensorflow/ops.py index 5b647a78c..632b4b43a 100644 --- a/coremltools/converters/mil/frontend/tensorflow/ops.py +++ b/coremltools/converters/mil/frontend/tensorflow/ops.py @@ -8,6 +8,7 @@ from coremltools import _logger as logger from coremltools.converters.mil._deployment_compatibility import AvailableTarget as target +from coremltools.converters.mil.frontend._utils import dynamic_topk from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.block import is_current_opset_version_compatible_with @@ -2292,34 +2293,40 @@ def Tanh(context, node): @register_tf_op(tf_alias=["TopKV2"]) def TopK(context, node): x = context[node.inputs[0]] - k = context[node.inputs[1]].val - sort = node.attr["sorted"] + k = context[node.inputs[1]] - kwargs = { - "x": x, - "k": k, - "axis": -1, - "name": node.name - } + if k.val is not None: + sort = node.attr["sorted"] - if is_current_opset_version_compatible_with(target.iOS16): - kwargs["sort"] = sort - elif not sort: - raise ValueError("For opset <= iOS16, only sorted=True supported for the topk") + kwargs = {"x": x, "k": k, "axis": -1, "name": node.name} + + if is_current_opset_version_compatible_with(target.iOS16): + kwargs["sort"] = sort + elif not sort: + raise ValueError("For opset <= iOS16, only sorted=True supported for the topk") + + context.add(node.name, mb.topk(**kwargs)) + + else: + context.add(node.name, dynamic_topk(x, k, -1, name=node.name)) - context.add(node.name, mb.topk(**kwargs)) @register_tf_op(tf_alias=["InTopKV2"]) def InTopK(context, node): x = context[node.inputs[0]] target = context[node.inputs[1]] - k = context[node.inputs[2]].val + k = context[node.inputs[2]] _, class_num = x.shape - if not is_symbolic(class_num): - k = min(k, class_num) + if k.val is not None and not is_symbolic(class_num): + k = min(k.val, class_num) + _, indices = mb.topk(x=x, k=k, axis=-1) + else: + x_shape = mb.shape(x=x) + class_num = mb.slice_by_index(x=x_shape, begin=(-1,), end=(-1,), squeeze_mask=(True,)) + k = mb.minimum(x=k, y=class_num) 
+ _, indices = dynamic_topk(x, k, -1) - _, indices = mb.topk(x=x, k=k, axis=-1) target = mb.expand_dims(x=target, axes=[-1]) x = mb.equal(x=target, y=indices) x = mb.cast(x=x, dtype="fp32") diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py index 81d8423e0..14e750f0c 100644 --- a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py +++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py @@ -30,7 +30,7 @@ def apply(self, prog): @block_context_manager def _backfill_make_list_elem_type_block(block): # shallow copy hides changes on f.operations during the loop - for op in block.operations: + for op in list(block.operations): for b in op.blocks: _backfill_make_list_elem_type_block(b) diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py index 1f28bfad3..4573ba51b 100644 --- a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py +++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py @@ -41,7 +41,7 @@ def apply(self, prog): def _expand_tf_lstm_helper(block): # shallow copy hides changes on f.operations during the loop - for op in block.operations[:]: + for op in list(block.operations): for b in op.blocks: _expand_tf_lstm_helper(b) diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py index f22b95bf3..4020db50b 100644 --- a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py +++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py @@ -47,7 +47,7 @@ def apply(self, prog): @block_context_manager def _tf_lstm_to_core_lstm_block(block: Block): # shallow copy hides changes on f.operations during the loop - for op in block.operations: + for op in list(block.operations): for b in op.blocks: _tf_lstm_to_core_lstm_block(b) diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py index 8140bfc36..e31ef6b55 100644 --- a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py +++ b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py @@ -5607,7 +5607,7 @@ class TestTopK(TensorFlowBaseTest): compute_units, backends, [1, 3, 5], - [1, 3], + [1, 3, None], # None denotes dynamic k [True, False], ), ) @@ -5616,19 +5616,38 @@ def test_top_k(self, compute_unit, backend, rank, k, sort): pytest.skip("iOS16 version topk needed for sort = False") if not sort and _macos_version() < (13, 0): pytest.skip("New functionality in macOS13/iOS16") + if rank == 5 and k is None and sort and ( + backend[0] == "neuralnetwork" or ( + platform.machine() == "x86_64" and _macos_version() < (15, 0) + ) + ): + pytest.xfail("rdar://120891130: TopK failing randomly") # TensorFlow only supports last dimension (axis = -1). 
shape = np.random.randint(low=3, high=4, size=rank) - @make_tf_graph([shape]) - def build_model(x): - ref = tf.math.top_k(x, k=k, sorted=sort) - if not sort: - ref = (tf.sort(ref[0]), tf.sort(ref[1])) - return ref + if k is None: + + @make_tf_graph([shape, (1, tf.int32)]) + def build_model(x, k): + ref = tf.math.top_k(x, k=k[0], sorted=sort) + if not sort: + ref = (tf.sort(ref[0]), tf.sort(ref[1])) + return ref + + else: + + @make_tf_graph([shape]) + def build_model(x): + ref = tf.math.top_k(x, k=k, sorted=sort) + if not sort: + ref = (tf.sort(ref[0]), tf.sort(ref[1])) + return ref model, inputs, outputs = build_model input_values = [random_gen(shape, rand_min=-100, rand_max=100)] + if k is None: + input_values.append(np.random.randint(low=1, high=shape[-1], size=1, dtype=np.int32)) input_dict = dict(zip(inputs, input_values)) TensorFlowBaseTest.run_compare_tf( model, @@ -5645,21 +5664,76 @@ def build_model(x): compute_units, backends, [(1, 3), (1, 10), (3, 50)], - [1, 3, 20], + [1, 3, 20, None], # None denotes dynamic k ), ) def test_in_top_k(self, compute_unit, backend, shape, k): # TensorFlow only supports last dimension (axis = -1). batch_size, class_num = shape - @make_tf_graph([shape, (batch_size, tf.int32)]) - def build_model(predictions, targets): - return tf.math.in_top_k(predictions=predictions, targets=targets, k=k) + if k is None: + + @make_tf_graph([shape, (batch_size, tf.int32), (1, tf.int32)]) + def build_model(predictions, targets, k): + return tf.math.in_top_k(predictions=predictions, targets=targets, k=k[0]) + + else: + + @make_tf_graph([shape, (batch_size, tf.int32)]) + def build_model(predictions, targets): + return tf.math.in_top_k(predictions=predictions, targets=targets, k=k) model, inputs, outputs = build_model pred_values = random_gen(shape, rand_min=-2, rand_max=2) target_values = np.random.randint(class_num, size=batch_size).astype(np.int32) input_values = [pred_values, target_values] + if k is None: + input_values.append(np.random.randint(low=1, high=shape[-1], size=1, dtype=np.int32)) + + input_dict = dict(zip(inputs, input_values)) + TensorFlowBaseTest.run_compare_tf( + model, + input_dict, + outputs, + compute_unit=compute_unit, + backend=backend, + ) + + @pytest.mark.parametrize( + "compute_unit, backend, rank, dynamic", + itertools.product( + compute_units, + backends, + (1, 3, 5), + (True, False), + ), + ) + def test_sort(self, compute_unit, backend, rank, dynamic): + """ + tf.sort dispatches to tf.math.top_k, and k = size of the axis to be sorted + """ + if backend[0] == "mlprogram" and dynamic: + pytest.xfail( + "rdar://116060011: re-activate coremltools tests blocked by Core ML regressions" + ) + + # Here we test the conversion of tf.sort(x, axis=0) + # If dynamic, we prepend None to x shape as the dynamic shape axis + if rank == 5 and dynamic: + rank -= 1 + shape = tuple(np.random.randint(low=3, high=8, size=rank)) + + tf_input_shape = (None,) + shape if dynamic else shape + @make_tf_graph([tf_input_shape]) + def build_model(x): + return tf.sort(x, axis=0) + + model, inputs, outputs = build_model + + if dynamic: + input_values = [random_gen((5,) + shape, rand_min=-100, rand_max=100)] + else: + input_values = [random_gen(shape, rand_min=-100, rand_max=100)] input_dict = dict(zip(inputs, input_values)) TensorFlowBaseTest.run_compare_tf( @@ -5670,6 +5744,7 @@ def build_model(predictions, targets): backend=backend, ) + class TestConcat(TensorFlowBaseTest): @pytest.mark.parametrize( "compute_unit, backend, op_version, rank, num_inputs", diff --git 
a/coremltools/converters/mil/frontend/torch/converter.py b/coremltools/converters/mil/frontend/torch/converter.py index 4f5b9d7fc..a58401131 100644 --- a/coremltools/converters/mil/frontend/torch/converter.py +++ b/coremltools/converters/mil/frontend/torch/converter.py @@ -12,10 +12,13 @@ from coremltools import _logger as logger from coremltools._deps import _HAS_TORCH_EXPORT_API +from coremltools.converters.mil import mil from coremltools.converters.mil._deployment_compatibility import AvailableTarget as _target from coremltools.converters.mil.input_types import ImageType, InputType, TensorType from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import Function, Placeholder, Program, types +from coremltools.converters.mil.mil.block import is_current_opset_version_compatible_with +from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource from coremltools.converters.mil.mil.types import is_float from coremltools.converters.mil.mil.var import Var @@ -28,6 +31,7 @@ flatten_graph_input_values, flatten_graph_output_values, generate_tensor_assignment_ops, + populate_native_const_model_hierarchy, remove_getattr_nodes, transform_inplace_ops, ) @@ -57,12 +61,17 @@ def _convert_to_torch_inputtype(inputs: List[TensorType]) -> List[TensorType]: raise ValueError("Unknown type {} for conversion to InputType.".format(type(_input))) return input_type + class QuantizationContext: """ Utilities to manage information pertaining to quantization of tensors in a PyTorch graph. + + This is necessary only for TorchScript (not ExecuTorch) """ def __init__(self, context: "TranscriptionContext") -> None: + if context.frontend != TorchFrontend.TORCHSCRIPT: + raise ValueError("QuantizationContext is necessary only for TorchScript") self._context = context # Maps var name to tuple of (torch dtype, scale, zero_point) @@ -204,7 +213,8 @@ def __init__( self.frontend = frontend self._current_graph = [{}] self._torch_graph = None - self._quant_context = QuantizationContext(self) + if frontend == TorchFrontend.TORCHSCRIPT: + self._quant_context = QuantizationContext(self) @property def torch_graph(self): @@ -348,13 +358,12 @@ def __init__( self.outputs = outputs self.output_names = get_output_names(self.outputs) self.opset_version = _target(opset_version) if opset_version is not None else None - self.context = TranscriptionContext() - self._prog = Program() + self._prog = mil.Program() if isinstance(loaded_model, torch.jit.ScriptModule): - self.context.frontend = TorchFrontend.TORCHSCRIPT - self.graph, self.params_dict, self.buffer_dict = InternalTorchIRGraph.from_torchscript( - torchscript=loaded_model, input_values=self.inputs, cut_at_symbols=cut_at_symbols + self.context = TranscriptionContext(frontend=TorchFrontend.TORCHSCRIPT) + self.graph = InternalTorchIRGraph.from_torchscript( + torchscript=loaded_model, inputs=self.inputs, cut_at_symbols=cut_at_symbols ) # TODO (rdar://106161395): Register Torch IR passes and unify them into the pass pipeline. 
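# Context for the ScopeInfo / ScopeSource imports added above: ops built inside
# an mb.scope(...) context are tagged with their origin (for TorchScript, the
# producing module's type and name), which is what prog.validate(check_essential_scope=True)
# relies on later in this file. A minimal, self-contained sketch of the pattern;
# the program, op, and scope names below are illustrative, not taken from this diff.

import numpy as np

from coremltools.converters.mil.mil import Builder as mb
from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource


@mb.program(input_specs=[mb.TensorSpec(shape=(4,))])
def prog(x):
    with mb.scope(
        ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="Linear"),
        ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data="encoder.fc1"),
    ):
        # The add op created here carries the two scope entries defined above.
        y = mb.add(x=x, y=np.float32(1.0))
    return y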
@@ -365,14 +374,14 @@ def __init__( flatten_graph_output_values, remove_getattr_nodes, generate_tensor_assignment_ops, + populate_native_const_model_hierarchy, ] for p in passes: p(self.graph) elif _HAS_TORCH_EXPORT_API and isinstance(loaded_model, ExportedProgram): - self.context.frontend = TorchFrontend.EXIR + self.context = TranscriptionContext(frontend=TorchFrontend.EXIR) self.graph = InternalTorchIRGraph.from_exir(exir=loaded_model) - self.params_dict, self.buffer_dict = None, None else: raise ValueError( "Model should be an instance of either torch.jit.ScriptModule or ExportedProgram" @@ -452,13 +461,27 @@ def _create_placeholder( dtype = types.fp32 return mb.placeholder(shape, dtype=dtype) - @staticmethod - def _preprocess_input_vars(input_var): - if ( - types.is_tensor(input_var.sym_type) or types.is_scalar(input_var.sym_type) - ) and input_var.dtype == types.fp16: - input_var = mb.cast(x=input_var, dtype="fp32") - return input_var + def _add_const(self, name: str, val: Union[torch.Tensor, torch._C.ScriptObject]) -> None: + """Create a const op and add it to the graph.""" + if isinstance(val, torch._C.ScriptObject): + logger.info(f"Encountered constant {name} of type _torch._C.ScriptObject") + return + elif isinstance(val, torch.Tensor) and val.is_quantized: + const = _dequantized_weight(val.cpu(), name) + self.context.add(const) + return + elif not isinstance(val, torch.Tensor): + raise ValueError(f"unsupported class for {name} in PyTorch graph: {type(val)}") + val = val.detach().cpu().numpy() + # TODO (rdar://107718371): support uint8 activation quantization in torchscript + # Some torchscript models store indices with uint8, which are unrelated to quantization and + # need to be cast to int32 since many non-quantized Core ML ops do not support int8. + # We need a way to distinguish whether an uint8 is quantization (so should be kept) + # or not (so should be cast to int32). + if self.context.frontend == TorchFrontend.TORCHSCRIPT and val.dtype == np.uint8: + val = val.astype(np.int32) + const = mb.const(val=val, name=name) + self.context.add(const) def check_ops(self): """ @@ -469,24 +492,24 @@ def check_ops(self): def convert_const(self) -> None: for name, val in self.graph.params.items(): - if isinstance(val, torch._C.ScriptObject): - logger.info(f"Encountered constant {name} of type _torch._C.ScriptObject") - continue - elif isinstance(val, torch.Tensor) and val.is_quantized: - const = _dequantized_weight(val.cpu(), name) - self.context.add(const) - continue - elif not isinstance(val, np.ndarray): - raise ValueError(f"unsupported class for {name} in PyTorch graph: {type(val)}") - # TODO (rdar://107718371): support uint8 quantization - # Some torch models store indices with uint8, which are unrelated to quantization and - # need to be cast to int32 since Core ML does not support int8. - # We need a way to distinguish whether an uint8 is quantization (so should be kept) - # or not (so should be cast to int32). 
- if val.dtype == np.uint8: - val = val.astype(np.int32) - const = mb.const(val=val, name=name) - self.context.add(const) + if self.context.frontend == TorchFrontend.TORCHSCRIPT: + scope_name, scope_type = self.graph.params_scope[name] + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=scope_type), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=scope_name), + ): + self._add_const(name, val) + elif self.context.frontend == TorchFrontend.EXIR: + # ExecuTorch has constants lifted as inputs, yet we have not sorted out + # how to support IO metadata, so for now just put a dummy metadata + # since inputs/constants will not contribute to debugging/profiling + # TODO (rdar://125572392): Support torch.export IO metadata + with mb.scope( + ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None]), + ): + self._add_const(name, val) + else: + raise ValueError(f"Invalid PyTorch frontend {self.context.frontend}") def convert(self) -> Program: logger.info("Converting graph.") @@ -499,7 +522,6 @@ def convert(self) -> Program: # This will hold the converted model. prog = self._prog - prog.set_main_input_types(tuple(self.inputs)) # Construct placeholder for input to SSA function ssa_func_inputs = OrderedDict() @@ -517,7 +539,39 @@ def convert(self) -> Program: internal_names = list(self.graph.inputs.keys()) internal_names.extend(user_names[len(internal_names) :]) for torch_name, ssa_name in zip(internal_names, user_names): - input_var = self._preprocess_input_vars(ssa_func.inputs[ssa_name]) + input_var = ssa_func.inputs[ssa_name] + if self.context.frontend == TorchFrontend.TORCHSCRIPT: + # To create fp16 Core ML model from fp32 torch model, we + # 1. Cast input to fp32 (if specified fp16 input) + # 2. Convert fp32 torch model to fp32 Core ML model + # 3. Graph passes `add_fp16_cast` and `cast_optimization` + # then cast fp32 Core ML model to fp16 + # So here we perform the "cast input to fp32" step + if ( + types.is_tensor(input_var.sym_type) or types.is_scalar(input_var.sym_type) + ) and input_var.dtype == types.fp16: + # This cast should have placeholder scope + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="placeholder" + ), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=torch_name), + ): + input_var = mb.cast(x=input_var, dtype="fp32") + elif self.context.frontend == TorchFrontend.EXIR: + # EXIR has dtypes all determined, so for now we just stick to EXIR dtypes + # TODO (rdar://115845792): Handle fp16 IO dtypes + # When handle user provided IO dtypes, we will also need to handle IO metadata + # TODO (rdar://125572392): Support torch.export IO metadata + if ( + input_var.dtype == types.fp16 + and not is_current_opset_version_compatible_with(_target.iOS16) + ): + raise ValueError( + "To use fp16 input, please set minimum deployment target to iOS16+" + ) + else: + raise ValueError(f"Invalid PyTorch frontend {self.context.frontend}") self.context.add(input_var, torch_name=torch_name) # Convert constants @@ -554,5 +608,21 @@ def convert(self) -> Program: # is True. 
Make the default output type to fp16 self._adjust_default_output_to_fp16(graph_outputs) if self.outputs is not None: - prog.set_main_output_types(self.outputs) + prog.functions["main"].set_output_types(self.outputs) + + prog.functions["main"].set_input_types(tuple(self.inputs)) + + # Make sure the prog is not missing any scope information + essential_scope_sources = [] + if self.context.frontend == TorchFrontend.TORCHSCRIPT: + essential_scope_sources = [ + ScopeSource.TORCHSCRIPT_MODULE_NAME, + ScopeSource.TORCHSCRIPT_MODULE_TYPE, + ] + elif self.context.frontend == TorchFrontend.EXIR: + essential_scope_sources = [ScopeSource.EXIR_DEBUG_HANDLE] + else: + raise ValueError(f"Invalid PyTorch frontend {self.context.frontend}") + prog._add_essential_scope_source(essential_scope_sources) + prog.validate(check_essential_scope=True) return prog diff --git a/coremltools/converters/mil/frontend/torch/internal_graph.py b/coremltools/converters/mil/frontend/torch/internal_graph.py index bb3cacdee..f22f32c97 100644 --- a/coremltools/converters/mil/frontend/torch/internal_graph.py +++ b/coremltools/converters/mil/frontend/torch/internal_graph.py @@ -4,20 +4,20 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from collections import OrderedDict +from typing import Any, Dict, List, Optional, Tuple, Union +import numpy as np import torch -import torch.fx -import torch.fx.immutable_collections -import torch.export from coremltools import _logger as logger +from coremltools.converters.mil.input_types import TensorType from .utils import TORCH_DTYPE_TO_NUM, sanitize_op_kind from .exir_utils import extract_inputs_from_exir_program from .torchscript_utils import _expand_and_optimize_ir -def _make_ssa_name(name): +def _make_ssa_name(name: str) -> str: """ Converts a symbol name (string) into an SSA name, by prepending '%'. Only used for pretty printing the graph. @@ -27,7 +27,7 @@ def _make_ssa_name(name): return "%" + name -def _ssa_name_list(names): +def _ssa_name_list(names: List[str]) -> List[str]: """ Take a list of symbol names (strings) and return them as SSA names. Only used for pretty printing the graph. @@ -35,7 +35,7 @@ def _ssa_name_list(names): return [_make_ssa_name(x) for x in names] -def _find_new_name(old_name, node_names): +def _find_new_name(old_name: str, node_names: List[str]) -> str: """ Disambiguate a node's name from a list of existing node names by adding successively larger integers. @@ -48,7 +48,7 @@ def _find_new_name(old_name, node_names): return new_name -def _replace_in_list(ls, old_val, new_val): +def _replace_in_list(ls: List[Any], old_val: Any, new_val: Any) -> None: """Helper function to replace a value in a list.""" try: idx = ls.index(old_val) @@ -63,11 +63,17 @@ class InternalTorchIRBlock: coremltools internal representation of a torch IR block. """ - def __init__(self, parent=None, nodes=None, inputs=None, outputs=None): + def __init__( + self, + parent: Optional["InternalTorchIRNode"] = None, + nodes: Optional[List["InternalTorchIRNode"]] = None, + inputs: Optional[List[str]] = None, + outputs: Optional[List[str]] = None, + ): """ Arguments: parent: The InternalTorchIRNode this block belongs to. - nodes: list of InternalTorchIRNodes in the block + nodes: list of InternalTorchIRNode in the block inputs: list of input symbols. outputs: list of output symbols. 
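_find_new_name above only gains type annotations in this change; its documented behavior (disambiguate by appending successively larger integers) is the usual counter-suffix scheme. A rough sketch of that behavior, written from the docstring rather than copied from the source:

from typing import List


def find_new_name(old_name: str, node_names: List[str]) -> str:
    # Keep incrementing a numeric suffix until the candidate name is unused.
    if old_name not in node_names:
        return old_name
    suffix = 1
    while f"{old_name}.{suffix}" in node_names:
        suffix += 1
    return f"{old_name}.{suffix}"


print(find_new_name("x", ["x", "x.1"]))  # x.2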
""" @@ -152,13 +158,15 @@ class InternalTorchIRNode: def __init__( self, - kind, - inputs, - outputs, - name=None, - parent=None, - attr=None, - blocks=None, + kind: str, + inputs: List[str], + outputs: List[str], + name: Optional[str] = None, + parent: Optional[Union["InternalTorchIRGraph", "InternalTorchIRBlock"]] = None, + attr: Optional[Dict[str, Any]] = None, + blocks: Optional[List["InternalTorchIRBlock"]] = None, + model_hierarchy: Optional[str] = None, + meta: Optional[Dict] = None, ): """ Arguments: @@ -169,6 +177,8 @@ def __init__( parent: The InternalTorchIRGraph/Block this node belongs to. attr: dict of named attributes. blocks: list of InternalTorchIRBlock. + model_hierarchy: str represents TorchScript node's model hierarchy. + meta: A dictionary of torch fx node metadata inherited from torch.fx.Node.meta """ if not name and not outputs: self.name = "" @@ -181,6 +191,8 @@ def __init__( self.parent = parent self.attr = attr if attr is not None else {"value": None} self.blocks = blocks if blocks is not None else [] + self.model_hierarchy = model_hierarchy + self.meta = meta @classmethod def from_torchscript_node(cls, node, parent): @@ -211,6 +223,7 @@ def from_torchscript_node(cls, node, parent): outputs=outputs, attr=attr, blocks=None, + model_hierarchy=node.getModuleHierarchy(), ) internal_node.blocks = [ InternalTorchIRBlock.from_torchscript_block(block=b, parent=internal_node) @@ -267,6 +280,7 @@ def get_arguments(alist): parent=None, attr=None, blocks=None, + meta=node.meta, ) def __str__(self, indent=2): @@ -297,13 +311,89 @@ def replace_name(self, old_name, new_name): for block in self.blocks: block.replace_name(old_name, new_name) + def get_scope_info(self) -> Tuple[List[str], List[str]]: + """ + Get the scope information (``scope_name``, ``scope_type``) of a TorchScript node. + In a TorchScript node, a model hierarchy is represented in a string of format: + ``scope_name_1(scope_type_1).scope_name_2(scope_type_1).<...>.scope_name_n(scope_type_n)`` + For instance, given a torch model: + + class SubModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear_1 = torch.nn.Linear(2, 3) + + def forward(self, x): + x_1 = self.linear(x) + x_2 = torch.relu(x_1) + return x_2 + + class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.submodule_1 = SubModule() + + def forward(self, x): + return self.submodule_1(x) + + The model hierarchy of ``x_1`` is ``submodule_1(SubModule).linear_1(Linear)``, + and ``x_2`` has ``submodule_1(SubModule)``. + + We consider the ``node.name`` as the most inner ``scope_name``, and + ``node.kind`` (aten op type) as the most inner ``scope_type``. + + ``x_1`` results in: + { + "scope_name": ["submodule_1", "linear_1", "x_1"], + "scope_type": ["SubModule", "Linear", "linear"], + }, + and ``x_2`` gets: + { + "scope_name": ["submodule_1", "x_2"], + "scope_type": ["SubModule", "relu"], + }. + + Note that, for the model weight const ops, the names are in the following format: + "submodule_1.linear_1.weight", which would result in a long ``scope_name``: + ``["submodule_1", "linear_1", "submodule_1.linear_1.weight"]``. 
+ This function does a special handling to trim it to: + ``["submodule_1", "linear_1", "weight"]`` + """ + + def _trim_scopename_for_weight(scope_names: List[str]) -> List[str]: + weight_name = scope_names[-1] + if scope_names[:-1] != weight_name.split(".")[:-1]: + return scope_names + scope_names[-1] = weight_name.split(".")[-1] + return scope_names + + if self.model_hierarchy == "" or self.model_hierarchy is None: + scopes = [] + else: + scopes = self.model_hierarchy.split(".") + scope_names, scope_types = [], [] + for val in scopes: + if val == "": + scope_names.append("UNKNOWN_SCOPE_NAME") + scope_types.append("UNKNOWN_SCOPE_TYPE") + continue + if val.count("(") != 1 or val.count(")") != 1: + raise ValueError(f"{val} is not a valid model hierarchy string.") + lower_idx, upper_idx = val.index("("), val.index(")") + scope_names.append(val[:lower_idx]) + scope_types.append(val[lower_idx + 1 : upper_idx]) + scope_names.append(self.name) + scope_types.append(self.kind) + if self.kind == "getattr": + scope_names = _trim_scopename_for_weight(scope_names) + return scope_names, scope_types class InternalTorchIRGraph: """ - CoreML internal representation of a torch IR graph. A torch._C.Graph + Core ML internal representation of a torch IR graph. A torch._C.Graph object is not an ideal structure to use in converting to CoreML. Conversion to an InternalTorchIRGraph is inserted between the original graph and the - final CoreML model to address several issues: + final Core ML model to address several issues: 1. A torch._C.graph is hard to work with. For example, its .inputs() and .outputs() functions return iterators, so the only way to determine the number of inputs/outputs is by counting to the end. @@ -322,33 +412,35 @@ class InternalTorchIRGraph: def __init__( self, - params, - inputs, - outputs, - nodes=None, + params: Dict[str, np.ndarray], + inputs: Dict[str, TensorType], + outputs: List[str], + nodes: Optional[List["InternalTorchIRNode"]] = None, + buffers: Optional[Dict[str, torch.Tensor]] = None, ): """ Arguments: params: dict mapping parameter names to their numpy value. - inputs: OrderedDict mapping input names to their example values. + inputs: OrderedDict mapping input names to their input types. outputs: list[str], list of outputs from the graph. - nodes: list of InternalTorchIRNodes in the graph. + nodes: list of InternalTorchIRNode in the graph. + buffers: Dict mapping torch model buffers to their names. """ self.nodes = nodes self.params = params self.inputs = inputs self.outputs = outputs + self.buffers = buffers + self.params_scope = {} @classmethod - def from_torchscript(cls, torchscript, input_values=None, cut_at_symbols=None): + def from_torchscript(cls, torchscript, inputs=None, cut_at_symbols=None): """ Arguments: torchscript: TorchScript object representing the model to convert. - input_values: A list of inputs to the graph. Must be given is - @raw_graph if not None. + inputs: A list of input types to the graph. cut_at_symbols: The list of desired outputs from the graph. Symbols - must be present in the graph. For debugging use only. Can only - be given if @raw_graph is not None. + must be present in the graph. For debugging use only. 
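The parsing in get_scope_info above is the piece that turns a TorchScript module-hierarchy string into parallel scope-name/scope-type lists. A standalone sketch of the same parsing, runnable outside the converter (weight-name trimming and the trailing node name/kind entries are omitted for brevity):

from typing import List, Tuple


def parse_model_hierarchy(hierarchy: str) -> Tuple[List[str], List[str]]:
    """Split 'name_1(Type_1).name_2(Type_2)' into scope names and scope types."""
    scope_names, scope_types = [], []
    for part in hierarchy.split(".") if hierarchy else []:
        if part == "":
            scope_names.append("UNKNOWN_SCOPE_NAME")
            scope_types.append("UNKNOWN_SCOPE_TYPE")
            continue
        lower, upper = part.index("("), part.index(")")
        scope_names.append(part[:lower])
        scope_types.append(part[lower + 1 : upper])
    return scope_names, scope_types


print(parse_model_hierarchy("submodule_1(SubModule).linear_1(Linear)"))
# (['submodule_1', 'linear_1'], ['SubModule', 'Linear'])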
""" if not isinstance(torchscript, torch.jit.ScriptModule): raise AssertionError( @@ -367,34 +459,21 @@ def from_torchscript(cls, torchscript, input_values=None, cut_at_symbols=None): ) nodes = [] - params = {} - inputs = OrderedDict() + inputs_name_to_type = OrderedDict() outputs = [] - raw_graph, params_dict, buffer_dict = _expand_and_optimize_ir(torchscript) - - # Add params - for name, param in params_dict.items(): - if isinstance(param, torch.Tensor): - if param.is_quantized: - value = param - else: - value = param.detach().cpu().numpy() - else: - value = param - params[name] = value + raw_graph, params, buffers = _expand_and_optimize_ir(torchscript) # Add inputs # The first element of the raw_graph.inputs() is the 'self' of the module, which is not used. graph_inputs = list(raw_graph.inputs())[1:] - if len(graph_inputs) != len(input_values): - raise ValueError( - f"Number of TorchScript inputs ({len(graph_inputs)}) must match the user provided inputs ({len(input_values)})." - ) + if len(graph_inputs) != len(inputs): + raise ValueError( + f"Number of TorchScript inputs ({len(graph_inputs)}) must match the user provided inputs ({len(inputs)})." + ) for index, _input in enumerate(graph_inputs): name = _input.debugName() - value = input_values[index] - inputs[name] = value + inputs_name_to_type[name] = inputs[index] # Add outputs, cutting if @cut_at_symbols is set output_names = cut_at_symbols @@ -403,10 +482,16 @@ def from_torchscript(cls, torchscript, input_values=None, cut_at_symbols=None): for output in output_names: outputs.append(output) - internal_graph = cls(nodes=nodes, params=params, inputs=inputs, outputs=outputs) + internal_graph = cls( + nodes=nodes, + params=params, + inputs=inputs_name_to_type, + outputs=outputs, + buffers=buffers, + ) - node_names = set() # Add nodes + node_names = set() for raw_node in raw_graph.nodes(): new_node = InternalTorchIRNode.from_torchscript_node( node=raw_node, parent=internal_graph @@ -416,10 +501,24 @@ def from_torchscript(cls, torchscript, input_values=None, cut_at_symbols=None): internal_graph.nodes.append(new_node) node_names.add(new_node.name) - return internal_graph, params_dict, buffer_dict + internal_graph._cache_model_hierarchy_for_params() + + return internal_graph + + def _cache_model_hierarchy_for_params(self) -> None: + # We cache the model hierarchy information for model weights in self.params_scope, + # since self.params doesn't contain the information. 
+ def cache_model_hierarchy_block(block): + for node in block.nodes: + for b in node.blocks: + cache_model_hierarchy_block(b) + if node.name in self.params: + self.params_scope[node.name] = node.get_scope_info() + cache_model_hierarchy_block(self) @classmethod - def from_exir(cls, exir: torch.export.ExportedProgram): + def from_exir(cls, exir): + # exir: torch.export.ExportedProgram exported_program = exir nodes = [] @@ -436,19 +535,29 @@ def from_exir(cls, exir: torch.export.ExportedProgram): inputs_to_buffers = exported_program.graph_signature.inputs_to_buffers inputs_to_consts = {**inputs_to_parameters, **inputs_to_buffers} - - parameters_to_inputs = { + consts_to_inputs = { v: k if not k.startswith("%") else k[1:] for k, v in inputs_to_consts.items() } # Add params for name, param in exported_program.state_dict.items(): - if isinstance(param, torch.Tensor): - value = param.detach().cpu().numpy() + if not isinstance(param, torch.Tensor): + raise NotImplementedError( + f"For ExecuTorch paramter, only support torch.Tensor, but got {type(param)}" + ) + params[name if name not in consts_to_inputs else consts_to_inputs[name]] = param + # Non-persistent buffers may be missing from state_dict, but we still need their values + # Reference: https://github.com/pytorch/executorch/pull/1802 + for name, buffer in zip(exported_program.graph_signature.buffers, exported_program.buffers()): + if not isinstance(buffer, torch.Tensor): + raise NotImplementedError( + f"For ExecuTorch buffer, only support torch.Tensor, but got {type(buffer)}" + ) + params_name = consts_to_inputs[name] + if params_name in params: + assert torch.equal(params[params_name], buffer) else: - raise NotImplementedError("Only torch.Tensor handled yet") - - params[name if name not in parameters_to_inputs else parameters_to_inputs[name]] = value + params[params_name] = buffer graph_module = exported_program.graph_module graph = graph_module.graph @@ -466,7 +575,7 @@ def from_exir(cls, exir: torch.export.ExportedProgram): # e.g. higher-level callables such as "call_delegate" if not isinstance(attr, torch.Tensor): raise NotImplementedError("Only torch.Tensor attr handled yet") - params[name] = attr.detach().cpu().numpy() + params[name] = attr elif node.op == "placeholder": continue elif node.op == "output": diff --git a/coremltools/converters/mil/frontend/torch/ops.py b/coremltools/converters/mil/frontend/torch/ops.py index e6694be13..febb96586 100644 --- a/coremltools/converters/mil/frontend/torch/ops.py +++ b/coremltools/converters/mil/frontend/torch/ops.py @@ -17,6 +17,8 @@ from coremltools import _logger as logger from coremltools.converters.mil._deployment_compatibility import AvailableTarget as target +from coremltools.converters.mil.frontend import _utils +from coremltools.converters.mil.frontend._utils import dynamic_topk from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import Symbol, types from coremltools.converters.mil.mil.block import is_current_opset_version_compatible_with @@ -25,6 +27,7 @@ promote_input_dtypes, solve_slice_by_index_shape, ) +from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource from coremltools.converters.mil.mil.types import is_bool, nptype_from_builtin from coremltools.converters.mil.mil.types.symbolic import any_symbolic, is_symbolic from coremltools.converters.mil.mil.types.type_mapping import builtin_to_string @@ -75,35 +78,64 @@ def convert_nodes(context, graph): graph: An InternalTorchIRGraph or InternalTorchIRBlock object. 
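The buffer loop added above exists because non-persistent buffers can be missing from state_dict while still being reachable through exported_program.buffers(). A simplified sketch of that merge, with plain dicts standing in for the ExportedProgram pieces (the real code also remaps names through consts_to_inputs):

import torch


def merge_exir_consts(state_dict, buffer_names, buffer_values):
    """Collect params from state_dict, then add buffers that state_dict is missing."""
    params = dict(state_dict)
    for name, value in zip(buffer_names, buffer_values):
        if name in params:
            # Persistent buffer: already present, the two sources must agree.
            assert torch.equal(params[name], value)
        else:
            # Non-persistent buffer: only available via buffers().
            params[name] = value
    return params


merged = merge_exir_consts(
    {"weight": torch.ones(2)},
    ["weight", "running_mean"],
    [torch.ones(2), torch.zeros(2)],
)
print(sorted(merged))  # ['running_mean', 'weight']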
""" for node in _tqdm(graph.nodes, desc="Converting PyTorch Frontend ==> MIL Ops", unit=" ops"): - op_lookup = node.kind - add_op = _TORCH_OPS_REGISTRY.get_func(op_lookup) - if add_op is None: - if re.match(r".*_dynamic", op_lookup): - raise RuntimeError( - f"PyTorch convert function for op '{op_lookup}' not implemented.\n" - "Dynamic quantized models are not supported by Core ML.\n" - "Please use static quantization or the APIs in coremltools.optimize to quantize/compress models." - ) - else: - raise RuntimeError( - f"PyTorch convert function for op '{op_lookup}' not implemented." - ) + try: + convert_single_node(context, node) + except Exception as e: + scope_names = node.get_scope_info()[0] + op_location = '/'.join(scope_names[:-1]) + logger.error(f"\n\nERROR - converting '{node.kind}' op (located at: '{op_location}'):\n") + raise e # re-raise exception + + if _all_outputs_present(context, graph): + # We've generated all the outputs the graph needs, terminate conversion. + break + + +def convert_single_node(context, node): + """ + Converts a single lowered PyTorch op to MIL. - logger.info("Converting op {} : {}".format(node.name, op_lookup)) + Arguments: + context: A TranscriptionContext object to pull node inputs and + assign node outputs. + node: lowered PyTorch op to convert. + """ + op_lookup = node.kind + add_op = _TORCH_OPS_REGISTRY.get_func(op_lookup) + if add_op is None: + if re.match(r".*_dynamic", op_lookup): + raise RuntimeError( + f"PyTorch convert function for op '{op_lookup}' not implemented.\n" + "Dynamic quantized models are not supported by Core ML.\n" + "Please use static quantization or the APIs in coremltools.optimize to quantize/compress models." + ) + else: + raise RuntimeError( + f"PyTorch convert function for op '{op_lookup}' not implemented." + ) + + logger.info("Converting op {} : {}".format(node.name, op_lookup)) + + scopes = [] + if context.frontend == TorchFrontend.TORCHSCRIPT: + scope_name, scope_type = node.get_scope_info() + scopes = [ + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=scope_type), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=scope_name), + ] + elif context.frontend == TorchFrontend.EXIR: + scopes = [ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[node.meta["debug_handle"]])] + else: + raise ValueError(f"Invalid PyTorch frontend {context.frontend}") + with mb.scope(*scopes): if context.frontend == TorchFrontend.TORCHSCRIPT: context.quant_context.maybe_handle_quantized_inputs(node) context.prepare_for_conversion(node) - add_op(context, node) - if _TORCH_OPS_REGISTRY.is_inplace_op(op_lookup): context.process_inplace_op(node) - # We've generated all the outputs the graph needs, terminate conversion. - if _all_outputs_present(context, graph): - break - def convert_block(context, block, inputs): """Convert a block (sub-graph) to MIL. 
Conversion happens within a new @@ -177,7 +209,7 @@ def get_bindings(alist) -> List[Any]: raise NotImplementedError(f"Binding of inputs of type {type(i)} not handled yet") return results - + def check_if_number_of_inputs_expected(num_inputs: int, expected: Union[int, List, Tuple]) -> None: expected = [expected] if isinstance(expected, int) else expected if num_inputs not in expected: @@ -186,7 +218,7 @@ def check_if_number_of_inputs_expected(num_inputs: int, expected: Union[int, Lis node.name, node.kind, num_inputs, expected ) ) - + def check_if_number_of_inputs_more_than_min_expected(num_inputs: int, min_expected: int) -> None: if num_inputs < min_expected: raise ValueError( @@ -242,7 +274,16 @@ def _is_const(var, optional=False): return True if isinstance(var, np.ndarray): return True - return var is not None and (var.val is not None or var.op.op_type.startswith("constexpr_")) + return ( + var is not None + and isinstance(var, Var) + and var.op is not None + and ( + var.op.op_type.startswith("constexpr_") + or (var.op.op_type == "dequantize" and var.op.can_materialize_val()) + or var.val is not None + ) + ) def _create_linear_layer(x, w, bias): """ @@ -287,6 +328,15 @@ def _construct_constant(val, name): return mb.const(val=val, name=name) +@register_torch_op +def native_dropout(context, node): + if context.frontend == TorchFrontend.EXIR: + inputs = _get_inputs(context, node, min_expected=2) + context.add((inputs[0],), node.name) + else: + raise ValueError(f"native_dropout should only appear in EXIR, but got {context.frontend}") + + @register_torch_op def affine_grid_generator(context, node): # rdar://73165386 (Improve error handling of coremltools "affine" op PyTorch conversion.) @@ -822,9 +872,12 @@ def pixel_unshuffle(context, node): @register_torch_op(torch_alias=["bmm", "mm"]) def matmul(context, node): inputs = _get_inputs(context, node, expected=2) - if inputs[1].val is not None and \ - len(inputs[1].shape) == 2 and len(inputs[0].shape) <= 3: - res = mb.linear(x=inputs[0], weight=_np.transpose(inputs[1].val), name=node.name) + if (len(inputs[1].shape) == 2 and len(inputs[0].shape) <= 3) and ( + _is_const(inputs[1]) or inputs[1].is_descendant_of_const + ): + linear_x, weight = inputs + transposed_weight = mb.transpose(x=weight, perm=(1, 0)) + res = mb.linear(x=linear_x, weight=transposed_weight, name=node.name) else: x, y = promote_input_dtypes([inputs[0], inputs[1]]) res = mb.matmul(x=x, y=y, name=node.name) @@ -1403,9 +1456,9 @@ def max_pool3d(context, node): @register_torch_op def minimum(context, node): inputs = _get_inputs(context, node, expected=2) + x, y = promote_input_dtypes(inputs) assert len(node.outputs) == 1 - x = context[node.inputs[0]] - y = context[node.inputs[1]] + out = mb.minimum(x=x, y=y, name=node.name) context.add(out) @@ -1420,9 +1473,9 @@ def clamp_min(context, node): @register_torch_op def maximum(context, node): inputs = _get_inputs(context, node, expected=2) + x, y = promote_input_dtypes(inputs) assert len(node.outputs) == 1 - x = context[node.inputs[0]] - y = context[node.inputs[1]] + out = mb.maximum(x=x, y=y, name=node.name) context.add(out) @@ -1532,7 +1585,7 @@ def sub(context, node): ] ) def mean(context, node): - inputs = _get_inputs(context, node) + inputs = _get_inputs(context, node, min_expected=1) x = inputs[0] if types.is_bool(x.dtype): @@ -1542,7 +1595,7 @@ def mean(context, node): kwargs = {"x": x, "name": node.name} # @axes is optional, so omit if None. 
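The matmul rewrite above routes a rank-2 constant (or const-descended) right operand through mb.linear, which expects the weight pre-transposed because linear computes x @ weight.T. A quick numpy check of the equivalence the rewrite relies on:

import numpy as np


def linear(x, weight):
    # Models the MIL linear op's documented behavior (bias omitted): y = x @ weight.T
    return x @ weight.T


x = np.random.rand(3, 4).astype(np.float32)
w = np.random.rand(4, 5).astype(np.float32)
assert np.allclose(x @ w, linear(x, weight=w.T))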
- axes = inputs[1] + axes = None if len(inputs) < 2 else inputs[1] if axes is not None: # @axes needs to be a list, but if only one axis was specified in the # model, it will be constructed as an int. Construct a new constant as a @@ -1718,7 +1771,7 @@ def _adaptive_pool1d(context, node, reduce_op): keep_dims=True ) pool_results.append(cur_result) - + context.add( mb.reshape( x=mb.concat(values=pool_results, axis=-1), @@ -3333,6 +3386,63 @@ def _loop_body(*loop_vars): context.add(output_var, torch_name=output_name) +@register_torch_op +def _unique2(context, node): + (x, sorted, return_inverse, return_counts) = _get_inputs(context, node, expected=4) + + # Unsupported case + if sorted.val is not True: + raise NotImplementedError("sorted=False not supported for unique op") + + x_flatten = mb.reshape(x=x, shape=[-1]) + + # Sort flattened input + indices = mb.argsort(x=x_flatten, ascending=True) + x_sorted = mb.gather_along_axis(x=x_flatten, indices=indices) + + # Subtract n_th+1 element from n_th element + neg_inf = np.float32(-np.inf) + x_sorted = mb.cast(x=x_sorted, dtype="fp32") + x_sorted_shifted = mb.pad(x=x_sorted, pad=[1, 0], constant_val=neg_inf) + x_sorted_padded = mb.pad(x=x_sorted, pad=[0, 1], mode="replicate") + diff = mb.sub(x=x_sorted_padded, y=x_sorted_shifted) + + # Get non-zero element after subtraction to determine unique values + non_zero_indices = mb.non_zero(x=diff) + unique_values_unsqueeze = mb.gather(x=x_sorted, indices=non_zero_indices) + unique_values = mb.squeeze(x = unique_values_unsqueeze) + + # Add unique values to output and see if we're done. + context.add(unique_values, torch_name=node.outputs[0]) + if return_counts.val is False and return_inverse.val is False: + # only the unique values are needed + return + + # Calculate a UxN boolean tensor, where: + # U - number of unique values + # N - number of input elements + num_unique_values = mb.shape(x=unique_values) + x_tile = mb.tile(x=x_flatten, reps=num_unique_values) + tile_shape = mb.concat(values=(num_unique_values, mb.shape(x=x_flatten)), axis=0) + x_tile = mb.reshape(x=x_tile, shape=tile_shape) + unique_values_unsqueeze = mb.cast(x=unique_values_unsqueeze, dtype="int32") + x_tile, unique_values_unsqueeze = promote_input_dtypes([x_tile, unique_values_unsqueeze]) + diff = mb.sub(x=x_tile, y=unique_values_unsqueeze) + bool_tensor = mb.logical_not(x=mb.cast(x=diff, dtype="bool")) + + if return_inverse.val is True: + # Get indices + range = mb.range_1d(start=0, end=mb.squeeze(x=num_unique_values), step=1) + indices = mb.matmul(x=range, y=mb.cast(x=bool_tensor, dtype="int32")) + indices = mb.reshape(x=indices, shape=mb.shape(x=x)) + context.add(indices, torch_name=node.outputs[1]) + + if return_counts.val is True: + # Get counts + counts = mb.reduce_sum(x=mb.cast(x=bool_tensor, dtype='int32'), axes=(-1,)) + context.add(counts, torch_name=node.outputs[2]) + + @register_torch_op(torch_alias=["if"]) def _if(context, node): """ In TorchIR, a conditional looks like: @@ -3959,9 +4069,10 @@ def full(context, node): @register_torch_op def full_like(context, node): - inputs = _get_inputs(context, node, expected=7) + inputs = _get_inputs(context, node, min_expected=2) x = inputs[0] val = inputs[1].val + if is_current_opset_version_compatible_with(target.iOS16): result = mb.fill_like(ref_tensor=x, value=val, name=node.name) else: @@ -4066,13 +4177,13 @@ def _avg_pool(context, node, inputs): kernel_sizes = inputs[1] - strides = None + strides = kernel_sizes # default strides = kernel sizes if len(inputs) > 2: - strides = ( - 
mb.const(val=kernel_sizes.val, name=inputs[2].name) - if inputs[2].op.op_type == "const" and (not list(inputs[2].val)) - else inputs[2] - ) + strides = inputs[2] + # TorchScript may give us empty stride, in such case + # we still default strides to kernel sizes, but name conform to TorchScript + if strides.op.op_type == "const" and (not list(strides.val)): + strides = mb.const(val=kernel_sizes.val, name=strides.name) pad_type = "custom" # Need to explicitly state L-R, T-B pad @@ -4082,7 +4193,7 @@ def _avg_pool(context, node, inputs): include_pad = True if len(inputs) < 6 else inputs[5].val - spatial_rank = len(pad) // 2 + spatial_rank = 0 if pad is None else len(pad) // 2 if spatial_rank > 2 and ceil_mode is True and list(strides.val) != [1] * len(strides.val): # since MIL does not support ceil_mode for 3D pool, # need to adjust padding values if ceil_mode is True @@ -4390,8 +4501,14 @@ def to(context, node): casted_input = torch.tensor(_input.val).type(torch_dtype).cpu().numpy() res = mb.const(val=casted_input, name=node.name) else: - if dtype in NUM_TO_DTYPE_STRING: - res = mb.cast(x=_input, dtype=NUM_TO_DTYPE_STRING[dtype], name=node.name) + dtype_str = NUM_TO_DTYPE_STRING[dtype] + valid_dtypes = ( + {"int8", "uint8", "int16", "uint16", "int32", "fp16", "fp32", "bool"} + if is_current_opset_version_compatible_with(target.iOS17) + else {"int32", "fp16", "fp32", "bool"} + ) + if dtype_str in valid_dtypes: + res = mb.cast(x=_input, dtype=dtype_str, name=node.name) else: # For dtype that is not supported by mb.cast, we do it in best-effort to cast it to int # or float based on the dtype. @@ -4648,7 +4765,7 @@ def meshgrid(context, node): ] ) def noop(context, node): - logger.info("Setting pytorch op: {} to no-op.".format(node)) + logger.info(f"Setting pytorch op: {node.kind} to no-op.") inputs = _get_inputs(context, node) _input = inputs[0] context.add(_input, torch_name=node.name) @@ -4892,8 +5009,7 @@ def _abs(context, node): @register_torch_op def repeat(context, node): - x = context[node.inputs[0]] - reps = context[node.inputs[1]] + x, reps = _get_inputs(context, node, expected=2) if isinstance(reps, list): reps = mb.concat(values=reps, axis=0) @@ -5334,21 +5450,15 @@ def where(context, node): return assert len(inputs) == 3 - cond = inputs[0] + cond, a, b = inputs + a, b = promote_input_dtypes([a, b]) if not types.is_bool(cond.dtype): # cond must be bool type cond = mb.cast(x=cond, dtype="bool") - if not any([any_symbolic(x.shape) for x in inputs[:3]]): + if not any([any_symbolic(x.shape) for x in (cond, a, b)]): # broadcast all tensors to the same shape - broadcast_inputs = _broadcast_tensors([cond, inputs[1], inputs[2]]) - result = mb.select( - cond=broadcast_inputs[0], - a=broadcast_inputs[1], - b=broadcast_inputs[2], - name=node.name, - ) - else: - result = mb.select(cond=cond, a=inputs[1], b=inputs[2], name=node.name) + cond, a, b = _broadcast_tensors([cond, a, b]) + result = mb.select(cond=cond, a=a, b=b, name=node.name) context.add(result) @@ -5366,17 +5476,6 @@ def neg(context, node): @register_torch_op def topk(context, node): - def dynamic_topk(x, k, axis, ascending): - assert k.val is None, "Please use mb.topk directly if k is compile time known" - indices = mb.argsort(x=x, axis=axis, ascending=ascending) - values = mb.gather_along_axis(x=x, indices=indices, axis=axis) - - k_indices = mb.range_1d(end=k, start=0, step=1) - values = mb.gather(x=values, indices=k_indices, axis=axis) - indices = mb.gather(x=indices, indices=k_indices, axis=axis) - - return values, indices - 
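The local dynamic_topk removed above now lives in frontend._utils (see the dynamic_topk import earlier in this diff); the approach is unchanged: when k is not known at compile time, do a full sort and gather the first k entries. A numpy sketch of the same idea (descending order, single axis assumed):

import numpy as np


def dynamic_topk(x, k, axis=-1, ascending=False):
    # Fall back to a full argsort, then gather the first k positions.
    order = np.argsort(x, axis=axis)
    if not ascending:
        order = np.flip(order, axis=axis)
    indices = np.take(order, np.arange(k), axis=axis)
    values = np.take_along_axis(x, indices, axis=axis)
    return values, indices


vals, idx = dynamic_topk(np.array([3.0, 1.0, 4.0, 1.5]), k=2)
print(vals, idx)  # [4. 3.] [2 0]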
inputs = _get_inputs(context, node) kwargs = {"name": node.name, "x": inputs[0], "k": inputs[1]} @@ -6447,33 +6546,6 @@ def _cast_bool_attn_mask(attn_mask: Var, query_var: Var) -> Var: compliment_of_mask = mb.mul(x=negative_inf, y=compliment_of_mask) return mb.add(x=mask, y=compliment_of_mask) - -def _lower_scaled_dot_product_attention(q: Var, k: Var, v: Var, mask: Var, name: str) -> Var: - # scale the query input - embed_size = q.shape[-1] - if is_symbolic(embed_size): - raise ValueError( - "The embedding size, i.e. last dimension of the shape of query tensor" - " cannot be symbolic, in scaled_dot_product_attention op" - ) - multiplicative_scale_factor = 1 / _math.sqrt(embed_size) - q = mb.mul(x=q, y=multiplicative_scale_factor) - - # multiply query and key input tensors - # shape of output: (target_seq, source_seq) or (B,...,target_seq, source_seq) - attn_weights = mb.matmul(x=q, y=k, transpose_y=True) - - # add mask if applicable - if mask is not None: - attn_weights = mb.add(x=attn_weights, y=mask) - - # do softmax - attn_weights_normalized = mb.softmax(x=attn_weights, axis=-1) - - # multiply attn_weights and value tensor - res = mb.matmul(x=attn_weights_normalized, y=v, name=name) - return res - @register_torch_op def scaled_dot_product_attention(context, node): """ @@ -6500,13 +6572,13 @@ def scaled_dot_product_attention(context, node): attn_mask = None if len(inputs) < 4 else inputs[3] dropout = 0.0 if len(inputs) < 5 else inputs[4] is_causal = False if len(inputs) < 6 else inputs[5].val - + # When len(inputs) == 7, the inputs are (q, k, v, attn_mask, dropout, is_causal, scale) if len(inputs) == 7 and inputs[6] is not None: raise NotImplementedError( "scaled_dot_product_attention op: scale parameter is not handled." ) - + if attn_mask is not None and is_causal: raise ValueError( "scaled_dot_product_attention op: attn_mask cannot be provided when is_causal is set to True." 
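The helper deleted above is not gone: the next hunk switches the call site to _utils._lower_scaled_dot_product_attention, and the math is the standard attention formula the old helper spelled out. A numpy sketch of that computation, assuming a static embedding size and an optional additive mask:

import numpy as np


def sdpa(q, k, v, mask=None):
    # softmax(q @ k.T / sqrt(d)) @ v, with an optional additive mask.
    d = q.shape[-1]
    attn = (q / np.sqrt(d)) @ np.swapaxes(k, -1, -2)
    if mask is not None:
        attn = attn + mask
    attn = np.exp(attn - attn.max(axis=-1, keepdims=True))
    attn = attn / attn.sum(axis=-1, keepdims=True)
    return attn @ v


q = k = v = np.random.rand(2, 4, 8).astype(np.float32)
print(sdpa(q, k, v).shape)  # (2, 4, 8)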
@@ -6534,7 +6606,7 @@ def scaled_dot_product_attention(context, node): else: mask = attn_mask - res = _lower_scaled_dot_product_attention(q, k, v, mask, node.name) + res = _utils._lower_scaled_dot_product_attention(q, k, v, mask, node.name) context.add(res) diff --git a/coremltools/converters/mil/frontend/torch/quantization_ops.py b/coremltools/converters/mil/frontend/torch/quantization_ops.py index d33b66877..236f428e4 100644 --- a/coremltools/converters/mil/frontend/torch/quantization_ops.py +++ b/coremltools/converters/mil/frontend/torch/quantization_ops.py @@ -3,21 +3,24 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + import numpy as _np import torch as _torch from coremltools import _logger as logger +from coremltools.converters.mil.frontend import _utils from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import Var, types +from .ops import _create_linear_layer, _get_inputs, promote_input_dtypes +from .torch_op_registry import register_torch_op from .utils import ( NUM_TO_TORCH_DTYPE, TORCH_QTYPE_TO_NP_TYPE, TORCH_QTYPE_TO_STR, + TYPE_TO_DTYPE_STRING, TorchFrontend, ) -from .ops import _create_linear_layer, _get_inputs, promote_input_dtypes -from .torch_op_registry import register_torch_op def _quantize_general( @@ -72,10 +75,16 @@ def _quantize_general( axis=axis, ) context.add(result, node.name) - context.quant_context.add_quantization_info(node.name, torch_dtype, scale, zero_point, axis) + if context.frontend == TorchFrontend.TORCHSCRIPT: + context.quant_context.add_quantization_info(node.name, torch_dtype, scale, zero_point, axis) -@register_torch_op(torch_alias=["quantized_decomposed::quantize_per_tensor"]) +@register_torch_op( + torch_alias=[ + "quantized_decomposed::quantize_per_tensor", + "quantized_decomposed.quantize_per_tensor", + ] +) def quantize_per_tensor(context, node): inputs = _get_inputs( context, @@ -101,25 +110,81 @@ def quantize_per_channel(context, node): _quantize_general(context, node, input, scale, zero_point, torch_dtype, axis.val) -@register_torch_op(torch_alias=["quantized_decomposed::dequantize_per_tensor"]) -def dequantize(context, node): - context.quant_context.get_dequantized_var(node.inputs[0], node.name) +def _dequantize_general( + context, + node, + input: Var, + scale: Var, + zero_point: Var, + axis: Var = None, +) -> None: + # torch may use different dtype for input and zero_point, + # but Core ML requires input and zero_point to have a same dtype, + # so cast zero_point dtype to input dtype + if input.dtype != zero_point.dtype: + zero_point = mb.cast(x=zero_point, dtype=TYPE_TO_DTYPE_STRING[input.dtype]) + # Not sure why torch may quantize a scalar... 
does not make sense, + # since the floating point scale is as big as the original floating point input data scalar + if input.rank == 0: + # For const input, translate to the const floating point scalar output + if input.val is not None: + output_value = scale.val * (input.val - zero_point.val) + output = mb.const(val=output_value) + # For variable input, we have no choice but to expand and squeeze, + # since CoreML dequantize op requires tensor input + else: + expanded_input = mb.expand_dims(x=input, axes=(0,)) + dequantize_output = mb.dequantize( + input=expanded_input, + zero_point=zero_point, + scale=scale, + axis=axis, + ) + output = mb.squeeze(x=dequantize_output) + else: + output = mb.dequantize( + input=input, + zero_point=zero_point, + scale=scale, + axis=axis, + ) + context.add(output, node.name) -def _construct_constexpr_affine_op(quantized_weights, zero_point, scale, axis=None, name=None): - """Constructs the constexpr op to represent the dequantized weight from PyTorch's data.""" - if axis is None: - # It's per-tensor quantization, just use a dummy value for axis. - axis = _np.int32(0) - kwargs = { - "quantized_data": quantized_weights, - "zero_point": zero_point, - "scale": scale, - "axis": axis, - } - if name is not None: - kwargs["name"] = name - return mb.constexpr_affine_dequantize(**kwargs) +@register_torch_op( + torch_alias=[ + "quantized_decomposed::dequantize_per_tensor", + "quantized_decomposed.dequantize_per_tensor", + "quantized_decomposed::dequantize_per_channel", + "quantized_decomposed.dequantize_per_channel", + ] +) +def dequantize(context, node): + if context.frontend == TorchFrontend.TORCHSCRIPT: + context.quant_context.get_dequantized_var(node.inputs[0], node.name) + elif context.frontend == TorchFrontend.EXIR: + # ExecuTorch intends to use `min` and `max` to indicate quantization dtype, e.g. + # min = -64, max = 63, torch_dtype = torch.int8 + # means int4 quantization (torch_dtype = torch.int8 due to there is no torch.int4 yet) + # For now (2024-02-27), 2 issues preventing us from translating `min` and `max` + # 1. ExecuTorch has not fully added 4-bit quantization support yet, so no way to test + # 2. 
CoreML supports only 8-bit quantization yet, so no way to translate + # TODO(rdar://123421506): Translate `min` and `max` once the above 2 issues get resolved + inputs = _get_inputs(context, node, min_expected={TorchFrontend.EXIR: 6}) + num_inputs = len(inputs) + if num_inputs == 6: + input, scale, zero_point, min, max, torch_dtype_number = inputs + axis = None + elif num_inputs == 7: + input, scale, zero_point, axis, min, max, torch_dtype_number = inputs + else: + raise ValueError(f"dequantize should have 6 or 7 inputs, but got {num_inputs}") + _dequantize_general(context, node, input, scale, zero_point, axis) + else: + raise ValueError( + "dequantize is supported only in TorchScript and EXIR frontends, " + f"but got {context.frontend}" + ) def _dequantized_weight(qweight, name: str = None): @@ -132,7 +197,7 @@ def _dequantized_weight(qweight, name: str = None): scale = _np.float32(qweight.q_scale()) zero_point = quant_dtype_np(qweight.q_zero_point()) quantized_weights = _torch.int_repr(qweight).numpy() - dequant_weights = _construct_constexpr_affine_op( + dequant_weights = _utils._construct_constexpr_affine_op( quantized_weights, zero_point, scale, axis=None, name=name ) # per_channel_affine_float_qparams is same as per_channel_affine except that it @@ -158,7 +223,7 @@ def _dequantized_weight(qweight, name: str = None): zero_point = quant_dtype_np(val) quantized_weights = _torch.int_repr(qweight).numpy() axis = _np.int32(qweight.q_per_channel_axis()) - dequant_weights = _construct_constexpr_affine_op( + dequant_weights = _utils._construct_constexpr_affine_op( quantized_weights, zero_point, scale, axis=axis, name=name ) else: diff --git a/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py b/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py index 49b85ef1f..9033fb5b0 100644 --- a/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py +++ b/coremltools/converters/mil/frontend/torch/ssa_passes/torch_tensor_assign_to_core.py @@ -22,7 +22,7 @@ def apply(self, prog): @block_context_manager def _torch_tensor_assign_to_core_block(block): - for op in block.operations[:]: + for op in list(block.operations): for b in op.blocks: _torch_tensor_assign_to_core_block(b) diff --git a/coremltools/converters/mil/frontend/torch/ssa_passes/torch_upsample_to_core_upsample.py b/coremltools/converters/mil/frontend/torch/ssa_passes/torch_upsample_to_core_upsample.py index d8864f80d..6d76375aa 100644 --- a/coremltools/converters/mil/frontend/torch/ssa_passes/torch_upsample_to_core_upsample.py +++ b/coremltools/converters/mil/frontend/torch/ssa_passes/torch_upsample_to_core_upsample.py @@ -36,7 +36,7 @@ def apply(self, prog): @block_context_manager def _torch_upsample_to_core_upsample_block(block): - for op in block.operations[:]: + for op in list(block.operations): for b in op.blocks: _torch_upsample_to_core_upsample_block(b) diff --git a/coremltools/converters/mil/frontend/torch/test/test_executorch_e2e.py b/coremltools/converters/mil/frontend/torch/test/test_executorch_e2e.py index 69ad8d47b..f128b8fdd 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_executorch_e2e.py +++ b/coremltools/converters/mil/frontend/torch/test/test_executorch_e2e.py @@ -3,6 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +import itertools import pytest from coremltools._deps import _HAS_EXECUTORCH, 
_HAS_TORCH_VISION @@ -18,28 +19,72 @@ import timm import transformers +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.mil.scope import ScopeSource + from .testing_utils import TorchBaseTest, TorchFrontend +backends = testing_reqs.backends +compute_units = testing_reqs.compute_units -class TestExecutorch(TorchBaseTest): - def test_mul(self): - class MulModule(torch.nn.Module): - def __init__(self) -> None: - super().__init__() +class TestExecutorchExampleModels(TorchBaseTest): + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_mul(self, compute_unit, backend): + class MulModule(torch.nn.Module): def forward(self, input, other): return input * other - model = MulModule() - model.eval() - - self.run_compare_torch( + _, coreml_model, _, _, _, _ = self.run_compare_torch( [(3, 2), (3, 2)], - model, + MulModule(), + compute_unit=compute_unit, + backend=backend, frontend=TorchFrontend.EXIR, ) - def test_linear(self): + mil_program = coreml_model._mil_program + mul = mil_program.functions["main"].find_ops(op_type="mul")[0] + + debug_handle = mul.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] + assert isinstance(debug_handle, int) + + debug_handle_to_ops_mapping = mil_program.construct_debug_handle_to_ops_mapping() + assert debug_handle_to_ops_mapping.keys() == {debug_handle} + + ops = debug_handle_to_ops_mapping[debug_handle] + index_mul = 0 + indices_const = () + indices_cast = () + if backend[1] == "fp32": + assert len(ops) == 1 + index_mul = 0 + else: + # fp16 introduces additional io casts + # each cast introduces 1 const to store destination dtype + assert len(ops) == 7 + index_mul = 4 + indices_const = (0, 1, 5) + indices_cast = (2, 3, 6) + assert ops[index_mul] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + {"Type": "Operation", "Operation_Type": "mul", "Output": mul.outputs[0].name}, + ] + for index_const_cast in indices_const + indices_cast: + assert ops[index_const_cast][:-1] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + ] + for index_const in indices_const: + assert ops[index_const][-1]["Operation_Type"] == "const" + for index_cast in indices_cast: + assert ops[index_cast][-1]["Operation_Type"] == "cast" + + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_linear(self, compute_unit, backend): class LinearModule(torch.nn.Module): def __init__(self): super().__init__() @@ -48,18 +93,58 @@ def __init__(self): def forward(self, arg): return self.linear(arg) - model = LinearModule() - model.eval() - - self.run_compare_torch( - [(3, 3)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + _, coreml_model, _, _, _, _ = self.run_compare_torch( + [(3, 3)], + LinearModule(), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_add(self): + mil_program = coreml_model._mil_program + linear = mil_program.functions["main"].find_ops(op_type="linear")[0] + + debug_handle = linear.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] + assert isinstance(debug_handle, int) + + debug_handle_to_ops_mapping = mil_program.construct_debug_handle_to_ops_mapping() + assert debug_handle_to_ops_mapping.keys() == {debug_handle} + + ops = debug_handle_to_ops_mapping[debug_handle] + index_linear = 0 + indices_const = () + indices_cast = () + if backend[1] == "fp32": + assert len(ops) == 3 + index_linear = 2 + indices_const = (0, 1) + else: + # 
fp16 introduces additional io casts + # each cast introduces 1 const to store destination dtype + assert len(ops) == 7 + index_linear = 4 + indices_const = (0, 1, 2, 5) + indices_cast = (3, 6) + assert ops[index_linear] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + {"Type": "Operation", "Operation_Type": "linear", "Output": linear.outputs[0].name}, + ] + for index_const_cast in indices_const + indices_cast: + assert ops[index_const_cast][:-1] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + ] + for index_const in indices_const: + assert ops[index_const][-1]["Operation_Type"] == "const" + for index_cast in indices_cast: + assert ops[index_cast][-1]["Operation_Type"] == "cast" + + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_add(self, compute_unit, backend): class AddModule(torch.nn.Module): - def __init__(self): - super().__init__() - def forward(self, x, y): z = x + y z = z + x @@ -67,35 +152,135 @@ def forward(self, x, y): z = z + z return z - model = AddModule() - model.eval() - - self.run_compare_torch( - [(1,), (1,)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + _, coreml_model, _, _, _, _ = self.run_compare_torch( + [(1,), (1,)], + AddModule(), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_add_mul(self): + mil_program = coreml_model._mil_program + adds = mil_program.functions["main"].find_ops(op_type="add") + + debug_handles = [add.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] for add in adds] + for debug_handle in debug_handles: + assert isinstance(debug_handle, int) + + debug_handle_to_ops_mapping = mil_program.construct_debug_handle_to_ops_mapping() + assert debug_handle_to_ops_mapping.keys() == set(debug_handles) + + for add_index, debug_handle in enumerate(debug_handles): + add = adds[add_index] + ops = debug_handle_to_ops_mapping[debug_handle] + index_add = 0 + indices_const = () + indices_cast = () + if backend[1] == "fp32": + assert len(ops) == 1 + index_add = 0 + else: + # fp16 introduces additional io casts + # each cast introduces 1 const to store destination dtype + ADD_INDEX_TO_NUM_OPS = {0: 5, 1: 1, 2: 1, 3: 3} + ADD_INDEX_TO_OP_INDEX = {0: -1, 1: 0, 2: 0, 3: 0} + assert len(ops) == ADD_INDEX_TO_NUM_OPS[add_index] + index_add = ADD_INDEX_TO_OP_INDEX[add_index] + if add_index == 0: + indices_const = (0, 1) + indices_cast = (2, 3) + elif add_index == 3: + indices_const = (1,) + indices_cast = (2,) + assert ops[index_add] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + {"Type": "Operation", "Operation_Type": "add", "Output": add.outputs[0].name}, + ] + for index_const_cast in indices_const + indices_cast: + assert ops[index_const_cast][:-1] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + ] + for index_const in indices_const: + assert ops[index_const][-1]["Operation_Type"] == "const" + for index_cast in indices_cast: + assert ops[index_cast][-1]["Operation_Type"] == "cast" + + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_add_mul(self, compute_unit, backend): class AddMulModule(torch.nn.Module): - def __init__(self): - super().__init__() - def forward(self, a, x, b): y = torch.mm(a, x) z = torch.add(y, b) return z - model = AddMulModule() - model.eval() - - self.run_compare_torch( + _, coreml_model, _, _, _, _ = 
self.run_compare_torch( [(2, 2), (2, 2), (2, 2)], - model, + AddMulModule(), + compute_unit=compute_unit, + backend=backend, frontend=TorchFrontend.EXIR, - backend=("mlprogram", "fp16"), ) - def test_softmax(self): - class LinearModule(torch.nn.Module): + mil_program = coreml_model._mil_program + matmul_or_add = {} + for op_type in ("matmul", "add"): + matmul_or_add[op_type] = mil_program.functions["main"].find_ops(op_type=op_type)[0] + + debug_handle = { + k: v.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] for k, v in matmul_or_add.items() + } + for v in debug_handle.values(): + assert isinstance(v, int) + + debug_handle_to_ops_mapping = mil_program.construct_debug_handle_to_ops_mapping() + assert debug_handle_to_ops_mapping.keys() == set(debug_handle.values()) + + ops = {} + for op_type in ("matmul", "add"): + ops[op_type] = debug_handle_to_ops_mapping[debug_handle[op_type]] + index = {"matmul": 0, "add": 0} + indices_const = {"matmul": (), "add": ()} + indices_cast = {"matmul": (), "add": ()} + if backend[1] == "fp32": + assert len(ops["matmul"]) == 3 and len(ops["add"]) == 1 + index = {"matmul": 2, "add": 0} + indices_const["matmul"] = (0, 1) + else: + # fp16 introduces additional io casts + # each cast introduces 1 const to store destination dtype + assert len(ops["matmul"]) == 7 and len(ops["add"]) == 5 + index = {"matmul": 6, "add": 2} + indices_const = {"matmul": (0, 1, 2, 3), "add": (0, 3)} + indices_cast = {"matmul": (4, 5), "add": (1, 4)} + for op_type in ("matmul", "add"): + assert ops[op_type][index[op_type]] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + { + "Type": "Operation", + "Operation_Type": op_type, + "Output": matmul_or_add[op_type].outputs[0].name, + }, + ] + for index_const_cast in indices_const[op_type] + indices_cast[op_type]: + assert ops[op_type][index_const_cast][:-1] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + ] + for index_const in indices_const[op_type]: + assert ops[op_type][index_const][-1]["Operation_Type"] == "const" + for index_cast in indices_cast[op_type]: + assert ops[op_type][index_cast][-1]["Operation_Type"] == "cast" + + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_softmax(self, compute_unit, backend): + class SoftmaxModule(torch.nn.Module): def __init__(self): super().__init__() self.softmax = torch.nn.Softmax() @@ -103,33 +288,80 @@ def __init__(self): def forward(self, x): return self.softmax(x) - model = LinearModule() - model.eval() - - self.run_compare_torch( - [(2, 2)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + _, coreml_model, _, _, _, _ = self.run_compare_torch( + [(2, 2)], + SoftmaxModule(), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - @pytest.mark.xfail(reason="numerical error") - def test_deeplab_v3(self): - model = torchvision.models.segmentation.deeplabv3_resnet50( - weights=torchvision.models.segmentation.deeplabv3.DeepLabV3_ResNet50_Weights.DEFAULT - ) - model.eval() + mil_program = coreml_model._mil_program + softmax = mil_program.functions["main"].find_ops(op_type="softmax")[0] + + debug_handle = softmax.scopes[ScopeSource.EXIR_DEBUG_HANDLE][0] + assert isinstance(debug_handle, int) + + debug_handle_to_ops_mapping = mil_program.construct_debug_handle_to_ops_mapping() + assert debug_handle_to_ops_mapping.keys() == {debug_handle} + + ops = debug_handle_to_ops_mapping[debug_handle] + index_softmax = 0 + indices_const = () + 
indices_cast = () + if backend[1] == "fp32": + assert len(ops) == 2 + index_softmax = 1 + indices_const = (0,) + else: + # fp16 introduces additional io casts + # each cast introduces 1 const to store destination dtype + assert len(ops) == 6 + index_softmax = 3 + indices_const = (0, 1, 4) + indices_cast = (2, 5) + assert ops[index_softmax] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + {"Type": "Operation", "Operation_Type": "softmax", "Output": softmax.outputs[0].name}, + ] + for index_const_cast in indices_const + indices_cast: + assert ops[index_const_cast][:-1] == [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + ] + for index_const in indices_const: + assert ops[index_const][-1]["Operation_Type"] == "const" + for index_cast in indices_cast: + assert ops[index_cast][-1]["Operation_Type"] == "cast" + @pytest.mark.xfail(reason="numerical error") + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_deeplab_v3(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 224, 224)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 224, 224)], + torchvision.models.segmentation.deeplabv3_resnet50( + weights=torchvision.models.segmentation.deeplabv3.DeepLabV3_ResNet50_Weights.DEFAULT + ), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_edsr(self): - model = torchsr.models.edsr_r16f64(2, True) - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_edsr(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 224, 224)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 224, 224)], + torchsr.models.edsr_r16f64(2, True), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_emformer_transcribe(self): + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_emformer_transcribe(self, compute_unit, backend): class EmformerRnntTranscriberExample(torch.nn.Module): """ This is a wrapper for validating transcriber for the Emformer RNN-T architecture. @@ -145,14 +377,19 @@ def __init__(self) -> None: def forward(self, sources, source_lengths): return self.rnnt.transcribe(sources, source_lengths) - model = EmformerRnntTranscriberExample() - model.eval() + if backend[0] == "neuralnetwork": + pytest.xfail("rdar://125514139 emformer transcribe fails on neuralnetwork") self.run_compare_torch( - [(1, 128, 80), (128,)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 128, 80), (128,)], + EmformerRnntTranscriberExample(), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_emformer_predict(self): + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_emformer_predict(self, compute_unit, backend): class EmformerRnntPredictorExample(torch.nn.Module): """ This is a wrapper for validating predictor for the Emformer RNN-T architecture. 
@@ -168,19 +405,18 @@ def __init__(self) -> None: def forward(self, targets, target_lengths): return self.rnnt.predict(targets, target_lengths, None) - model = EmformerRnntPredictorExample() - model.eval() - self.run_compare_torch( [torch.zeros([1, 128], dtype=int), torch.tensor([128], dtype=int)], - model, + EmformerRnntPredictorExample(), input_as_shape=False, + compute_unit=compute_unit, + backend=backend, frontend=TorchFrontend.EXIR, - backend=("mlprogram", "fp16"), ) @pytest.mark.xfail(reason="numerical error") - def test_emformer_join(self): + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_emformer_join(self, compute_unit, backend): class EmformerRnntJoinerExample(torch.nn.Module): """ This is a wrapper for validating joiner for the Emformer RNN-T architecture. @@ -196,96 +432,112 @@ def __init__(self) -> None: def forward(self, source_encodings, source_lengths, target_encodings, target_lengths): return self.rnnt.join(source_encodings, source_lengths, target_encodings, target_lengths) - model = EmformerRnntJoinerExample() - model.eval() - self.run_compare_torch( [(1, 128, 1024), (128,), (1, 128, 1024), (128,)], - model, + EmformerRnntJoinerExample(), + compute_unit=compute_unit, + backend=backend, frontend=TorchFrontend.EXIR, - backend=("mlprogram", "fp16"), ) - # TODO: add llama2 - - def test_mobilebert(self): - model = transformers.MobileBertModel.from_pretrained( - "google/mobilebert-uncased", return_dict=False - ) - model.eval() + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_mobilebert(self, compute_unit, backend): + if backend[1] == "fp16": + pytest.skip("Mobile Bert overflows fp16") tokenizer = transformers.AutoTokenizer.from_pretrained("google/mobilebert-uncased") token = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"] self.run_compare_torch( token, - model, + transformers.MobileBertModel.from_pretrained( + "google/mobilebert-uncased", return_dict=False + ), input_as_shape=False, + compute_unit=compute_unit, + backend=backend, frontend=TorchFrontend.EXIR, - backend=("mlprogram", "fp32"), rtol=0.005, ) - def test_mobilenet_v2(self): - model = torchvision.models.mobilenet_v2(weights=torchvision.models.mobilenetv2.MobileNet_V2_Weights.DEFAULT) - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_mobilenet_v2(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 224, 224)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 224, 224)], + torchvision.models.mobilenet_v2( + weights=torchvision.models.mobilenetv2.MobileNet_V2_Weights.DEFAULT + ), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_mobilenet_v3(self): - model = torchvision.models.mobilenet_v3_small(pretrained=True) - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_mobilenet_v3(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 224, 224)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 224, 224)], + torchvision.models.mobilenet_v3_small(pretrained=True), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_vit(self): - model = torchvision.models.vit_b_16(weights="IMAGENET1K_V1") - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, 
backends)) + def test_vit(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 224, 224)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 224, 224)], + torchvision.models.vit_b_16(weights="IMAGENET1K_V1"), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_wav2letter(self): - model = torchaudio.models.Wav2Letter(num_classes=4096) - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_wav2letter(self, compute_unit, backend): self.run_compare_torch( - [(10, 1, 700)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(10, 1, 700)], + torchaudio.models.Wav2Letter(num_classes=4096), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_inception_v3(self): - model = torchvision.models.inception_v3(weights="IMAGENET1K_V1") - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_inception_v3(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 224, 224)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 224, 224)], + torchvision.models.inception_v3(weights="IMAGENET1K_V1"), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_inception_v4(self): - model = timm.models.inception_v4(pretrained=True) - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_inception_v4(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 299, 299)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 299, 299)], + timm.models.inception_v4(pretrained=True), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_resnet18(self): - model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1) - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_resnet18(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 224, 224)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 224, 224)], + torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) - def test_resnet50(self): - model = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1) - model.eval() - + @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) + def test_resnet50(self, compute_unit, backend): self.run_compare_torch( - [(1, 3, 224, 224)], model, frontend=TorchFrontend.EXIR, backend=("mlprogram", "fp16") + [(1, 3, 224, 224)], + torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1), + compute_unit=compute_unit, + backend=backend, + frontend=TorchFrontend.EXIR, ) diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py b/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py index d50bfe1d5..77a45f0d0 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_conversion_api.py @@ -20,10 +20,11 @@ ) from coremltools.converters.mil.frontend.torch.test.testing_utils import _copy_input_data from 
coremltools.converters.mil.frontend.torch.torch_op_registry import ( - TorchOpsRegistry, _TORCH_OPS_REGISTRY, + TorchOpsRegistry, register_torch_op, ) +from coremltools.converters.mil.mil.types.symbolic import any_symbolic from coremltools.converters.mil.testing_reqs import backends from coremltools.converters.mil.testing_utils import ( assert_cast_ops_count, @@ -131,19 +132,48 @@ def test_source_dialect_metadata(torch_model, backend): assert mlmodel.user_defined_metadata[_METADATA_SOURCE_DIALECT] == "TorchScript" - @pytest.mark.skipif(not _HAS_EXECUTORCH, reason=MSG_EXECUTORCH_NOT_FOUND) class TestEXIRValidation: @staticmethod - @pytest.mark.parametrize( - "backend", - backends, - ) + @pytest.mark.parametrize("backend", backends) + def test_fp16_io(torch_model, backend): # TODO (rdar://115845792): Handle fp16 IO dtypes + class TestModule(torch.nn.Module): + def __init__(self): + super(TestModule, self).__init__() + self.linear = torch.nn.Linear(10, 20, dtype=torch.float16) + + def forward(self, x): + return self.linear(x) + + model = TestModule() + model.eval() + + shape = (1, 10) + example_inputs = (torch.rand(*shape, dtype=torch.float16),) + exir_program_aten = torch.export.export(model, example_inputs) + exir_program_edge = executorch.exir.to_edge(exir_program_aten).exported_program() + + # Default deployment target is iOS14 for neuralnetwork and iOS15 for mlprogram, + # both are too old to support fp16 io + with pytest.raises( + ValueError, match=r"To use fp16 input, please set minimum deployment target to iOS16\+" + ): + ct.convert(exir_program_edge, convert_to=backend[0]) + + # fp16 io should work fine for iOS16+ + if backend[0] == "mlprogram": + ct.convert( + exir_program_edge, + convert_to="mlprogram", + minimum_deployment_target=ct.target.iOS16, + ) + + @staticmethod + @pytest.mark.parametrize("backend", backends) def test_inputs( torch_model, backend ): # TODO: rdar://115845792 ([Executorch] Handle user provided inputs/outputs in the convert API) - - shape = (1, 10) + shape = (2, 10) exir_program_aten = torch.export.export(torch_model, (torch.rand(*shape),)) exir_program_edge = executorch.exir.to_edge(exir_program_aten).exported_program() @@ -157,15 +187,11 @@ def test_inputs( ) @staticmethod - @pytest.mark.parametrize( - "backend", - backends, - ) + @pytest.mark.parametrize("backend", backends) def test_outputs( torch_model, backend ): # TODO: rdar://115845792 ([Executorch] Handle user provided inputs/outputs in the convert API) - - shape = (1, 10) + shape = (3, 10) exir_program_aten = torch.export.export(torch_model, (torch.rand(*shape),)) exir_program_edge = executorch.exir.to_edge(exir_program_aten).exported_program() @@ -179,12 +205,9 @@ def test_outputs( ) @staticmethod - @pytest.mark.parametrize( - "backend", - backends, - ) + @pytest.mark.parametrize("backend", backends) def test_source_dialect_metadata(torch_model, backend): - shape = (1, 10) + shape = (4, 10) exir_program_aten = torch.export.export(torch_model, (torch.rand(*shape),)) exir_program_edge = executorch.exir.to_edge(exir_program_aten).exported_program() @@ -198,6 +221,7 @@ def test_source_dialect_metadata(torch_model, backend): assert mlmodel.user_defined_metadata[_METADATA_SOURCE_DIALECT] == "TorchExport::EDGE" + @pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND) class TestTorchOpsRegistry: @staticmethod @@ -1239,7 +1263,7 @@ def forward(self, x): def rank4_input_model(): class Model(torch.nn.Module): def forward(self, x): - return x + 5.5 + return x + 5.0 example_input = torch.randint(0, 100, 
(1, 3, 10, 20), dtype=torch.float32) return torch.jit.trace(Model().eval(), example_input) @@ -1674,6 +1698,35 @@ def test_color_output(self, rank4_input_model, float32_input_model_add_op): assert_spec_output_image_type(mlmodel._spec, expected_feature_type=ft.ImageFeatureType.BGR) verify_prediction(mlmodel) + # check mlprogram can have dynamic shape image output + shape = ct.Shape((1, 3, ct.RangeDim(5, 10), ct.RangeDim(5, 10))) + mlmodel = ct.convert( + rank4_input_model, + inputs=[ct.TensorType(shape=shape, dtype=np.float32)], + outputs=[ct.ImageType(name="output_image", color_layout=ct.colorlayout.RGB)], + minimum_deployment_target=ct.target.macOS13, + ) + assert_ops_in_mil_program(mlmodel, expected_op_list=["cast", "add", "cast"]) + assert_spec_output_image_type(mlmodel._spec, expected_feature_type=ft.ImageFeatureType.RGB) + assert_prog_input_type(mlmodel._mil_program, expected_dtype_str="fp32") + assert_prog_output_type(mlmodel._mil_program, expected_dtype_str="fp32") + assert any_symbolic(mlmodel._mil_program.functions["main"].outputs[0].shape) + verify_prediction(mlmodel) + + # Test output image numerical + sample_input = np.random.randint(low=0, high=200, size=(1, 3, 10, 10)).astype(np.float32) + model_output_pil_image = mlmodel.predict({"x": sample_input})["output_image"] + assert isinstance(model_output_pil_image, Image.Image) + assert model_output_pil_image.mode == "RGBA" + model_output_as_numpy = np.array(model_output_pil_image)[:, :, :3] # last A channel is 255 + model_output_as_numpy = np.transpose(model_output_as_numpy, axes=[2, 0, 1]) + reference_output = rank4_input_model(torch.from_numpy(sample_input)).detach().numpy() + reference_output = np.squeeze(reference_output) + np.testing.assert_allclose(reference_output, model_output_as_numpy, rtol=1e-2, atol=1e-2) + + a_channel = np.array(model_output_pil_image)[:, :, 3].flatten() + assert np.all(a_channel == 255) + def test_grayscale_output(self, rank4_grayscale_input_model): with pytest.raises(TypeError, match="float16 dtype for outputs is only supported for deployment target >= iOS16/macOS13"): ct.convert(rank4_grayscale_input_model, @@ -1811,15 +1864,24 @@ def test_grayscale_fp16_input_image(self, rank4_grayscale_input_model): reference_output = rank4_grayscale_input_model(torch.from_numpy(sample_input.astype(np.float32))).detach().numpy() np.testing.assert_allclose(reference_output, model_output, rtol=1e-2, atol=1e-2) - def test_grayscale_output_image(self, rank4_grayscale_input_model): - mlmodel = ct.convert(rank4_grayscale_input_model, - inputs=[ct.TensorType(name="input", - shape=(1, 1, 10, 20))], - outputs=[ct.ImageType(name="output_image", - color_layout=ct.colorlayout.GRAYSCALE)], - minimum_deployment_target=ct.target.macOS13, - compute_precision=ct.precision.FLOAT32, - ) + @pytest.mark.parametrize( + "dynamic_shape", + [True, False], + ) + def test_grayscale_output_image(self, rank4_grayscale_input_model, dynamic_shape): + + if dynamic_shape: + shape = ct.Shape((1, 1, ct.RangeDim(5, 10), ct.RangeDim(5, 20))) + else: + shape = (1, 1, 10, 20) + + mlmodel = ct.convert( + rank4_grayscale_input_model, + inputs=[ct.TensorType(name="input", shape=shape)], + outputs=[ct.ImageType(name="output_image", color_layout=ct.colorlayout.GRAYSCALE)], + minimum_deployment_target=ct.target.macOS13, + compute_precision=ct.precision.FLOAT32, + ) sample_input = np.random.randint(low=0, high=200, size=(1, 1, 10, 20)).astype(np.float32) model_output_pil_image = mlmodel.predict({"input": sample_input})['output_image'] assert 
isinstance(model_output_pil_image, Image.Image) @@ -1829,15 +1891,27 @@ def test_grayscale_output_image(self, rank4_grayscale_input_model): reference_output = np.squeeze(reference_output) np.testing.assert_allclose(reference_output, model_output_as_numpy, rtol=1e-2, atol=1e-2) - def test_grayscale_fp16_output_image(self, rank4_grayscale_input_model): - mlmodel = ct.convert(rank4_grayscale_input_model, - inputs=[ct.TensorType(name="input", - shape=(1, 1, 10, 20))], - outputs=[ct.ImageType(name="output_image", - color_layout=ct.colorlayout.GRAYSCALE_FLOAT16)], - minimum_deployment_target=ct.target.macOS13, - compute_precision=ct.precision.FLOAT32, - ) + @pytest.mark.parametrize( + "dynamic_shape", + [True, False], + ) + def test_grayscale_fp16_output_image(self, rank4_grayscale_input_model, dynamic_shape): + + if dynamic_shape: + shape = ct.Shape((1, 1, ct.RangeDim(5, 10), ct.RangeDim(5, 20))) + else: + shape = (1, 1, 10, 20) + + mlmodel = ct.convert( + rank4_grayscale_input_model, + inputs=[ct.TensorType(name="input", shape=shape)], + outputs=[ + ct.ImageType(name="output_image", color_layout=ct.colorlayout.GRAYSCALE_FLOAT16) + ], + minimum_deployment_target=ct.target.macOS13, + compute_precision=ct.precision.FLOAT32, + ) + sample_input = np.random.randint(low=0, high=200, size=(1, 1, 10, 20)).astype(np.float32) model_output_pil_image = mlmodel.predict({"input": sample_input})['output_image'] assert isinstance(model_output_pil_image, Image.Image) diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py index 6bacb5ad7..e49783d00 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py @@ -1570,6 +1570,20 @@ def test_convolution2d( bias, groups=1, ): + if ( + backend == ('neuralnetwork', 'fp32') and + padding == 1 and + stride == 2 and + height == 7 and + width == 5 and + in_channels == 3 and + out_channels == 3 and + kernel_size == 2 and + dilation == 3 and + not bias + ): + pytest.xfail("rdar://121954894: Conv2d starts to fail") + if padding == "same" and stride != 1: return model = nn.Conv2d( @@ -3034,9 +3048,48 @@ def forward(self, x, y): else: raise ValueError("Unsupported mode: {mode}".format(mode=mode)) - model = TestModel() self.run_compare_torch( - input_shapes, model, backend=backend, compute_unit=compute_unit + input_shapes, TestModel(), backend=backend, compute_unit=compute_unit + ) + + @pytest.mark.parametrize( + "compute_unit, backend, input_shapes, mode, xdtype, ydtype", + itertools.product( + compute_units, + backends, + [ + [(2, 5, 7, 3), (2, 5, 7, 3)], + [(3, 2, 9), (3, 2, 9)], + [(1, 2, 3), (1,)], + [(1,), (2, 5, 6, 7)], + [(1, 2, 1), (3, 4, 2, 5)], + ], + ["minimum", "maximum"], + (torch.float16, torch.float32), + (torch.float16, torch.float32), + ), + ) + def test_minimum_maximum_mixed_precision( + self, compute_unit, backend, input_shapes, mode, xdtype, ydtype + ): + class TestModel(torch.nn.Module): + def forward(self, x, y): + a = x.to(xdtype) + b = y.to(ydtype) + if mode == "minimum": + return torch.minimum(a, b) + elif mode == "maximum": + return torch.maximum(a, b) + else: + raise ValueError("Unsupported mode: {mode}".format(mode=mode)) + + self.run_compare_torch( + input_shapes, + TestModel(), + compute_unit=compute_unit, + backend=backend, + rtol=1e-6 if xdtype == ydtype and xdtype == torch.float32 else 1e-3, + atol=1e-6 if xdtype == ydtype and xdtype == torch.float32 else 1e-3, ) class 
TestAMaxAMin(TorchBaseTest): @@ -4824,6 +4877,9 @@ class TestEinsum(TorchBaseTest): ), ) def test_binary_einsum(self, compute_unit, backend, equation, reverse_input_order, dynamic): + if dynamic and backend[0] == "mlprogram" and ct.utils._macos_version() > (14, 2): + pytest.xfail("rdar://120386990 (Einsum Model Failed)") + class TestBinaryEinsum(nn.Module): def forward(self, x, y): return torch.einsum(equation, x, y) @@ -4976,11 +5032,41 @@ def test_squeeze(self, compute_unit, backend, rank_and_axis): else: input_shape[0] = 1 input_shape = tuple(input_shape) - model = ModuleWrapper( - function=torch.squeeze, kwargs={"dim": axis} if axis else {} - ) + model = ModuleWrapper(function=torch.squeeze, kwargs={"dim": axis} if axis else {}) + self.run_compare_torch(input_shape, model, backend=backend, compute_unit=compute_unit) + + @pytest.mark.parametrize( + "compute_unit, backend, dynamic, dim", + itertools.product(compute_units, backends, [True, False], [None, 0, 2, (1,), (1, 2)]), + ) + def test_squeeze_non_single_element_dim(self, compute_unit, backend, dynamic, dim): + if backend[0] == "neuralnetwork": + pytest.skip("neuralnetwork backend doesn't support squeeze a not-1 dimension") + if dynamic and compute_unit == ct.ComputeUnit.CPU_ONLY: + pytest.skip("CPU behaves differently from PyTorch for dropping dynamic dim.") + if compute_unit == ct.ComputeUnit.CPU_ONLY and dim in {0, (1,), (1, 2)}: + pytest.xfail("CPU failed non-single-dim squeeze (rdar://124555262)") + + input_shape = (2, 3, 1) + model = ModuleWrapper(function=torch.squeeze, kwargs=None if dim is None else {"dim": dim}) + if dynamic: + converter_input_type = [ + ct.TensorType( + shape=( + ct.RangeDim(upper_bound=10, default=2), + ct.RangeDim(upper_bound=10, default=3), + ct.RangeDim(upper_bound=10, default=1), + ) + ), + ] + else: + converter_input_type = None self.run_compare_torch( - input_shape, model, backend=backend, compute_unit=compute_unit + input_shape, + model, + backend=backend, + compute_unit=compute_unit, + converter_input_type=converter_input_type, ) @@ -5144,8 +5230,43 @@ def test_gather_along_axis(self, compute_unit, backend, rank_and_axis): function=torch.gather, kwargs={"dim": axis, "index": torch.from_numpy(indices)}, ) + self.run_compare_torch([params_shape], model, backend=backend, compute_unit=compute_unit) + + @pytest.mark.parametrize( + "compute_unit, backend, input_enumerated_shape", + itertools.product(compute_units, backends, (True, False)), + ) + def test_gather_enumerated_shape(self, compute_unit, backend, input_enumerated_shape): + axis = 0 + params_shape = (2, 3, 4) + indices_shape = (3, 3, 4) + + class Model(nn.Module): + def forward(self, x, index): + return torch.gather(x, axis, index) + + input_data = [torch.rand(params_shape), torch.randint(0, params_shape[axis], indices_shape)] + # Each model is only allowed for one input feature with enumerated shape. 
+ if input_enumerated_shape: + converter_input_type = [ + ct.TensorType(shape=ct.EnumeratedShapes(shapes=[(2, 3, 4), (3, 4, 5)])), + ct.TensorType(shape=(3, 3, 4), dtype=np.int32), + ] + else: + converter_input_type = [ + ct.TensorType(shape=(2, 3, 4)), + ct.TensorType( + shape=ct.EnumeratedShapes(shapes=[(3, 3, 4), (4, 3, 4)]), dtype=np.int32 + ), + ] self.run_compare_torch( - [params_shape], model, backend=backend, compute_unit=compute_unit + input_data, + Model(), + input_as_shape=False, + converter_input_type=converter_input_type, + backend=backend, + compute_unit=compute_unit, + minimum_deployment_target=ct.target.iOS17, ) def test_gather_along_axis_invalid_indices(self): @@ -5157,6 +5278,42 @@ def test_gather_along_axis_invalid_indices(self): with pytest.raises(RuntimeError, match="index 2 is out of bounds"): torch.gather(data, 1, torch.tensor([[0, 0], [2, 0]])) + @pytest.mark.parametrize( + "compute_unit, backend, dynamic", + itertools.product(compute_units, backends, [True, False]), + ) + def test_gather_nd_int16_indices(self, compute_unit, backend, dynamic): + """Test the indices access in torch model which gets lowered to gather_nd.""" + B, C, H, W, T = 1, 24, 64, 64, 32 + data = torch.rand(B, C, H, W) + time = (torch.rand(1, T) * (C - 1)).to(torch.int) + + class DynamicModel(torch.nn.Module): + def forward(self, data, time): + return data[torch.arange(B).unsqueeze(1), time, :, :] + + class StaticModel(torch.nn.Module): + def forward(self, data): + return data[torch.arange(B).unsqueeze(1), time, :, :] + + torch_model = DynamicModel() if dynamic else StaticModel() + input_data = (data, time) if dynamic else data + converter_input_type = [ct.TensorType(shape=data.shape)] + if dynamic: + converter_input_type.append(ct.TensorType(shape=time.shape, dtype=np.int32)) + + mlmodel = self.run_compare_torch( + input_data, + torch_model, + input_as_shape=False, + converter_input_type=converter_input_type, + backend=backend, + compute_unit=compute_unit, + minimum_deployment_target=ct.target.iOS17, + )[1] + gather_op = mlmodel._mil_program.find_ops(op_type="gather_nd")[0] + assert gather_op.indices.dtype == types.int16 if dynamic else types.uint16 + class TestActivation(TorchBaseTest): @staticmethod @@ -6886,6 +7043,39 @@ def forward(self, x): ) +class TestUnique(TorchBaseTest): + @pytest.mark.parametrize( + "compute_unit, backend, x, return_inverse, return_counts", + itertools.product( + compute_units, + backends, + ( + [1, 2, 3, 2, 2, 3, 99, -1, 1], + [[1, 2, 3, 100], [3, 2, 99, 1]], + ), + (True, False), + (True, False), + ) + ) + def test(self, compute_unit, backend, x, return_inverse, return_counts): + class Model(nn.Module): + def forward(self, x): + return torch.unique( + x, return_inverse=return_inverse, return_counts=return_counts + ) + + if backend[0] == 'neuralnetwork': + pytest.xfail("This op is only supported on mlprogram backend.") + + self.run_compare_torch( + torch.Tensor(x), + Model(), + input_as_shape=False, + backend=backend, + compute_unit=compute_unit, + ) + + class TestFlip(TorchBaseTest): @pytest.mark.parametrize( "compute_unit, backend, rank_dim", @@ -7105,6 +7295,40 @@ def forward(self, cond, x, y): input_as_shape=False, ) + @pytest.mark.parametrize( + "compute_unit, backend, shapes, xdtype, ydtype", + itertools.product( + compute_units, + backends, + [ + [(1, 2), (1, 2), (1, 1)], + [(1, 2, 3), (1, 2, 1), (1, 1, 3)], + ], + (torch.float16, torch.float32), + (torch.float16, torch.float32), + ), + ) + def test_where_mixed_precision(self, compute_unit, backend, shapes, 
xdtype, ydtype): + class WhereModel(nn.Module): + def forward(self, cond, x, y): + a = x.to(xdtype) + b = y.to(ydtype) + return torch.where(cond, a, b) + + cond_shape, x_shape, y_shape = shapes + cond = torch.rand(*cond_shape) > 0.5 + inputs = [cond, torch.rand(*x_shape), torch.rand(*y_shape)] + + self.run_compare_torch( + inputs, + WhereModel(), + compute_unit=compute_unit, + backend=backend, + input_as_shape=False, + rtol=1e-6 if xdtype == ydtype and xdtype == torch.float32 else 1e-3, + atol=1e-6 if xdtype == ydtype and xdtype == torch.float32 else 1e-3, + ) + @pytest.mark.parametrize( "compute_unit, backend, shape", itertools.product(compute_units, backends, COMMON_SHAPES + [(10,)]), @@ -10213,14 +10437,13 @@ def test_different_input_ranks_no_mask( }, ) - res = self.run_compare_torch( + return self.run_compare_torch( [input_shape] * 3, model, backend=backend, compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, - ) - return res[1] + )[1] @pytest.mark.parametrize( "compute_unit, backend, seq_lengths, include_heads", diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py b/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py index bd51b2af1..d41de1e77 100644 --- a/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py +++ b/coremltools/converters/mil/frontend/torch/test/test_torch_quantization_ops.py @@ -90,6 +90,8 @@ def run_compare_torch( rtol=1e-05, input_as_shape=True, minimum_deployment_target=ct.target.iOS17, + compute_unit=ct.ComputeUnit.CPU_ONLY, + converter=ct.convert, ): # TODO(rdar://108472419): properly design a random input if input_as_shape: @@ -103,8 +105,9 @@ def run_compare_torch( input_as_shape=False, backend=("mlprogram", "fp32"), use_scripting=False, - compute_unit=ct.ComputeUnit.CPU_ONLY, + compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, + converter=converter, ) @@ -399,7 +402,7 @@ def forward(self, x): input_shape = [(3, 5)] res = self.run_compare_torch(input_shape, model) prog = res[1]._mil_program - assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize", "matmul"] + assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize", "linear"] @pytest.mark.skipif(not _HAS_TORCH_VISION, reason=MSG_TORCH_VISION_NOT_FOUND) diff --git a/coremltools/converters/mil/frontend/torch/test/testing_utils.py b/coremltools/converters/mil/frontend/torch/test/testing_utils.py index 918be4003..65f03cc36 100644 --- a/coremltools/converters/mil/frontend/torch/test/testing_utils.py +++ b/coremltools/converters/mil/frontend/torch/test/testing_utils.py @@ -80,9 +80,15 @@ def convert_to_coreml_inputs(input_description, inputs): return coreml_inputs -def convert_to_mlmodel(model_spec, tensor_inputs, backend=("neuralnetwork", "fp32"), - converter_input_type=None, compute_unit=ct.ComputeUnit.CPU_ONLY, - minimum_deployment_target=None): +def convert_to_mlmodel( + model_spec, + tensor_inputs, + backend=("neuralnetwork", "fp32"), + converter_input_type=None, + compute_unit=ct.ComputeUnit.CPU_ONLY, + minimum_deployment_target=None, + converter=ct.convert, +): def _convert_to_inputtype(inputs): if isinstance(inputs, list): return [_convert_to_inputtype(x) for x in inputs] @@ -106,9 +112,15 @@ def _convert_to_inputtype(inputs): inputs = None outputs = None - return ct_convert(model_spec, inputs=inputs, convert_to=backend, - source="pytorch", compute_units=compute_unit, - minimum_deployment_target=minimum_deployment_target) + return 
ct_convert( + model_spec, + inputs=inputs, + convert_to=backend, + source="pytorch", + compute_units=compute_unit, + minimum_deployment_target=minimum_deployment_target, + converter=converter, + ) def generate_input_data(input_size, rand_range=(0, 1), torch_device=torch.device("cpu")): @@ -162,6 +174,7 @@ def convert_and_compare( converter_input_type=None, compute_unit=ct.ComputeUnit.CPU_ONLY, minimum_deployment_target=None, + converter=ct.convert, ): """ If expected results is not set, it will by default @@ -175,6 +188,9 @@ def convert_and_compare( torch_model = torch.jit.load(model_spec) else: torch_model = model_spec + if _HAS_TORCH_EXPORT_API: + if isinstance(torch_model, ExportedProgram): + torch_model = torch_model.module() if not isinstance(input_data, (list, tuple)): input_data = [input_data] @@ -183,10 +199,15 @@ def convert_and_compare( torch_input = _copy_input_data(input_data) expected_results = torch_model(*torch_input) expected_results = flatten_and_detach_torch_results(expected_results) - mlmodel = convert_to_mlmodel(model_spec, input_data, backend=backend, - converter_input_type=converter_input_type, - compute_unit=compute_unit, - minimum_deployment_target=minimum_deployment_target,) + mlmodel = convert_to_mlmodel( + model_spec, + input_data, + backend=backend, + converter_input_type=converter_input_type, + compute_unit=compute_unit, + minimum_deployment_target=minimum_deployment_target, + converter=converter, + ) coreml_inputs = convert_to_coreml_inputs(mlmodel.input_description, input_data) @@ -236,6 +257,7 @@ def run_compare_torch( minimum_deployment_target=None, torch_device=torch.device("cpu"), frontend=TorchFrontend.TORCHSCRIPT, + converter=ct.convert, ): """ Traces a model and runs a numerical test. @@ -286,6 +308,7 @@ def run_compare_torch( converter_input_type=converter_input_type, compute_unit=compute_unit, minimum_deployment_target=minimum_deployment_target, + converter=converter, ) return model_spec, mlmodel, coreml_inputs, coreml_results, \ diff --git a/coremltools/converters/mil/frontend/torch/torch_op_registry.py b/coremltools/converters/mil/frontend/torch/torch_op_registry.py index ec8b07547..b542b5bfd 100644 --- a/coremltools/converters/mil/frontend/torch/torch_op_registry.py +++ b/coremltools/converters/mil/frontend/torch/torch_op_registry.py @@ -3,7 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from typing import Callable, List +from typing import Callable import torch @@ -129,7 +129,7 @@ def func_wrapper(func): return func_wrapper(_func) -def is_torch_fx_node_supported(torch_fx_node: torch.fx.Node) -> bool: +def is_torch_fx_node_supported(torch_fx_node: "torch.fx.Node") -> bool: # There are many types of torch fx node: # 1. call_function # 2. 
call_module diff --git a/coremltools/converters/mil/frontend/torch/torchir_passes.py b/coremltools/converters/mil/frontend/torch/torchir_passes.py index 77d73c471..cf784cdf0 100644 --- a/coremltools/converters/mil/frontend/torch/torchir_passes.py +++ b/coremltools/converters/mil/frontend/torch/torchir_passes.py @@ -2,14 +2,16 @@ # # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from collections import OrderedDict, defaultdict +from typing import Dict, Optional from coremltools import _logger as logger from .internal_graph import InternalTorchIRGraph, InternalTorchIRNode -def generate_tensor_assignment_ops(graph): +def generate_tensor_assignment_ops(graph: InternalTorchIRGraph) -> None: """ This graph pass handles inplace tensor assignments, specifically it handles: `torch.Tensor.copy_` and `torch.Tensor.fill_`. There are many other inplace tensor @@ -174,6 +176,7 @@ def _construct_nodes_to_fuse_inputs(nodes_to_fuse): outputs=outputs, kind=kind, blocks=[], + model_hierarchy=node.model_hierarchy, ) graph.nodes[i] = tensor_assign_node @@ -183,7 +186,50 @@ def _construct_nodes_to_fuse_inputs(nodes_to_fuse): graph.outputs[idx] = _get_updated_name(output, updated_tensor_count, out_alias) -def remove_getattr_nodes(graph): +def populate_native_const_model_hierarchy(graph: InternalTorchIRGraph) -> None: + """ + Torchscript doesn't capture the model hierarchy of those python native consts. + For instance: + + class Submodule(torch.nn.Module): + def forward(self, x): + x = x + 0.9 + x = x * 0.9 + return torch.relu(x) + + class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.submodule_1 = Submodule() + + def forward(self, x): + return self.submodule_1(x) + + The two ``0.9`` constants don't have the scope of Submodule. + In this graph pass, we make the model hierarchy of such constants inherited from + their child ops. + """ + + cached_model_hierarchy = {} + child_ops = defaultdict(list) + + for node in graph.nodes: + for b in node.blocks: + populate_native_const_model_hierarchy(b) + + for node in graph.nodes: + cached_model_hierarchy[node.name] = node.model_hierarchy + for val in node.inputs: + child_ops[val].append(node.name) + + for node in graph.nodes: + if node.kind != "constant": + continue + if node.model_hierarchy == "" and len(child_ops[node.name]) == 1: + node.model_hierarchy = cached_model_hierarchy[child_ops[node.name][0]] + + +def remove_getattr_nodes(graph: InternalTorchIRGraph) -> None: """ Remove the getattr nodes in the graph """ @@ -210,7 +256,9 @@ def remove_getattr_nodes(graph): graph.nodes = new_nodes -def transform_inplace_ops(graph, name_remap_dict=None): +def transform_inplace_ops( + graph: InternalTorchIRGraph, name_remap_dict: Optional[Dict[str, str]] = None +) -> None: # As we modify ops, we'll need to remap symbols. if name_remap_dict is None: @@ -272,7 +320,7 @@ def transform_inplace_ops(graph, name_remap_dict=None): graph.outputs[idx] = v -def flatten_graph_input_values(graph): +def flatten_graph_input_values(graph: InternalTorchIRGraph) -> None: """CoreML can't handle nested iterables of tensors, so we flatten the inputs of any graph that expects them. 
""" @@ -317,7 +365,7 @@ def flatten_graph_input_values(graph): graph.nodes = all_new_nodes + graph.nodes -def flatten_graph_output_values(graph): +def flatten_graph_output_values(graph: InternalTorchIRGraph) -> None: """ CoreML can't handle nested iterables of tensors, so we flatten the outputs of any graph that produces them. diff --git a/coremltools/converters/mil/frontend/torch/utils.py b/coremltools/converters/mil/frontend/torch/utils.py index 11f11bd10..62837fe5e 100644 --- a/coremltools/converters/mil/frontend/torch/utils.py +++ b/coremltools/converters/mil/frontend/torch/utils.py @@ -10,9 +10,17 @@ from coremltools.converters.mil.mil import types -# Some ops will receive a dtype input as an integer -# which maps to a torch dtype. The below mapping was found by -# converting test models with different dtypes passed to ones. +# NOTE [represent torch dtype by integer] +# In TorchScript, some ops will receive a dtype input as an integer which maps to a torch dtype. +# The below mapping was found by converting test models with different dtypes passed to ones. +# There is one modification to original torch mapping, though, due to CoreML lacks 64-bit dtype +# When mapping from torch dtype to integer number, we map +# * int64 to int32's number +# * float64 to float32's number +# When mapping from integer number back to torch dtype, we map +# * int64's number to int32 +# * float64's number to float32 +# TODO(https://github.com/apple/coremltools/issues/2153): This is confusing... we should refactor NUM_TO_TORCH_DTYPE = { 0: torch.uint8, 1: torch.int8, @@ -31,24 +39,30 @@ TORCH_DTYPE_TO_NUM = { dtype: val for val, dtype in NUM_TO_TORCH_DTYPE.items() } +TORCH_DTYPE_TO_NUM[torch.int64] = TORCH_DTYPE_TO_NUM[torch.int32] +TORCH_DTYPE_TO_NUM[torch.float64] = TORCH_DTYPE_TO_NUM[torch.float32] -NUMPY_DTYPE_TO_TORCH_NUM = { - np.uint8: 0, - np.int8: 1, - np.int16: 2, - np.int32: 3, - np.int64: 4, - np.float16: 5, - np.float32: 6, - np.float64: 7, - bool: 11, +NUM_TO_NUMPY_DTYPE = { + 0: np.uint8, + 1: np.int8, + 2: np.int16, + 3: np.int32, + 4: np.int32, + 5: np.float16, + 6: np.float32, + 7: np.float32, + 11: bool, } -NUM_TO_NUMPY_DTYPE = { - val: dtype for dtype, val in NUMPY_DTYPE_TO_TORCH_NUM.items() +NUMPY_DTYPE_TO_TORCH_NUM = { + dtype: val for val, dtype in NUM_TO_NUMPY_DTYPE.items() } +NUMPY_DTYPE_TO_TORCH_NUM[np.int64] = NUMPY_DTYPE_TO_TORCH_NUM[np.int32] +NUMPY_DTYPE_TO_TORCH_NUM[np.float64] = NUMPY_DTYPE_TO_TORCH_NUM[np.float32] NUM_TO_DTYPE_STRING = { + 0: "uint8", + 1: "int8", 2: "int16", 3: "int32", 4: "int32", @@ -59,10 +73,12 @@ } TYPE_TO_DTYPE_STRING = { - types.bool: "bool", + types.uint8: "uint8", + types.int8: "int8", + types.int32: "int32", types.fp16: "fp16", types.fp32: "fp32", - types.int32: "int32", + types.bool: "bool", } TORCH_QTYPE_TO_NP_TYPE = { diff --git a/coremltools/converters/mil/mil/__init__.py b/coremltools/converters/mil/mil/__init__.py index 2ec248b9e..96ac3a8f7 100644 --- a/coremltools/converters/mil/mil/__init__.py +++ b/coremltools/converters/mil/mil/__init__.py @@ -20,7 +20,6 @@ ) from .operation import Operation, mil_list, precondition from .program import ( - InputType, Placeholder, Program, Symbol, diff --git a/coremltools/converters/mil/mil/block.py b/coremltools/converters/mil/mil/block.py index 3191911b5..9c5e88209 100644 --- a/coremltools/converters/mil/mil/block.py +++ b/coremltools/converters/mil/mil/block.py @@ -5,15 +5,18 @@ import copy from collections import Counter, OrderedDict -from typing import Tuple +from typing import List, Optional, Set, 
Tuple, Union
from coremltools import _OPSET
from coremltools import _logger as logger
from coremltools.converters.mil._deployment_compatibility import AvailableTarget as _target
+from coremltools.converters.mil.input_types import InputType
from . import SPACES, types
from .operation import Operation
+from .scope import SCOPE_STACK, VALID_OPS_TO_COPY_SCOPE_INFO, ScopeSource, add_graph_pass_scope
from .types.symbolic import is_symbolic, k_used_symbols
+from .utils import CacheDoublyLinkedList
from .var import ComplexVar, InternalVar, Var
from .visitors.dot_visitor import DotVisitor
@@ -21,6 +24,7 @@
BLOCK_STACK = []
DEBUG = False
+
def curr_block():
    if len(BLOCK_STACK) == 0:
        raise ValueError("Must call Builder inside an Function" + " or Block")
@@ -50,6 +54,8 @@ class Block:
        "operations",
        "_internal_vars",
        "outer_op",
+        "cache_operations",
+        "_essential_scope_sources",
    ]
    counter = 0
@@ -108,7 +114,7 @@ def __init__(self, block_inputs=None, outer_op=None, name=None):
        self.name = Block._get_new_name()
        # list[Operation]. Topologically sorted.
-        self.operations = []
+        self.operations = CacheDoublyLinkedList()
        # Must be set before self.validate()
        self.outer_op = outer_op
@@ -124,22 +130,99 @@ def __init__(self, block_inputs=None, outer_op=None, name=None):
        # (infinite recursion). They must be considered as always visible.
        self._internal_vars = set()
+        # List[ScopeSource]. During graph passes, these scope sources cannot be missing.
+        self._essential_scope_sources = []
+
        if self.outer_op is None and not isinstance(self, Function):
            msg = "Block {} is not Function and thus outer_op cannot be None"
            raise ValueError(msg.format(self.name))
        self.validate()
-    def validate(self):
+    def _add_essential_scope_source(
+        self, scope_source: Union[ScopeSource, List[ScopeSource]]
+    ) -> None:
+        """
+        Add essential scope sources to self._essential_scope_sources.
+        When self.validate() is called, we make sure that no source info is missing.
+        """
+        if not isinstance(scope_source, list):
+            scope_source = [scope_source]
+
+        for source in scope_source:
+            if source in self._essential_scope_sources:
+                raise ValueError(f"{source} already exist in _essential_scope_sources.")
+            self._essential_scope_sources.append(source)
+
+    def _check_has_scope_info(self) -> None:
+        """
+        Check no ops in the function are missing scope information.
+        """
+
+        def _check_has_scope_info_block(block: Block):
+            for op in block.operations:
+                for b in op.blocks:
+                    _check_has_scope_info_block(b)
+                for scope in self._essential_scope_sources:
+                    if scope not in op.scopes or len(op.scopes[scope]) == 0:
+                        raise ValueError(
+                            f"op {op.name} with scopes {op.scopes} is missing essential scopes {scope}."
+                        )
+
+        _check_has_scope_info_block(self)
+
+    def _check_vars_visibility_in_block(
+        self, visible_vars_from_outer_block: Optional[Set[Var]] = None
+    ):
+        """
+        This util does a one-pass, program-wide check of var visibility:
+        each input of an op must appear before the op in the sequential order.
+
+        For debugging, if you want to pinpoint the operation that caused the
+        invalid program state, set DEBUG=True, and it will be caught by the ``is_var_visible_in_block`` util.
+        """
+        if visible_vars_from_outer_block is None:
+            visible_vars_from_outer_block = set()
+        block_inputs = list(self.inputs.values()) if isinstance(self, Function) else self.inputs
+        visible_vars_in_block = set(block_inputs)
+
+        for op in self.operations:
+            for b in op.blocks:
+                b._check_vars_visibility_in_block(
+                    visible_vars_from_outer_block=visible_vars_from_outer_block.union(
+                        visible_vars_in_block
+                    )
+                )
+            for val in op.get_flattened_inputs():
+                if (
+                    val not in self._internal_vars
+                    and val not in visible_vars_in_block
+                    and val not in visible_vars_from_outer_block
+                ):
+                    raise ValueError(f"Var {val} not visible in the block {self.name}.")
+            for out_var in op.outputs:
+                visible_vars_in_block.add(out_var)
+
+    def validate(
+        self,
+        force_validate: Optional[bool] = False,
+        check_essential_scope: Optional[bool] = False,
+    ) -> None:
        """
        Basic validation to protect against some invalid state.
+        If force_validate is False, the validation is done only if the global variable DEBUG=True.
        """
-        if not DEBUG:
+        if not DEBUG and not force_validate:
            return
+        # Check vars visibility
+        if isinstance(self, Function):
+            self._check_vars_visibility_in_block()
+
+        # Other validations
        for op in self.operations:
            for b in op.blocks:
-                b.validate()
+                b.validate(force_validate=force_validate)
            if op.outputs is None:
                raise InvalidBlockStateError()
@@ -191,6 +274,20 @@ def validate(self):
                    msg = "Var {} should be output of block {}: {}"
                    raise ValueError(msg.format(ov.name, b.name, b))
+        # check that internal vars are consistent with self._internal_vars
+        internal_var_in_block = set()
+        for op in self.operations:
+            for v in op.internal_inputs.values():
+                internal_var_in_block.add(v)
+        if not internal_var_in_block == self._internal_vars:
+            raise ValueError(
+                "internal vars in the block are not consistent with self._internal_vars."
+            )
+
+        # check that essential scope info is not missing
+        if check_essential_scope:
+            self._check_has_scope_info()
+
    def remove_inputs(self, curr_input_vars):
        """
        curr_input_vars: list[Var], whose elements must be in
@@ -236,7 +333,7 @@ def inputs(self):
    def outputs(self):
        return self._outputs
-    def is_var_visible_in_block(self, var, upto_op_with_id=None):
+    def is_var_visible_in_block(self, var: Var, upto_op: Optional[Operation] = None):
        """
        Checks if a var is visible to ops starting from id=`upto_op_with_id`
        inside the block.
@@ -248,33 +345,60 @@ def is_var_visible_in_block(self, var, upto_op_with_id=None):
        If upto_op_with_id is None, outputs of all operations inside the block
        are visible to that block.
+
+        For debugging:
+        - By default (DEBUG=False), this util is guarded by the flag in the calling code and does not run.
+        - By setting DEBUG=True, this util is triggered in multiple places in the code base,
+          so users can pinpoint the exact place where the converter makes an invalid operation.
+          Beware that the converter can be slow in debug mode, since this util makes the overall
+          conversion time explode to O(N^2) in the average case.
        """
+        if not DEBUG:
+            # Only in debug mode is there a chance that self.operations is a list when executing this function.
+            assert isinstance(
+                self.operations, CacheDoublyLinkedList
+            ), "operations must be type of CacheDoublyLinkedList."
if var in self._internal_vars: return True - inputs = self.function_inputs if isinstance(self, Function) else self.inputs + inputs = list(self.inputs.values()) if isinstance(self, Function) else self.inputs if var in inputs: return True - idx = len(self.operations) if upto_op_with_id is None else upto_op_with_id - - for i in range(idx-1, -1, -1): - op_outputs = self.operations[i].outputs - if op_outputs is not None and var in op_outputs: + if upto_op is None: + if var.op in self.operations: return True + else: + if isinstance(self.operations, list): + # This could only happen in debug mode + assert DEBUG is True, "block.operations can only be type of list in debug mode." + idx = self.find_op_id_in_block(upto_op) + for i in range(idx - 1, -1, -1): + if var.op is self.operations[i]: + return True + else: + cursor = self.operations._get_node_from_op(upto_op).prev + while cursor is not None: + if cursor.op is var.op: + return True + cursor = cursor.prev if self.outer_op is not None: enclosing_block = self.outer_op.enclosing_block - outer_op_id = enclosing_block.find_op_id_in_block(self.outer_op) - if enclosing_block.is_var_visible_in_block(var, upto_op_with_id=outer_op_id): + if enclosing_block.is_var_visible_in_block(var, upto_op=self.outer_op): return True return False - def find_op_id_in_block(self, target_op): + def find_op_id_in_block(self, target_op: Operation) -> int: + if len(self.operations) > 0 and target_op == self.operations[-1]: + return len(self.operations) - 1 + + op_list = self.operations if isinstance(self.operations, list) else list(self.operations) + try: - idx = self.operations.index(target_op) + idx = op_list.index(target_op) except ValueError: raise ValueError("Op {} not found in {}: {}".format(target_op.name, self.name, self)) return idx @@ -287,13 +411,16 @@ def set_outputs(self, outputs): raise ValueError("Outputs must be list of Vars") self.validate() - for ov in outputs: - if not self.is_var_visible_in_block(ov): - msg = ( - "Var {} is not visible in block {} and thus cannot " - + "be a block output.\n{}" - ) - raise ValueError(msg.format(ov.name, self.name, self)) + + # check var visibility in debug mode + if DEBUG: + for ov in outputs: + if not self.is_var_visible_in_block(ov): + msg = ( + "Var {} is not visible in block {} and thus cannot " + + "be a block output.\n{}" + ) + raise ValueError(msg.format(ov.name, self.name, self)) # For duplicate vars in self._outputs, only remove block once. for ov in set(self._outputs): @@ -317,7 +444,7 @@ def __exit__(self, type, value, traceback): global BLOCK_STACK BLOCK_STACK = BLOCK_STACK[:-1] - def _insert_op_before(self, new_op, before_op=None): + def _insert_op_before(self, new_op: Operation, before_op: Optional[Operation] = None): """ A private API used by builder. Please use `builder.YOUR_OP(...,before_op)`. 
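# Illustrative sketch (not part of the patch): callers do not invoke _insert_op_before directly;
# a graph pass builds the new op through the builder and passes `before_op`, e.g.
# (`x` and `existing_op` are hypothetical Var/Operation objects):
from coremltools.converters.mil import Builder as mb

new_var = mb.relu(x=x, before_op=existing_op)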
@@ -351,42 +478,91 @@ def _insert_op_before(self, new_op, before_op=None):
        """
        self.validate()
-        idx = len(self.operations) if before_op is None else self.find_op_id_in_block(before_op)
-
-        # check inputs are visible
-        for k, v in new_op.inputs.items():
-            if not isinstance(v, (Var, tuple)):
-                continue
-            vs = [v] if isinstance(v, Var) else v
-            for v in vs:
-                if not self.is_var_visible_in_block(v, upto_op_with_id=idx):
-                    before_op_name = before_op.name if before_op is not None else "None"
-                    msg = "Op '{}' input {}={} is not in scope of {} before {}"
-                    raise ValueError(msg.format(new_op.name, k, v.name, self.name, before_op_name))
+        if isinstance(self.operations, CacheDoublyLinkedList):
+            self.operations.insert_op_before(new_op, before_op)
+            return
-        # add new_op
        if before_op is None:
            self.operations.append(new_op)
-        else:
-            self.operations.insert(idx, new_op)
+            return
+
+        # check input visibility in debug mode
+        if DEBUG:
+            for k, v in new_op.inputs.items():
+                if not isinstance(v, (Var, tuple)):
+                    continue
+                vs = [v] if isinstance(v, Var) else v
+                for v in vs:
+                    if not self.is_var_visible_in_block(v, upto_op=before_op):
+                        before_op_name = before_op.name if before_op is not None else "None"
+                        msg = "Op '{}' input {}={} is not in scope of {} before {}"
+                        raise ValueError(
+                            msg.format(new_op.name, k, v.name, self.name, before_op_name)
+                        )
+
+        idx = self.find_op_id_in_block(before_op)
+        self.operations.insert(idx, new_op)
    def _replace_var(
        self,
-        old_var,
-        new_var,
-        start=0,
-        end_id=-1,
-        no_check_var_types=False,
+        old_var: Var,
+        new_var: Var,
+        anchor_op: Optional[Operation] = None,
+        end_op: Optional[Operation] = None,
+        no_check_var_types: Optional[bool] = False,
    ):
        """
        Helper function for replace_uses_of_var_after_op
        """
+        self._copy_metadata(old_var, new_var)
+        self._copy_scope_info(old_var, new_var)
+
        num_ops_affected = 0
-        if end_id == -1:
-            op_list = self.operations[start:]
+        # If we start checking right after the old_var, we can reduce the time
+        # complexity hugely, by only checking the child_ops, without iterating
+        # through the whole program.
+        # This fix reduces the overall time from O(N) -> O(1).
+        replace_vars_right_after_old_var = (
+            end_op is None
+            and len(self.operations) > 0
+            and anchor_op is not None
+            and anchor_op is old_var.op
+        )
+
+        # We should only compute start_idx and end_idx once if needed.
+        start_idx = end_idx = None
+
+        if replace_vars_right_after_old_var:
+            op_list = list(old_var.child_ops)
        else:
-            op_list = self.operations[start : end_id + 1]
+            if isinstance(self.operations, list):
+                start_idx = self.find_op_id_in_block(anchor_op) + 1 if anchor_op is not None else 0
+                end_idx = (
+                    self.find_op_id_in_block(end_op)
+                    if end_op is not None
+                    else len(self.operations) - 1
+                )
+                op_list = self.operations[start_idx : end_idx + 1]
+            else:
+                assert isinstance(
+                    self.operations, CacheDoublyLinkedList
+                ), f"Expect operations be type of CacheDoublyLinkedList. Got {type(self.operations)}."
+                if len(self.operations) == 0 and anchor_op is not None:
+                    raise ValueError(f"anchor op {anchor_op} not in the block.")
+
+                start_node = (
+                    self.operations.start
+                    if anchor_op is None
+                    else self.operations._get_node_from_op(anchor_op).next
+                )
+                cursor = start_node
+                op_list = []
+                while cursor is not None:
+                    op_list.append(cursor.op)
+                    if cursor.op is end_op:
+                        break
+                    cursor = cursor.next
        for op in op_list:
            new_inputs = {}
@@ -409,7 +585,43 @@ def _replace_var(
            for b in op.blocks:
                num_ops_affected += b._replace_var(old_var, new_var)
-        if end_id != -1 and old_var.op not in op_list:
+        # Replace the consuming blocks' outputs.
+        # It is important to use a list copy here,
+        # since replace_block_output_var is going to change the consuming_blocks.
+        # Note that there are some expensive index queries in the following implementation,
+        # but overall it won't affect the time complexity too much,
+        # since we can assume the number of block outputs in a program is a constant.
+        # As a result, the amortized time complexity will not blow up.
+        for b in list(old_var.consuming_blocks):
+            outer_op = b.outer_op
+
+            if outer_op is not None:
+                # Query the start and end index if needed
+                if start_idx is None:
+                    start_idx = (
+                        self.find_op_id_in_block(anchor_op) + 1 if anchor_op is not None else 0
+                    )
+                if end_idx is None:
+                    end_idx = (
+                        self.find_op_id_in_block(end_op)
+                        if end_op is not None
+                        else len(self.operations) - 1
+                    )
+
+                op_to_idx = {}
+                while outer_op is not None:
+                    block = outer_op.enclosing_block
+                    if block is self:
+                        if len(op_to_idx) == 0:
+                            for idx, op in enumerate(self.operations):
+                                op_to_idx[op] = idx
+                        op_idx = op_to_idx[outer_op]
+                        if op_idx >= start_idx and op_idx <= end_idx:
+                            b.replace_block_output_var(old_var, new_var)
+                        break
+                    outer_op = block.outer_op
+
+        if end_op is not None and old_var.op not in op_list:
            return num_ops_affected
        if old_var in self._block_inputs:
@@ -420,7 +632,6 @@ def _replace_var(
        # If old_var is block's output, replace as well.
        self.replace_block_output_var(old_var, new_var)
-
        return num_ops_affected
    def replace_block_output_var(
@@ -451,12 +662,11 @@ def replace_block_output_var(
    def try_replace_uses_of_var_after_op(
        self,
-        anchor_op,
-        old_var,
-        new_var,
-        end_op=None,
-        no_check_var_types=False,
-        no_check_var_visibility=False,
+        anchor_op: Operation,
+        old_var: Var,
+        new_var: Var,
+        end_op: Optional[Operation] = None,
+        no_check_var_types: Optional[bool] = False,
    ):
        """
        :param anchor_op: Operation
@@ -464,8 +674,7 @@ def try_replace_uses_of_var_after_op(
        :param new_var: Var
        :param end_op: Operation
        :param no_check_var_types: bool
-        :param no_check_var_visibility: bool
-        :return: True if the old_var can be replaced by new_var. False otherwise.
+        :return: True if the old_var can be replaced by new_var. False otherwise.
        This helper function guards the replace_uses_of_var_after_op function,
        by first checking if the old_var could be replaced by the new_var.
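# Illustrative sketch (not part of the patch): the typical pattern a graph pass uses with this
# helper -- try to redirect all uses of an op's output to a replacement var, then drop the old
# op. `block`, `old_op`, and `new_var` are hypothetical.
if block.try_replace_uses_of_var_after_op(
    anchor_op=old_op,
    old_var=old_op.outputs[0],
    new_var=new_var,
):
    block.remove_ops([old_op])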
@@ -482,19 +691,65 @@ def try_replace_uses_of_var_after_op( old_var=old_var, new_var=new_var, no_check_var_types=no_check_var_types, - no_check_var_visibility=no_check_var_visibility, ) return True + @staticmethod + def _copy_scope_info(src: Var, dst: Var) -> None: + """ + Populate meta data from old var (src) to new var (dst) + """ + curr_scopes = SCOPE_STACK.get_curr_scopes() + + if ScopeSource.COREMLTOOLS_GRAPH_PASS in curr_scopes: + + if src.op in VALID_OPS_TO_COPY_SCOPE_INFO[-1]: + return + + elif dst.op in VALID_OPS_TO_COPY_SCOPE_INFO[-1]: + op = dst.op + assert op is not None, "new_var cannot be a placeholder output" + VALID_OPS_TO_COPY_SCOPE_INFO[-1].remove(op) + + # If old_var is a placeholder output, we assign defaults values to essential scope source + old_scopes = src.scopes + if len(old_scopes) == 0: + essential_scope_sources = op.enclosing_block._essential_scope_sources + for val in essential_scope_sources: + res = None + if val == ScopeSource.TORCHSCRIPT_MODULE_TYPE: + res = ["__COREML__::TORCHSCRIPT_PLACEHOLDER"] + elif val == ScopeSource.TORCHSCRIPT_MODULE_NAME: + res = [f"__COREML__::TORCHSCRIPT_PLACEHOLDER_{src.name}"] + elif val == ScopeSource.EXIR_DEBUG_HANDLE: + res = [None] + else: + raise ValueError(f"No default placeholder info for {val}.") + old_scopes[val] = res + + dst.scopes = add_graph_pass_scope(old_scopes, dst.scopes) + + for input in op.inputs.values(): + if not isinstance(input, (list, tuple)): + input = [input] + for i in input: + Block._copy_scope_info(src, i) + + @staticmethod + def _copy_metadata(old_var: Var, new_var: Var) -> None: + """ + Populate meta data from old var to new var + """ + return + def replace_uses_of_var_after_op( self, - anchor_op, - old_var, - new_var, - no_check_var_visibility=False, - end_op=None, - no_check_var_types=False, - force_replace=False, + anchor_op: Operation, + old_var: Var, + new_var: Var, + end_op: Optional[Operation] = None, + no_check_var_types: Optional[bool] = False, + force_replace: Optional[bool] = False, ): """ Replace all uses of `old_var` with `new_var` after `anchor_op`, @@ -508,9 +763,6 @@ def replace_uses_of_var_after_op( `end_op` is None, all occurrences of `old_var` are replaced in the block starting from the op just after `anchor_op` - no_check_var_visibility: True to disable the check ensuring new_var is visible - (visibility requirement depends on anchor_op). - no_check_var_types: An error will be raised if the type of new_var is not same as the old_var, unless `no_check_var_types` is set to True. Normally type inference is re-invoked for all the child ops of `old_var` after updating it to `new_var`. However, @@ -589,13 +841,10 @@ def replace_uses_of_var_after_op( ).format(old_var, new_var, err_var) raise ValueError(msg) - start = self.find_op_id_in_block(anchor_op) + 1 if anchor_op is not None else 0 - end_id = self.find_op_id_in_block(end_op) if end_op is not None else -1 - - if not no_check_var_visibility: + # It is expensive to check the var visibility, and it should only be done while debugging. 
+ if DEBUG: self.validate() - idx = start if anchor_op is not None else len(self.operations) visibility_error_msg = ( "new_var '{}' is not visible in block '{}' at or before " + "anchor_op '{}'" @@ -603,51 +852,47 @@ def replace_uses_of_var_after_op( anchor_op_name = "None" if anchor_op is None else anchor_op.name if isinstance(new_var, ComplexVar): - # For CompleVar, as it's just a temp wrapper to transit the real and imag data, we + # For ComplexVar, as it's just a temp wrapper to transit the real and imag data, we # check the visibility of its real and imaginary Var instead. - if not self.is_var_visible_in_block(new_var.real, upto_op_with_id=idx): + if not self.is_var_visible_in_block(new_var.real, upto_op=anchor_op): raise ValueError( - visibility_error_msg.format( - new_var.real.name, self.name, anchor_op_name - ) + visibility_error_msg.format(new_var.real.name, self.name, anchor_op_name) ) - if not self.is_var_visible_in_block(new_var.imag, upto_op_with_id=idx): + if not self.is_var_visible_in_block(new_var.imag, upto_op=anchor_op): raise ValueError( - visibility_error_msg.format( - new_var.imag.name, self.name, anchor_op_name - ) + visibility_error_msg.format(new_var.imag.name, self.name, anchor_op_name) ) else: - if not self.is_var_visible_in_block(new_var, upto_op_with_id=idx): + if not self.is_var_visible_in_block(new_var, upto_op=anchor_op): raise ValueError( - visibility_error_msg.format( - new_var.name, self.name, anchor_op_name - ) + visibility_error_msg.format(new_var.name, self.name, anchor_op_name) ) + start = self.find_op_id_in_block(anchor_op) + 1 if anchor_op is not None else 0 + end_id = self.find_op_id_in_block(end_op) if end_op is not None else -1 - if end_id != -1 and end_id < start: - msg = "end_op '{}' comes before the anchor_op '{}'" - raise ValueError(msg.format(end_op.name, anchor_op.name)) + if end_id != -1 and end_id < start: + msg = "end_op '{}' comes before the anchor_op '{}'" + raise ValueError(msg.format(end_op.name, anchor_op.name)) num_ops_affected = self._replace_var( old_var, new_var, - start=start, - end_id=end_id, + anchor_op=anchor_op, + end_op=end_op, no_check_var_types=no_check_var_types, ) logger.debug("Num ops affected in replacing var: {}".format(num_ops_affected)) - def remove_ops(self, existing_ops): + def remove_ops(self, ops_to_remove: List[Operation]): """ - Remove ops in `existing_ops`. + Remove ops in `ops_to_remove`. - Args: existing_ops: List[Operation]. All ops in this list must be pre-existing in the + Args: ops_to_remove: List[Operation]. All ops in this list must be pre-existing in the block. It allows duplicated ops, but duplicated ops will only be removed once. Raises: - ValueError if any `op` in `existing_ops` meets any of following conditions: + ValueError if any `op` in `ops_to_remove` meets any of following conditions: - `op` is not found in the block - any other op in the block uses output Vars of `op` - the output var is block's output @@ -655,99 +900,44 @@ def remove_ops(self, existing_ops): self.validate() # Dedup ops because each op can only be deleted once. - existing_ops_set = set(existing_ops) - existing_ops = list(existing_ops_set) - # Find the idx of each to-be-removed op, and raise errors if any op couldn't be found. 
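# Illustrative sketch (not part of the patch): the expensive visibility validation below is
# gated on the module-level DEBUG flag of this file, so it can be re-enabled when debugging
# a misbehaving graph pass:
from coremltools.converters.mil.mil import block as _block

_block.DEBUG = True  # turns the is_var_visible_in_block checks back on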
- idxs = [-1] * len(existing_ops) - for i, op in enumerate(self.operations): - if op in existing_ops_set: - idxs[existing_ops.index(op)] = i - if -1 in idxs: - not_found = [] - for i, op in zip(idxs, existing_ops): - if i == -1: - not_found.append(op.name) - raise ValueError( - "Ops {} not found in block {}".format(not_found, self.name) - ) - - # Remove ops in reverse topological order - pairs = list(zip(idxs, existing_ops)) - pairs.sort(key=lambda x: x[0], reverse=True) + ops_to_remove_set = set(ops_to_remove) + ops_to_remove = list(ops_to_remove_set) - for idx, op in pairs: + for op in ops_to_remove: for i, v in enumerate(op.outputs): - # Check that no ops depend on op's outputs - if len(v.child_ops) > 0: - child_op_names = [s.name for s in v.child_ops] - msg = ( - "Cannot delete op '{}' with active output at id {}: '{}' " - + "used by ops {}" - ) - raise ValueError(msg.format(op.name, i, v.name, child_op_names)) # Check that the output Var isn't block's output if v in self._outputs: - msg = ( - "cannot delete op {} with output {}: {} " - + "that's block {}'s output" + raise ValueError( + f"cannot delete op {op.name} with output {i}: {v.name} that's block {self.name}'s output." ) - raise ValueError(msg.format(op.name, i, v.name, self.name)) for b in op.blocks: b.set_outputs([]) b.remove_ops(b.operations) - # Remove the op (in reverse topological order) - self.operations.pop(idx) - op.enclosing_block = None - - for v in op.inputs.values(): - if isinstance(v, (tuple, list)): - for vv in v: - vv.remove_child_op(op) - else: - v.remove_child_op(op) - - def operations_for_vars(self, end_vs): - """ - Inputs: - - end_vs: list[Operation]. - - Return: + self.operations.remove(op) - list[Operation] which are subset of self.operations that are ancestors - of `end_vs`. Also do recursion into nested blocks. - """ - used_vars = set(end_vs) - used_ops = [] - for op in reversed(self.operations): - # if none of op's output is used, delete op - if not set(op.outputs).intersection(used_vars): - continue - - used_ops.append(op) # append in reverse topological order + op.enclosing_block = None - # recursively search for nested blocks - ops_to_check = [] - for b in op.blocks: - ops_to_check += b.operations_for_vars(b.outputs) - ops_to_check.append(op) + for v in op.get_flattened_inputs(): + v.remove_child_op(op) - # mark used vars - for op_to_check in ops_to_check: - # mark all op's inputs to used - for _, input_var in op_to_check.inputs.items(): - if isinstance(input_var, (tuple, list)): - used_vars.update(list(input_var)) - else: - used_vars.add(input_var) + # Remove InternalVar from self._internal_vars + for v in op.internal_inputs.values(): + self._internal_vars.remove(v) - return used_ops[::-1] + # In the end, we check no ops depend on removed op's outputs + for op in ops_to_remove: + for i, v in enumerate(op.outputs): + if len(v.child_ops) > 0: + child_op_names = [s.name for s in v.child_ops] + raise ValueError( + f"Cannot delete op '{op.name}' with active output at id {i}: '{v.name}' used by ops {child_op_names}." 
+ ) def _propagate_nonreplaceable_vars(self): def propagate_nonreplaceable_vars_block(block): - for op in list(block.operations): + for op in block.operations: for b in op.blocks: propagate_nonreplaceable_vars_block(b) if op.outputs is None: @@ -757,7 +947,7 @@ def propagate_nonreplaceable_vars_block(block): o._set_nonreplaceable_vars_upstream() propagate_nonreplaceable_vars_block(self) - def indented_str(self, indent=None): + def indented_str(self, indent: Optional[str] = None, print_attr: Optional[bool] = False) -> str: if indent is None: indent = "" s = ( @@ -768,7 +958,7 @@ def indented_str(self, indent=None): ) s += ") {\n" for op in self.operations: - s += op.indented_str(indent + SPACES * 1) + s += op.indented_str(indent + SPACES * 1, print_attr=print_attr) s += indent + "} -> (" if self._outputs is not None: s += ", ".join(["%" + v.name for v in self._outputs]) @@ -842,17 +1032,18 @@ def __init__(self, inputs, opset_version=None): """ self.placeholder_inputs = inputs self.opset_version = opset_version + self.output_types = None + self.input_types = [] # str -> Var self._input_dict = OrderedDict() for k, v in self.placeholder_inputs.items(): v.set_name(k) # set to user input name self._input_dict[k] = v.outputs[0] - self.function_inputs = tuple(self._input_dict.values()) global k_used_symbols global k_num_internal_syms - for inp in self.function_inputs: + for inp in self._input_dict.values(): if types.is_tensor(inp.dtype): shapes = inp.dtype.get_shape() for s in shapes: @@ -884,7 +1075,9 @@ def __repr__(self): def __str__(self): return self.to_str("function") - def to_str(self, func_name="function"): + def to_str( + self, func_name: Optional[str] = "function", print_attr: Optional[bool] = False + ) -> str: func_name = func_name + "[{}]".format(_OPSET[self.opset_version]) if len(self._input_dict) == 0: s = func_name + "()" @@ -893,9 +1086,10 @@ def to_str(self, func_name="function"): s = func_name + "(" + str(inputs[0][1]) for in_name, ph in inputs[1:]: s += ",\n" + " " * (len(func_name) + 1) + str(ph) - s += ") {\n" - s += self.indented_str(SPACES) - s += "}\n" + s += ")" + s += " {\n" + s += self.indented_str(SPACES, print_attr=print_attr) + s += "}\n" return s def get_max_opset_version_and_op(self) -> Tuple[_target, Operation]: @@ -909,7 +1103,7 @@ def get_max_opset_version_and_op(self) -> Tuple[_target, Operation]: def update_max_opset_version_block(block): nonlocal max_opset_version nonlocal op_with_max_opset_version - for op in list(block.operations): + for op in block.operations: for b in op.blocks: update_max_opset_version_block(b) if not hasattr(op, "_op_variants") or not isinstance(op._op_variants, dict): @@ -920,3 +1114,25 @@ def update_max_opset_version_block(block): update_max_opset_version_block(self) return max_opset_version, op_with_max_opset_version + + def set_output_types(self, outputs: Optional[List[InputType]] = None) -> None: + """ + Set the user defined output type for a function. + Note: the common::update_output_dtypes graph pass takes this information, + and changes the function output signature accordingly. 
+ """ + if outputs is not None: + if not ( + isinstance(outputs, list) and all([isinstance(out, InputType) for out in outputs]) + ): + raise TypeError( + "main outputs should be a list of type ct.TensorType or ct.ImageType" + ) + self.output_types = outputs + + def set_input_types(self, input_types: List[InputType]): + if not isinstance(input_types, tuple): + raise ValueError("main inputs should be tuple of TensorType or ImageType") + elif not all([isinstance(inp, InputType) for inp in input_types]): + raise ValueError("main inputs should be tuple of InputSpec") + self.input_types = input_types diff --git a/coremltools/converters/mil/mil/builder.py b/coremltools/converters/mil/mil/builder.py index f53bbd1fb..95c74890c 100644 --- a/coremltools/converters/mil/mil/builder.py +++ b/coremltools/converters/mil/mil/builder.py @@ -5,17 +5,25 @@ import numbers from collections import defaultdict -from typing import Callable, List, Optional +from typing import Any, Callable, List, Optional, Tuple, Type import numpy as np from coremltools import _logger as logger +from coremltools.converters.mil import mil from coremltools.converters.mil._deployment_compatibility import AvailableTarget from coremltools.converters.mil.mil.types.symbolic import any_symbolic from .block import Function, curr_block from .input_type import InternalInputType, ListOrTensorInputType, TensorInputType, TupleInputType -from .program import Placeholder, Program +from .program import Placeholder +from .scope import ( + SCOPE_STACK, + VALID_OPS_TO_COPY_SCOPE_INFO, + ScopeContextManger, + ScopeInfo, + ScopeSource, +) from .var import InternalVar, Var @@ -131,6 +139,8 @@ def _create_vars(cls, input_spec, op_name, before_op, new_var_name = op_name + "_" + k if isinstance(in_type, TupleInputType): var = [] + if not isinstance(val, (list, tuple)): + raise ValueError(f"Invalid type {type(val)} for TupleInputType param.") for i, v in enumerate(val): if isinstance(v, Var): var.append(v) @@ -165,8 +175,16 @@ def _add_op(cls, op_cls, **kwargs): op_name=kwargs["name"], before_op=before_op, candidate_kv=kwargs)) kwargs["enclosing_block"] = curr_block() + + # Add scope information + current_scopes = SCOPE_STACK.get_curr_scopes() + kwargs["scopes"] = current_scopes new_op = op_cls(**kwargs) + # We record if the op is created under graph pass + if len(current_scopes) == 1 and ScopeSource.COREMLTOOLS_GRAPH_PASS in current_scopes: + VALID_OPS_TO_COPY_SCOPE_INFO[-1].add(new_op) + # Initialize optional input Vars if it wasn't in kwargs default_inputs = new_op.default_inputs() # Shallow copy list inputs to ensure op inputs are immutable @@ -187,8 +205,13 @@ def _add_op(cls, op_cls, **kwargs): return new_op.outputs @staticmethod - def placeholder(shape, dtype=None, allow_rank0_input=False): - return Placeholder(shape, dtype, allow_rank0_input=allow_rank0_input) + def placeholder( + shape: Tuple[Any], + dtype: Optional[Type] = None, + allow_rank0_input: Optional[bool] = False, + name: Optional[str] = None, + ) -> Placeholder: + return Placeholder(shape, dtype, allow_rank0_input=allow_rank0_input, name=name) @staticmethod def TensorSpec(shape, dtype=None): @@ -294,7 +317,65 @@ def program( """ def wrapper(main_block): function = Builder._create_function(main_block, input_specs, opset_version) - program = Program() + program = mil.Program() program.add_function(function_name, function) return program return wrapper + + @staticmethod + def scope( + *scopes: List[ScopeInfo], + ) -> ScopeContextManger: + """ + The ``mb.scope`` creates a context manager, 
which makes the operations created within it have the corresponding scope information. + + Parameters + ---------- + scopes: Optional[List[ScopeInfo]] (Optional) + * A list of ScopeInfo under the context manager. + * The source in each ScopeInfo cannot be duplicated. + * If not provided, this context manager has no effect. + + Examples + -------- + Here is an example of creating a scope for torchscript module hierarchy with type and name information. + + .. sourcecode:: python + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1"]), + ): + return mb.add(x=x, y=4.3, name="add_1") + + + In the above example, the "add_1" op will have two scope attributes, for torchscript module type and name: + * TORCHSCRIPT_MODULE_TYPE: ["Module1"] + * TORCHSCRIPT_MODULE_NAME: ["module_1"] + + Here is an example of creating nested scopes: + + .. sourcecode:: python + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1"]), + ): + x = mb.add(x=x, y=4.3, name="add_1") + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module2"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_2"]), + ): + return mb.add(x=x, y=3.2, name="add_2") + + In the above example, the "add_1" op would have a scope attribute: + * TORCHSCRIPT_MODULE_TYPE: ["Module1"] + + while the "add_2" op would have scope attributes: + * TORCHSCRIPT_MODULE_TYPE: ["Module1", "Module2"] + * TORCHSCRIPT_MODULE_NAME: ["module_2"] + """ + return ScopeContextManger(*scopes) diff --git a/coremltools/converters/mil/mil/operation.py b/coremltools/converters/mil/mil/operation.py index 050b1a494..8f3536f06 100644 --- a/coremltools/converters/mil/mil/operation.py +++ b/coremltools/converters/mil/mil/operation.py @@ -3,7 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from typing import Any, Dict, Tuple +from typing import Any, Dict, Optional, Tuple import numpy as np @@ -167,6 +167,7 @@ def __init__(self, **kwargs): self._input_vars = {} self.blocks = [] self.enclosing_block = kwargs["enclosing_block"] + self.scopes = kwargs["scopes"] # Initialize inputs as object attributes (all None) for k in self._input_types.keys(): @@ -205,10 +206,10 @@ def _check_expected_inputs(self, kwargs): "value", "version", "before_op", - "no_check_var_visibility", # no_check_var_visibility==True to deviate from SSA "no_check_var_types", # no_check_var_types==True to force set inputs, even if type does not match with earlier ones "enclosing_block", + "scopes", ] for k in kwargs.keys(): if k not in non_attributes and k not in self._input_types: @@ -541,6 +542,13 @@ def inputs(self): if not isinstance(v, InternalVar) and v is not None } + @property + def internal_inputs(self) -> Dict[str, InternalVar]: + """ + Get internal var inputs of an op.
+ """ + return {k: v for k, v in self._input_vars.items() if isinstance(v, InternalVar)} + @property def outputs(self): return self._output_vars @@ -584,23 +592,26 @@ def var_to_str(v): return "%" + v.name - def indented_str(self, indent=""): + def indented_str(self, indent: Optional[str] = "", print_attr: Optional[bool] = False) -> str: if self.op_type == "const": return "" s = indent if self.outputs is not None: s += ", ".join([str(o) for o in self.outputs]) - s += " = " + self.op_type + "(" - s += ", ".join( - [ - k + "=" + Operation.var_to_str(self.inputs[k]) - for k in self._input_types.keys() - if k in self.inputs and not is_internal_input(k) - ] - ) + + if print_attr: + attr = "[" + for k, v in self.scopes.items(): + attr += f"{k}: {v}, " + attr = attr[:-2] + "]" + else: + attr = "" + + s += " = " + self.op_type + attr + "(" + s += ", ".join([k + "=" + Operation.var_to_str(v) for k, v in self.inputs.items()]) s += ', name="{}")\n'.format(self.name) for b in self.blocks: - s += b.indented_str(indent=indent + SPACES) + s += b.indented_str(indent=indent + SPACES, print_attr=print_attr) return s def __repr__(self): diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/control_flow.py b/coremltools/converters/mil/mil/ops/defs/iOS15/control_flow.py index f858ae46c..40e5d3126 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/control_flow.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/control_flow.py @@ -143,13 +143,13 @@ class Const(Operation): Parameters ---------- + val: const<\*,T> (Required) + mode: immediate_value, file_value (Optional) * Determines how the constant value is stored in the internal MIL format. * For large constants such as convolution weights, use ``file_value``. * For smaller-size constants such as values of a stride, use ``immediate_value``. 
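As a rough sketch of what the `print_attr` flag added to `indented_str`/`to_str` surfaces (assuming `prog` is a program built with the `mb.scope` example above; the exact rendering of the attribute keys depends on how `ScopeSource` stringifies, so the commented output is approximate):

    # Print the main function with per-op scope attributes included.
    print(prog.functions["main"].to_str("main", print_attr=True))
    # e.g. ... %add_1 = add[TORCHSCRIPT_MODULE_TYPE: ['Module1']](x=%x, ..., name="add_1") ...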
- val: const<\*,T> (Required) - Returns ------- const<\*,T> @@ -355,7 +355,7 @@ def _check_equal_value(val1, val2): @staticmethod def _clean_up_child_ops(block): - for op in list(block.operations): + for op in block.operations: for b in op.blocks: while_loop._clean_up_child_ops(b) diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/linear.py b/coremltools/converters/mil/mil/ops/defs/iOS15/linear.py index 5e45864c7..3a5804df5 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/linear.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/linear.py @@ -224,10 +224,10 @@ def type_inference(self): def value_inference(self): x = self.x.val if self.transpose_x.val: - x = np.transpose(x) + x = np.swapaxes(x, -1, -2) y = self.y.val if self.transpose_y.val: - y = np.transpose(y) + y = np.swapaxes(y, -1, -2) return np.matmul(x, y) diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/scatter_gather.py b/coremltools/converters/mil/mil/ops/defs/iOS15/scatter_gather.py index 6650c4438..216bb3e19 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/scatter_gather.py @@ -6,15 +6,11 @@ import numpy as np from coremltools.converters.mil.mil import Operation, types -from coremltools.converters.mil.mil.input_type import (DefaultInputs, - InputSpec, - TensorInputType) -from coremltools.converters.mil.mil.operation import (SYMBOL, VALUE, - precondition) +from coremltools.converters.mil.mil.input_type import DefaultInputs, InputSpec, TensorInputType +from coremltools.converters.mil.mil.operation import SYMBOL, VALUE, precondition from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op from coremltools.converters.mil.mil.ops.defs._utils import compute_gather -from coremltools.converters.mil.mil.types.symbolic import ( - is_compatible_symbolic_vector) +from coremltools.converters.mil.mil.types.symbolic import is_compatible_symbolic_vector, is_symbolic @register_op @@ -78,7 +74,7 @@ class gather(Operation): indices=TensorInputType(type_domain=types.int32), axis=TensorInputType(const=True, optional=True, type_domain=types.int32), ) - + type_domains = { "T": (types.fp16, types.fp32, types.int32), } @@ -96,11 +92,8 @@ def value_inference(self): # only allow x to be symbolic. indices cannot. 
return None return compute_gather( - params=self.x.sym_val, - indices=self.indices.val, - axis=self.axis.val, - batch_dims=0 - ) + params=self.x.sym_val, indices=self.indices.val, axis=self.axis.val, batch_dims=0 + ) def type_inference(self): out_type = self.x.dtype @@ -204,7 +197,7 @@ class scatter(Operation): axis=TensorInputType(const=True, optional=True, type_domain=types.int32), mode=TensorInputType(const=True, optional=True, type_domain=types.str), ) - + type_domains = { "T": (types.fp16, types.fp32, types.int32), } @@ -270,7 +263,7 @@ class gather_along_axis(Operation): indices=TensorInputType(type_domain=types.int32), axis=TensorInputType(const=True, optional=True, type_domain=types.int32), ) - + type_domains = { "T": (types.fp16, types.fp32, types.int32), } @@ -308,8 +301,14 @@ def type_inference(self): axis = axis if axis >= 0 else axis + self.x.rank for i in range(self.x.rank): - if i != axis: - assert self.x.shape[i] == self.indices.shape[i] + x_size = self.x.shape[i] + indices_size = self.indices.shape[i] + if i != axis and not is_symbolic(x_size) and not is_symbolic(indices_size): + if x_size != indices_size: + raise AssertionError( + "The input data and indices should have the same size at " + f"axis {i}, but got {x_size} vs {indices_size}" + ) return types.tensor(self.x.dtype, self.indices.shape) @@ -469,7 +468,7 @@ class gather_nd(Operation): x=TensorInputType(type_domain="T"), indices=TensorInputType(type_domain=types.int32), ) - + type_domains = { "T": (types.fp16, types.fp32, types.int32), } @@ -528,7 +527,7 @@ class scatter_nd(Operation): updates=TensorInputType(type_domain="T"), mode=TensorInputType(const=True, optional=True, type_domain=types.str), ) - + type_domains = { "T": (types.fp16, types.fp32, types.int32), } diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_operation.py b/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_operation.py index 9a5340764..a32eeefb3 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_operation.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_operation.py @@ -365,7 +365,7 @@ class one_hot(Operation): ---------- indices: tensor<[D], i32> (Required) * Tensor, values indicate the locations for each one-hot vector to take the ``on_value``. - one_got_vector_size: i32 (Required) + one_hot_vector_size: i32 (Required) * Indicates the number of returning vectors. axis: const i32 (Optional) * Indicates which dimension to append the new axis. @@ -444,7 +444,7 @@ def type_inference(self): @register_op class pad(Operation): """ - Pad a tensor. + Pads a tensor. Parameters ---------- @@ -570,10 +570,10 @@ class range_1d(Operation): Parameters ---------- - end: (Required) - * The upper limit of the sequence, exclusive. start: (Required) * The start point of the sequence. + end: (Required) + * The upper limit of the sequence, exclusive. step: (Required) * Number that increments ``start``. @@ -1117,6 +1117,10 @@ class split(Operation): * The tensors may be variadic, but the number of tensors must be determined at compile time (i.e. a tuple). + axis: const (Required) + * The dimension along which to concatenate. Must be in the + range ``[-rank(x), rank(x))``. + num_splits: (Optional) If specified, divide ``x`` into ``num_splits`` tensors along ``axis``. Its behavior depends on ``split_sizes``: @@ -1134,10 +1138,6 @@ class split(Operation): * Sizes to split to. The sum of ``split_sizes`` must equal to ``value.shape[axis]``. - axis: const (Required) - * The dimension along which to concatenate. 
Must be in the - range ``[-rank(x), rank(x))``. - Returns ------- Tuple[tensor<\*?, T>] diff --git a/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_transformation.py b/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_transformation.py index 180c79f33..9ced519cd 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_transformation.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS15/tensor_transformation.py @@ -585,9 +585,9 @@ class slice_by_size(Operation): ---------- x: tensor<*?, T> (Required) * Input tensor. - begin: tensor<[rank(x)], i32> Required + begin: tensor<[rank(x)], i32> (Required) * The begin index for slice. - size: tensor<[rank(x)], i32> Required + size: tensor<[rank(x)], i32> (Required) * The size that is to be sliced. If ``size`` is ``-1``, all the remaining elements starting with "begin" are sliced. @@ -884,6 +884,9 @@ class squeeze(Operation): * Must be at least 1-D. axes: const (Optional) * Axes to squeeze out. + * The behaviour of squeezing non-single dimensions follow PyTorch instead of NumPy, where + it ignores non-single dimensions instead of erroring out. More specifically, if x has + shape (2, 3, 4) and axes is [0, 1], the output will be a tensor with shape (2, 3, 4). * Default to remove all single-dimensions. Returns @@ -923,9 +926,11 @@ def type_inference(self): for i in sorted(axes)[::-1]: # descending order if len(squeezed_shape) <= i: raise ValueError( - "Cannot squeeze dim {} for shape {}".format(i, squeezed_shape) + f"Invalid axis {i} in squeeze. The axis should be smaller than {len(squeezed_shape)}" ) - squeezed_shape.pop(i) + if squeezed_shape[i] == 1: + # Only remove the dim_size=1 dimension. + squeezed_shape.pop(i) return types.tensor(x_type, tuple(squeezed_shape)) if len(squeezed_shape) != 0 else x_type diff --git a/coremltools/converters/mil/mil/ops/defs/iOS16/constexpr_ops.py b/coremltools/converters/mil/mil/ops/defs/iOS16/constexpr_ops.py index 899c8e4a3..925ec149d 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS16/constexpr_ops.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS16/constexpr_ops.py @@ -116,20 +116,37 @@ def materialized_val_inference(self): self.quantized_data.val, self.zero_point.val, self.scale.val, self.axis.val ) + def is_all_zeros(self) -> bool: + zero_point = self.promote_rank_to_same_as_quantized_data( + self.zero_point.val, self.quantized_data.val, self.axis.val + ) + return np.all(self.quantized_data.val == zero_point) + @staticmethod - def decompress(quantized_data, zero_point, scale, axis): + def promote_rank_to_same_as_quantized_data( + param: np.ndarray, quantized_data: np.ndarray, axis: int + ) -> np.ndarray: + """ + Promote param (i.e. 
zero point or scale) rank to same as quantized data, + so subtraction or multiplication can happen properly on the specified axis + """ + if len(param.shape) == 0: + return np.reshape(param, np.ones(len(quantized_data.shape), np.int32)) + else: + axes = [i for i in range(len(quantized_data.shape)) if i != axis] + return np.expand_dims(param, axis=tuple(axes)) + @staticmethod + def decompress( + quantized_data: np.ndarray, zero_point: np.ndarray, scale: np.ndarray, axis: int + ) -> np.ndarray: axis = axis if axis >= 0 else axis + len(quantized_data.shape) - - def rank_promoted_to_same_as_quantized_data(param): - if len(param.shape) == 0: - return np.reshape(param, np.ones(len(quantized_data.shape), np.int32)) - else: - axes = [i for i in range(len(quantized_data.shape)) if i != axis] - return np.expand_dims(param, axis=tuple(axes)) - - sc = rank_promoted_to_same_as_quantized_data(scale) - zp = rank_promoted_to_same_as_quantized_data(zero_point) + sc = constexpr_affine_dequantize.promote_rank_to_same_as_quantized_data( + scale, quantized_data, axis + ) + zp = constexpr_affine_dequantize.promote_rank_to_same_as_quantized_data( + zero_point, quantized_data, axis + ) val = sc * (quantized_data.astype(np.float32) - zp.astype(np.float32)) return val.astype(scale.dtype) @@ -296,8 +313,8 @@ class constexpr_sparse_to_dense(Operation): shape: const tensor (Required) - Notes - ----- + Notes + ----- * Any data is packed and read in a row-major order. * ``mask`` contains ``M`` bytes, where ``M = ceil( product(shape) / 8)``. That is, each bit field corresponds to one element in the output tensor. @@ -311,7 +328,7 @@ class constexpr_sparse_to_dense(Operation): .. sourcecode:: python shape = (5,) => M = 1 bytes - + MSB LSB | | mask = |x x x 0 1 1 0 0 | <== packed elements diff --git a/coremltools/converters/mil/mil/ops/defs/iOS17/quantization_ops.py b/coremltools/converters/mil/mil/ops/defs/iOS17/quantization_ops.py index 0e635cea9..6b0c87ffd 100644 --- a/coremltools/converters/mil/mil/ops/defs/iOS17/quantization_ops.py +++ b/coremltools/converters/mil/mil/ops/defs/iOS17/quantization_ops.py @@ -93,12 +93,12 @@ class quantize(Operation): ``input.shape[axis]``; that is, equal to ``3``. - This is broadcasted to ``(1, 3, 1, 1)``. - axis: const tensor (Optional) - output_dtype: const tensor (Required) * This parameter can take ``"uint8"``, ``"int8"`` as values. * The ``output_dtype`` value must match the ``zero_point`` dtype. 
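A small, self-contained NumPy sketch of the rank promotion that `promote_rank_to_same_as_quantized_data` performs for a per-channel parameter (the shapes and values here are illustrative only):

    import numpy as np

    quantized_data = np.zeros((2, 3, 4), dtype=np.int8)
    scale = np.array([0.5, 0.25, 0.125], dtype=np.float32)  # per-channel scale on axis=1
    # Expand scale on every axis except the quantization axis so it broadcasts.
    axes = [i for i in range(quantized_data.ndim) if i != 1]
    promoted = np.expand_dims(scale, axis=tuple(axes))
    assert promoted.shape == (1, 3, 1)  # broadcasts against (2, 3, 4)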
+ axis: const tensor (Optional) + Returns ------- tensor @@ -224,8 +224,21 @@ def type_inference(self): _check_scale_zp_shapes(self.input, self.scale, self.zero_point, self.axis) return types.tensor(self.scale.dtype, self.input.shape) - @precondition(allow=VALUE) - def value_inference(self): + def can_materialize_val(self) -> bool: + if self.input.val is None: + return False + if self.scale.val is None: + return False + if self.zero_point is not None and self.zero_point.val is None: + return False + if self.axis is not None and self.axis.val is None: + return False + return True + + def materialized_val_inference(self) -> np.ndarray: + if not self.can_materialize_val(): + return None + quantized_data = self.input.val if self.zero_point is not None: zero_point = self.zero_point.val diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_activation.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_activation.py index a7ff49a81..c80f90ddb 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_activation.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_activation.py @@ -912,7 +912,8 @@ def test_value_inference(self, input_size): def prog(): return mb.softmax(x=x, axis=axis) - op = list(prog.functions.values())[0].operations[2] + ops = list(prog.functions.values())[0].operations + op = list(ops)[2] assert op.op_type == "softmax" np.testing.assert_allclose( op.value_inference(), diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_control_flow.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_control_flow.py index a8f0232a2..bcb8278a0 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_control_flow.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_control_flow.py @@ -331,7 +331,7 @@ def cond(res, bx): def test_builder_to_backend_nested(self, compute_unit, backend): if backend.backend == "neuralnetwork": pytest.xfail( - "rdar://96862073 (test_control_folw::TestWhileLoop::test_builder_to_backend_nested failing on nnv1)" + "rdar://96862073 (test_control_flow::TestWhileLoop::test_builder_to_backend_nested failing on nnv1)" ) input_placeholders = { diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_conv.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_conv.py index 8817aa59b..57010f313 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_conv.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_conv.py @@ -436,6 +436,21 @@ def test_builder_to_backend_stress( config, x_weight_dtype, ): + if ( + backend.backend == 'neuralnetwork' and + conv_dim == "conv2d" and + config == { + "padding": (1, 1, 1), + "DHWKdKhKw": (5, 5, 5, 2, 2, 2), + "stride": (2, 2, 2), + "dilation": (2, 1, 1), + "has_bias": True, + "groups": 1, + "symbolic": True, + } + ): + pytest.xfail("rdar://121954894: Conv2d starts to fail") + padding = config["padding"] DHWKdKhKw = config["DHWKdKhKw"] stride = config["stride"] @@ -623,6 +638,20 @@ def test_builder_to_backend_stress_weights_input( conv_dim, config, ): + if ( + conv_dim == "conv2d" and + config == { + 'padding': (1, 1, 1), + 'DHWKdKhKw': (5, 5, 5, 2, 2, 2), + 'stride': (2, 2, 2), + 'dilation': (2, 1, 1), + 'has_bias': True, + 'groups': 1, + 'symbolic': True, + } + ): + pytest.xfail("rdar://121954894: Conv2d starts to fail") + padding = config["padding"] DHWKdKhKw = config["DHWKdKhKw"] stride = config["stride"] diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_elementwise_unary.py 
b/coremltools/converters/mil/mil/ops/tests/iOS14/test_elementwise_unary.py index 8bdae53ac..040257d8f 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_elementwise_unary.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_elementwise_unary.py @@ -10,10 +10,9 @@ import scipy from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Function, get_new_symbol, types +from coremltools.converters.mil.mil import get_new_symbol, types from coremltools.converters.mil.mil.ops.tests.iOS14 import backends from coremltools.converters.mil.mil.ops.tests.testing_utils import run_compare_builder -from coremltools.converters.mil.mil.types.symbolic import is_compatible_symbolic_vector from coremltools.converters.mil.testing_reqs import compute_units from coremltools.converters.mil.testing_utils import ssa_fn @@ -514,19 +513,35 @@ def test_builder_threshold_eval(self): np.testing.assert_allclose(expected_outputs, v.val, atol=1e-04, rtol=1e-05) - def test_cast_with_symbolic_value(self): - input_shape = [get_new_symbol(), 1] - input_placeholders = { - "x": mb.placeholder(shape=input_shape), - } + @pytest.mark.parametrize( + "backend, dtype", + itertools.product( + backends, + ["bool", "int32", "fp16", "fp32"], + ), + ) + def test_cast_with_symbolic_value(self, backend, dtype): + s1 = get_new_symbol() - def build(x): + @mb.program( + input_specs=[mb.TensorSpec(shape=(s1, 1))], + opset_version=backend.opset_version, + ) + def prog(x): shape = mb.shape(x=x) - return mb.cast(x=shape, dtype="int32") - - with Function(input_placeholders) as ssa_func: - output_vars = build(**ssa_func.inputs) - assert is_compatible_symbolic_vector(output_vars.sym_val, [get_new_symbol(), 1]) + out = mb.cast(x=shape, dtype=dtype) + assert out.val is None + sym_val = out.sym_val + if dtype == "bool": + assert sym_val.tolist() == [s1, True] + elif dtype == "int32": + assert sym_val.tolist() == [s1, 1] + elif dtype == "fp16": + assert sym_val.tolist() == [s1, np.float16(1.0)] + else: + assert dtype == "fp32" + assert sym_val.tolist() == [s1, np.float32(1.0)] + return out @staticmethod def _test_builder_to_backend_stress_with_epsilon( @@ -634,6 +649,7 @@ def prog(): return mb.erf(x=x) ops = list(prog.functions.values())[0].operations + ops = list(ops) assert len(ops) == 2 assert ops[0].op_type == "const" erf_op = ops[1] diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_linear.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_linear.py index 0da0d8320..41b3d1d39 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_linear.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_linear.py @@ -261,6 +261,28 @@ def build(x): backend=backend, ) + @pytest.mark.parametrize( + "compute_unit, backend", + itertools.product(compute_units, backends), + ) + def test_builder_transpose_y(self, compute_unit, backend): + x_val = np.random.rand(3, 2, 7, 16) + y_val = np.random.rand(3, 2, 5, 16) + + def build(x): + return mb.matmul(x=x, y=y_val, transpose_x=False, transpose_y=True) + + expected_output = np.matmul(x_val, np.transpose(y_val, (0, 1, 3, 2))) + run_compare_builder( + build, + input_placeholders={"x": mb.placeholder(shape=x_val.shape)}, + input_values={"x": x_val}, + expected_output_types=expected_output.shape + (types.fp32,), + expected_outputs=expected_output, + compute_unit=compute_unit, + backend=backend, + ) + class TestEinsum: @pytest.mark.parametrize( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_reduction.py 
b/coremltools/converters/mil/mil/ops/tests/iOS14/test_reduction.py index 60ecf6b7c..b5e144c99 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_reduction.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_reduction.py @@ -335,7 +335,8 @@ def test_reduce_log_sum_exp_value_inference(self, input_size): def prog(): return mb.reduce_log_sum_exp(x=x, axes=(axis,)) - op = list(prog.functions.values())[0].operations[3] + ops = list(prog.functions.values())[0].operations + op = list(ops)[3] assert op.op_type == "reduce_log_sum_exp" np.testing.assert_allclose( op.value_inference(), scipy.special.logsumexp(x, axis=axis), atol=1e-04, rtol=1e-05 diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_scatter_gather.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_scatter_gather.py index 8c7ef8374..7c3dd2913 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_scatter_gather.py @@ -11,7 +11,7 @@ import coremltools as ct from coremltools._deps import _HAS_TF_2, MSG_TF2_NOT_FOUND from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil import get_new_symbol, types from coremltools.converters.mil.mil.ops.tests.iOS14 import backends from coremltools.converters.mil.mil.ops.tests.testing_utils import ( mark_api_breaking, @@ -488,6 +488,31 @@ def prog(x): opset_version=backend.opset_version, )(prog) + @staticmethod + def test_gather_value_inference_on_symbolic_input(): + + s1, s2 = get_new_symbol(), get_new_symbol() + + @mb.program( + input_specs=[mb.TensorSpec(shape=(2, 3, s1, s2, 5))], + ) + def prog(x): + shape = mb.shape(x=x) + gather_1 = mb.gather(x=shape, indices=0, axis=0) + gather_2 = mb.gather(x=shape, indices=[0, 1], axis=0) + gather_3 = mb.gather(x=shape, indices=[1, 2, 3], axis=0) + + # Test value inference + assert gather_1.val == 2 + assert gather_1.sym_val == 2 + + assert gather_2.val.tolist() == [2, 3] + assert gather_2.sym_val.tolist() == [2, 3] + + assert gather_3.val is None + assert gather_3.sym_val.tolist() == [3, s1, s2] + + return x class TestGatherAlongAxis: @pytest.mark.parametrize( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_transformation.py b/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_transformation.py index 1a1d31fc3..6ae83e6aa 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_transformation.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS14/test_tensor_transformation.py @@ -1268,6 +1268,63 @@ def prog(): assert const.val[0, 0, 0] == 112 return x + @staticmethod + def test_squeeze_invalid_axis(): + with pytest.raises( + ValueError, match="Invalid axis 3 in squeeze. 
The axis should be smaller than 3" + ): + + @mb.program() + def prog(): + const = mb.const(val=[[[2, 3], [4, 5]]]) + x = mb.squeeze(x=const, axes=(3,)) + return x + + @pytest.mark.parametrize( + "compute_unit, backend, is_symbolic", + itertools.product( + compute_units, + backends, + (True, False), + ), + ) + def test_non_single_element_dim(self, compute_unit, backend, is_symbolic): + if backend.backend == "neuralnetwork": + pytest.skip("neuralnetwork backend doesn't support squeeze a not-1 dimension") + if compute_unit == ct.ComputeUnit.CPU_ONLY: + pytest.xfail("CPU failed non-single-dim squeeze (rdar://124555262)") + + x = np.arange(2 * 3 * 4, dtype=np.int32).reshape(2, 3, 4) + input_shape = ( + [get_new_symbol(), get_new_symbol(), get_new_symbol()] if is_symbolic else x.shape + ) + input_placeholders = {"x": mb.placeholder(shape=input_shape)} + input_values = {"x": x} + + def build(x): + return [ + mb.squeeze(x=x, axes=(-1,)), + mb.squeeze(x=x, axes=(-2, 0)), + mb.squeeze(x=x, axes=(0, 1, 2)), + mb.squeeze(x=x), + ] + + # The symbolic dim won't be squeezed, so it doesn't affect the output. + expected_output_types = [tuple(input_shape) + (types.int32,)] * 4 + expected_outputs = [x] * 4 + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + inputs=construct_inputs_from_placeholders(input_placeholders, 10) + if backend.backend == "mlprogram" + else None, + compute_unit=compute_unit, + backend=backend, + ) + class TestTranspose: @pytest.mark.parametrize( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS16/test_constexpr_ops.py b/coremltools/converters/mil/mil/ops/tests/iOS16/test_constexpr_ops.py index efd157257..24358362d 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS16/test_constexpr_ops.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS16/test_constexpr_ops.py @@ -8,6 +8,7 @@ import numpy as np import pytest +import coremltools as ct from coremltools.converters.mil import testing_reqs from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import types @@ -59,6 +60,40 @@ def build(x): prog = mlmodel._mil_program assert "constexpr_affine_dequantize" in get_op_types_in_program(prog) + def test_is_all_zeros(self): + @mb.program(opset_version=ct.target.iOS16) + def prog_0_scalar(): + return mb.constexpr_affine_dequantize( + quantized_data=np.array([[0, 0, 0], [0, 0, 0]]).astype(np.int8), + zero_point=np.int8(0), + scale=np.float32(1.2), + axis=0, + ) + + assert prog_0_scalar.find_ops(op_type="constexpr_affine_dequantize")[0].is_all_zeros() + + @mb.program(opset_version=ct.target.iOS16) + def prog_0_vector(): + return mb.constexpr_affine_dequantize( + quantized_data=np.array([[1, 2, 3], [1, 2, 3]]).astype(np.uint8), + zero_point=np.uint8([1, 2, 3]), + scale=np.float32(2), + axis=1, + ) + + assert prog_0_vector.find_ops(op_type="constexpr_affine_dequantize")[0].is_all_zeros() + + @mb.program(opset_version=ct.target.iOS16) + def prog_none0(): + return mb.constexpr_affine_dequantize( + quantized_data=np.array([[1, 2, 3], [1, 2, 3]]).astype(np.uint8), + zero_point=np.uint8([1, 2]), + scale=np.float32(2), + axis=0, + ) + + assert not prog_none0.find_ops(op_type="constexpr_affine_dequantize")[0].is_all_zeros() + @ssa_fn def test_builder_eval(self): # scalar zero-point & scalar scale @@ -350,6 +385,45 @@ def build(x): prog = mlmodel._mil_program assert "constexpr_lut_to_dense" in get_op_types_in_program(prog) + @pytest.mark.parametrize("backend", backends) + def 
test_shape_of_constexpr_is_replaceable(self, backend): + @mb.program(input_specs=[], opset_version=backend.opset_version) + def prog(): + lut_data = np.array( + [ + -19.0, + 4.0, + 0.0, + -1.0, + 1.0, + 3.0, + 5.0, + -8.0, + 19, + 13, + 42, + 4.5, + 5.4, + 2.0, + -6, + -7, + ] + ).astype(np.float32) + indices = np.array([212, 21]).astype(np.uint8) + shape = np.array([4, 1]).astype(np.uint32) + y = mb.constexpr_lut_to_dense(lut=lut_data, indices=indices, shape=shape) + shape = mb.shape(x=y) + assert len(shape.nonreplaceable_vars_upstream) == 0 + gather = mb.gather( + x=shape, + indices=[ + 0, + ], + axis=0, + ) + assert len(gather.nonreplaceable_vars_upstream) == 0 + return gather + @ssa_fn def test_builder_eval(self): v = mb.constexpr_lut_to_dense( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS16/test_scatter_gather.py b/coremltools/converters/mil/mil/ops/tests/iOS16/test_scatter_gather.py index 7d6fa1fbb..640e35df4 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS16/test_scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS16/test_scatter_gather.py @@ -24,29 +24,40 @@ class TestGather: @pytest.mark.parametrize( - "compute_unit, backend, x_dtype, indices_dtype", + "compute_unit, backend, x_dtype, indices_dtype, indices_dynamic", itertools.product( compute_units, backends, [np.float32, np.float16, np.int32], [np.int32, np.int16, np.uint16], + [True, False], ), ) - def test_builder_to_backend_smoke(self, compute_unit, backend, x_dtype, indices_dtype): + def test_builder_to_backend_smoke( + self, compute_unit, backend, x_dtype, indices_dtype, indices_dynamic + ): x = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=x_dtype) indices = np.array([[[1, 0], [0, 1]], [[1, 0], [0, 0]]], dtype=indices_dtype) builtin_x_dtype = types.numpy_type_to_builtin_type(x_dtype) - input_placeholders = { - "x": mb.placeholder(shape=x.shape, dtype=builtin_x_dtype), - "indices": mb.placeholder( + input_placeholders = {"x": mb.placeholder(shape=x.shape, dtype=builtin_x_dtype)} + input_values = {"x": x} + if indices_dynamic: + input_placeholders["indices"] = mb.placeholder( shape=indices.shape, dtype=types.numpy_type_to_builtin_type(indices_dtype) - ), - } + ) + input_values["indices"] = indices - input_values = {"x": x, "indices": indices} + def build_dynamic(x, indices): + return [ + mb.gather(x=x, indices=indices, axis=1, batch_dims=0), + mb.gather(x=x, indices=indices, axis=1, batch_dims=1), + mb.gather(x=x, indices=indices, axis=2, batch_dims=0), + mb.gather(x=x, indices=indices, axis=2, batch_dims=1), + mb.gather(x=x, indices=indices, axis=2, batch_dims=2), + ] - def build(x, indices): + def build_static(x): return [ mb.gather(x=x, indices=indices, axis=1, batch_dims=0), mb.gather(x=x, indices=indices, axis=1, batch_dims=1), @@ -55,6 +66,8 @@ def build(x, indices): mb.gather(x=x, indices=indices, axis=2, batch_dims=2), ] + build = build_dynamic if indices_dynamic else build_static + expected_output_types = [ (2, 2, 2, 2, 3, builtin_x_dtype), (2, 2, 2, 3, builtin_x_dtype), diff --git a/coremltools/converters/mil/mil/ops/tests/iOS17/test_conv.py b/coremltools/converters/mil/mil/ops/tests/iOS17/test_conv.py index 65f68b116..b6f67b427 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS17/test_conv.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS17/test_conv.py @@ -8,6 +8,7 @@ import numpy as np import pytest +import coremltools as ct from coremltools._deps import _HAS_TORCH, MSG_TORCH_NOT_FOUND from 
coremltools.converters.mil.mil.ops.tests.iOS14.test_conv import TestConv as _TestConvIos14 from coremltools.converters.mil.mil.ops.tests.iOS14.test_conv import ( @@ -87,6 +88,25 @@ def test_builder_to_backend_stress( config, x_weight_dtype, ): + if ( + backend.backend == "mlprogram" + and backend.precision == "fp16" + and backend.opset_version == ct.target.iOS17 + and conv_dim == "conv2d" + and config + == { + "padding": (1, 1, 1), + "DHWKdKhKw": (5, 5, 5, 2, 2, 2), + "stride": (2, 2, 2), + "dilation": (2, 1, 1), + "has_bias": True, + "groups": 1, + "symbolic": True, + } + and x_weight_dtype == (np.float32, np.float16) + ): + pytest.xfail("rdar://124260627 ([CI] Two tests are random failing on CI)") + super().test_builder_to_backend_stress( compute_unit, backend, conv_dim, config, x_weight_dtype ) diff --git a/coremltools/converters/mil/mil/ops/tests/iOS17/test_elementwise_unary.py b/coremltools/converters/mil/mil/ops/tests/iOS17/test_elementwise_unary.py index a41ac776f..57e6ec90b 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS17/test_elementwise_unary.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS17/test_elementwise_unary.py @@ -11,7 +11,7 @@ import coremltools as ct from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil import get_new_symbol, types from coremltools.converters.mil.mil.ops.tests.iOS14.test_elementwise_unary import ( TestElementwiseUnary as _TestElementwiseUnary_iOS14, ) @@ -63,6 +63,28 @@ def prog(): cast_op = main_func.find_ops(op_type="cast")[0] np.testing.assert_allclose(expected_res, cast_op.outputs[0].val, atol=1e-04, rtol=1e-05) + @pytest.mark.parametrize( + "backend, dtype", + itertools.product( + backends, + ["int8", "uint8", "int16", "uint16"], + ), + ) + def test_cast_with_symbolic_value_iOS17(self, backend, dtype): + s1 = get_new_symbol() + + @mb.program( + input_specs=[mb.TensorSpec(shape=(s1, 1))], + opset_version=backend.opset_version, + ) + def prog(x): + shape = mb.shape(x=x) + out = mb.cast(x=shape, dtype=dtype) + assert out.val is None + sym_val = out.sym_val + assert sym_val.tolist() == [s1, 1] + return out + @pytest.mark.parametrize( "compute_unit, backend, src_dtype, dst_dtype", itertools.product( diff --git a/coremltools/converters/mil/mil/ops/tests/iOS17/test_linear.py b/coremltools/converters/mil/mil/ops/tests/iOS17/test_linear.py index ce014a3a1..d2e687521 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS17/test_linear.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS17/test_linear.py @@ -13,8 +13,11 @@ from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.ops.tests.iOS17 import backends from coremltools.converters.mil.mil.ops.tests.testing_utils import run_compare_builder -from coremltools.converters.mil.mil.types import builtin_to_string, nptype_from_builtin -from coremltools.converters.mil.mil.types.type_mapping import numpy_type_to_builtin_type +from coremltools.converters.mil.mil.types import ( + builtin_to_string, + nptype_from_builtin, + numpy_type_to_builtin_type, +) from coremltools.converters.mil.testing_reqs import compute_units diff --git a/coremltools/converters/mil/mil/ops/tests/iOS17/test_quantization.py b/coremltools/converters/mil/mil/ops/tests/iOS17/test_quantization.py index 77b890b7e..cae1135c5 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS17/test_quantization.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS17/test_quantization.py @@ -313,7 +313,11 @@ def 
test_builder_eval_scalar_params(self): zero_point=np.uint8(1), scale=np.float32(2), ) - np.testing.assert_allclose(np.float32([[0, 2, 4], [0, 2, 4]]), v.val) + assert v.val is None + np.testing.assert_allclose( + np.float32([[0, 2, 4], [0, 2, 4]]), + v.op.materialized_val_inference(), + ) @ssa_fn def test_builder_eval_vector_params(self): @@ -323,8 +327,10 @@ def test_builder_eval_vector_params(self): scale=np.array([1, 2]).astype(np.float32), axis=3, ) + assert v.val is None np.testing.assert_allclose( - np.array([1, 2, 3, 4]).reshape(1, 1, 2, 2).astype(np.float32), v.val + np.array([1, 2, 3, 4]).reshape(1, 1, 2, 2).astype(np.float32), + v.op.materialized_val_inference(), ) @ssa_fn @@ -333,7 +339,11 @@ def test_builder_eval_no_zero_point(self): input=np.array([[0, 1, 2], [0, 1, 2]]).astype(np.int8), scale=np.float32(2), ) - np.testing.assert_allclose(np.float32([[0, 2, 4], [0, 2, 4]]), v.val) + assert v.val is None + np.testing.assert_allclose( + np.float32([[0, 2, 4], [0, 2, 4]]), + v.op.materialized_val_inference(), + ) @pytest.mark.parametrize("compute_unit, backend", itertools.product(compute_units, backends)) def test_smoke_builder_to_backend_dequantize_per_tensor(self, compute_unit, backend): diff --git a/coremltools/converters/mil/mil/ops/tests/iOS17/test_scatter_gather.py b/coremltools/converters/mil/mil/ops/tests/iOS17/test_scatter_gather.py index 6d5d0cdeb..b4b4edca5 100644 --- a/coremltools/converters/mil/mil/ops/tests/iOS17/test_scatter_gather.py +++ b/coremltools/converters/mil/mil/ops/tests/iOS17/test_scatter_gather.py @@ -249,16 +249,21 @@ def build_dynamic(data, indices, updates): class TestGather(_TestGatherIOS16): @pytest.mark.parametrize( - "compute_unit, backend, x_dtype, indices_dtype", + "compute_unit, backend, x_dtype, indices_dtype, indices_dynamic", itertools.product( compute_units, backends, [np.float32, np.float16, np.int32, np.int16, np.uint16, np.int8, np.uint8], [np.int32, np.int16, np.uint16, np.int8, np.uint8], + [True, False], ), ) - def test_builder_to_backend_smoke(self, compute_unit, backend, x_dtype, indices_dtype): - super().test_builder_to_backend_smoke(compute_unit, backend, x_dtype, indices_dtype) + def test_builder_to_backend_smoke( + self, compute_unit, backend, x_dtype, indices_dtype, indices_dynamic + ): + super().test_builder_to_backend_smoke( + compute_unit, backend, x_dtype, indices_dtype, indices_dynamic + ) @pytest.mark.parametrize( "backend, indices_val, validate_indices", @@ -291,6 +296,25 @@ def prog(x): opset_version=backend.opset_version, )(prog) + @pytest.mark.parametrize( + "backend, indices_val", + itertools.product(backends, [0, 1]), + ) + def test_builder_scalar_indices(self, backend, indices_val): + @mb.program(input_specs=[], opset_version=backend.opset_version) + def prog(): + params = np.array([1, 2, 3, 4], dtype=np.int32) + indices = np.array(indices_val, dtype=np.int32) + res = mb.gather( + x=params, indices=indices_val, axis=0, batch_dims=0, validate_indices=False + ) + return res + + main_func = prog.functions["main"] + gather_op = main_func.find_ops(op_type="gather")[0] + assert gather_op.outputs[0].val == 1 if indices_val == 0 else 2 + assert gather_op.outputs[0].dtype == types.int32 + class TestGatherAlongAxis: @pytest.mark.parametrize( diff --git a/coremltools/converters/mil/mil/ops/tests/testing_utils.py b/coremltools/converters/mil/mil/ops/tests/testing_utils.py index 4fa5b93ad..2bce2e551 100644 --- a/coremltools/converters/mil/mil/ops/tests/testing_utils.py +++ 
b/coremltools/converters/mil/mil/ops/tests/testing_utils.py @@ -10,8 +10,9 @@ import coremltools as ct from coremltools import _logger as logger +from coremltools.converters.mil import mil from coremltools.converters.mil.input_types import TensorType -from coremltools.converters.mil.mil import Function, Placeholder, Program +from coremltools.converters.mil.mil import Function, Placeholder from coremltools.converters.mil.mil.passes.pass_pipeline import PassPipeline from coremltools.converters.mil.mil.types.symbolic import is_symbolic from coremltools.converters.mil.testing_reqs import BackendConfig @@ -119,7 +120,7 @@ def run_compare_builder( if expected_outputs is not None and not isinstance(expected_outputs, list): expected_outputs = [expected_outputs] - prog = Program() + prog = mil.Program() with Function(input_placeholders, opset_version=minimum_deployment_target) as ssa_func: output_vars = build(**ssa_func.inputs) if isinstance(output_vars, tuple): diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/__init__.py b/coremltools/converters/mil/mil/passes/defs/cleanup/__init__.py index 32aeb2f5f..5442e9710 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/__init__.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/__init__.py @@ -7,6 +7,7 @@ from .const_elimination import const_elimination from .dead_code_elimination import dead_code_elimination from .dedup_op_and_var_names import dedup_op_and_var_names +from .expand_dynamic_linear import expand_dynamic_linear from .fuse_reduce_mean import fuse_reduce_mean from .loop_invariant_elimination import loop_invariant_elimination from .noop_elimination import noop_elimination diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/const_deduplication.py b/coremltools/converters/mil/mil/passes/defs/cleanup/const_deduplication.py index 5875fed55..020ab294c 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/const_deduplication.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/const_deduplication.py @@ -65,14 +65,13 @@ def remove_duplicate_ops( for duplicate in unique2duplicates[unique]: if duplicate in block.outputs: continue - op = duplicate.op block.replace_uses_of_var_after_op( - anchor_op=op, + anchor_op=duplicate.op, old_var=duplicate, new_var=unique, force_replace=force_replace, ) - block.remove_ops([op]) + block.remove_ops([duplicate.op]) @block_context_manager def _constant_deduplication_block(self, block: Block) -> None: diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/dead_code_elimination.py b/coremltools/converters/mil/mil/passes/defs/cleanup/dead_code_elimination.py index bbe6578eb..b7aa1f6ff 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/dead_code_elimination.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/dead_code_elimination.py @@ -7,6 +7,7 @@ from coremltools import _logger as logger from coremltools.converters.mil.mil import Program from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass +from coremltools.converters.mil.mil.passes.helper import block_context_manager from coremltools.converters.mil.mil.passes.pass_registry import register_pass @@ -48,6 +49,7 @@ def apply(self, prog: Program): self._dead_code_elimination_block(f) @staticmethod + @block_context_manager def _dead_code_elimination_block(block): used_vars = set() ops_to_remove = list() diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/dedup_op_and_var_names.py 
b/coremltools/converters/mil/mil/passes/defs/cleanup/dedup_op_and_var_names.py index f20675521..639a756af 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/dedup_op_and_var_names.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/dedup_op_and_var_names.py @@ -38,7 +38,7 @@ def prog(x): def apply(self, prog): for func in prog.functions.values(): # Handle function input/outputs as they cannot be changed (to maintain user interface) - inputs = list(func.function_inputs) + inputs = list(func.inputs.values()) io_vars = set(inputs + func.outputs) self._ensure_unique_var_names(io_vars) seen_var_names = set([v.name for v in io_vars]) diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/expand_dynamic_linear.py b/coremltools/converters/mil/mil/passes/defs/cleanup/expand_dynamic_linear.py new file mode 100644 index 000000000..55ca9df71 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/expand_dynamic_linear.py @@ -0,0 +1,116 @@ +# Copyright (c) 2024, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from coremltools.converters.mil.mil import Block +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil import Operation, Program, Var +from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass +from coremltools.converters.mil.mil.passes.helper import block_context_manager +from coremltools.converters.mil.mil.passes.pass_registry import register_pass + + +@register_pass(namespace="common") +class expand_dynamic_linear(AbstractGraphPass): + """ + ``Linear`` requires const or constexpr ``weight`` and ``bias``. In op translation, + we ambitiously prefer ``linear`` whenever possible, i.e. translate to ``linear`` + when operand is descendant of const, since such operand may be folded / fused into + const or constexpr later on by graph passes. 
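As a rough before/after sketch of the expansion this pass performs (MIL pseudo-IR in comments; variable names are illustrative):

    # before:  %y = linear(x=%x, weight=%w, bias=%b)     # %w produced by a non-const op
    # after:   %t = matmul(x=%x, y=%w, transpose_y=True)
    #          %y = add(x=%t, y=%b)                      # the add is skipped when the bias is known to be all zeros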
+ + If such const folding / constexpr fusion did not happen, this pass would clean up + those too ambitious ``linear``s by replacing them with ``matmul``s + """ + + def apply(self, prog: Program) -> None: + for f in prog.functions.values(): + self._expand_dynamic_linear_block(f) + + @block_context_manager + def _expand_dynamic_linear_block(self, block: Block) -> None: + # use shallow copy to hide changes on block.operations during the loop, + # since we do not need to deal with the newly expanded matmul + add ops + for op in list(block.operations): + for b in op.blocks: + self._expand_dynamic_linear_block(b) + + if op.op_type == "linear": + self._try_expand_dynamic_linear(op, block) + + @staticmethod + def _is_operand_static(var: Var) -> bool: + if var is None: + return True + + op = var.op + if op is None: + return False + + op_type = op.op_type + return op_type == "const" or op_type.startswith("constexpr_") + + def _try_expand_dynamic_linear(self, op: Operation, block: Block) -> None: + assert op.op_type == "linear", "Should only apply to linear op" + + is_weight_static = self._is_operand_static(op.weight) + is_bias_static = self._is_operand_static(op.bias) + + if is_weight_static: + if is_bias_static: + # static weight and bias, linear is good + return + else: + # static weight with dynamic bias, so linear for weight matmul + add for bias add + matmul = mb.linear(x=op.x, weight=op.weight, before_op=op) + add = mb.add(x=matmul, y=op.bias, before_op=op, name=op.name) + block.replace_uses_of_var_after_op( + anchor_op=op, + old_var=op.outputs[0], + new_var=add, + ) + op.remove_from_block() + else: + # dynamic weight, have to expand to at least matmul + result = mb.matmul(x=op.x, y=op.weight, transpose_y=True, before_op=op) + # static bias, try skipping add if all zero + if is_bias_static: + force_replace = False + # if no bias provided, default to 0, can skip + # if bias provided, need to inspect its value + if op.bias is not None: + bias_op = op.bias.op + bias_op_type = bias_op.op_type + if bias_op_type == "const": + is_nonzero_bias = np.any(op.bias.val != 0) + else: + if bias_op_type == "constexpr_affine_dequantize": + is_nonzero_bias = not bias_op.is_all_zeros() + # cowardly treat other types of compressed bias as if nonzero + else: + is_nonzero_bias = True + # For such a compressed all-zero bias, if we skip add, then + # the result (matmul output) would only descend from weight but not bias, + # i.e. 
need to force replacing descendant of bias + if not is_nonzero_bias: + force_replace = True + if is_nonzero_bias: + result = mb.add(x=result, y=op.bias, before_op=op, name=op.name) + block.replace_uses_of_var_after_op( + anchor_op=op, + old_var=op.outputs[0], + new_var=result, + force_replace=force_replace, + ) + op.remove_from_block() + # dynamic bias, have to further expand to matmul + add + else: + result = mb.add(x=result, y=op.bias, before_op=op, name=op.name) + block.replace_uses_of_var_after_op( + anchor_op=op, + old_var=op.outputs[0], + new_var=result, + ) + op.remove_from_block() diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/fuse_reduce_mean.py b/coremltools/converters/mil/mil/passes/defs/cleanup/fuse_reduce_mean.py index 58f913538..33a1b89a7 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/fuse_reduce_mean.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/fuse_reduce_mean.py @@ -108,8 +108,11 @@ def _try_to_transform(reduce_sum_op, block): @block_context_manager def _fuse_reduce_mean_block(self, block): - fusion_status = False - for i, op in enumerate(list(block.operations)): + fusion_occurred = False + for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -119,8 +122,6 @@ def _fuse_reduce_mean_block(self, block): # start pattern match if mul op is encountered if op.op_type == "reduce_sum": - fusion_status = self._try_to_transform(op, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(op, block): + fusion_occurred = True + return fusion_occurred diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/loop_invariant_elimination.py b/coremltools/converters/mil/mil/passes/defs/cleanup/loop_invariant_elimination.py index 774c6b208..f25945fdb 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/loop_invariant_elimination.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/loop_invariant_elimination.py @@ -68,11 +68,8 @@ def _detect_loop_invariants(while_op): # this block output is a var from outside of the block enclosing_block = while_op.enclosing_block - while_op_id = enclosing_block.find_op_id_in_block(while_op) - output_from_outside_of_block = ( - True - if enclosing_block.is_var_visible_in_block(vx_out, upto_op_with_id=while_op_id) - else False + output_from_outside_of_block = enclosing_block.is_var_visible_in_block( + vx_out, upto_op=while_op ) if return_input_as_output or output_from_outside_of_block: loop_invariant_ids.append(i) diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/noop_elimination.py b/coremltools/converters/mil/mil/passes/defs/cleanup/noop_elimination.py index 8aee02bbf..21f9fb00f 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/noop_elimination.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/noop_elimination.py @@ -85,10 +85,8 @@ def has_all_elements_equal_to(var, value): if has_all_elements_equal_to(op.x, x): input_var = op.y - input_op = input_var.op elif has_all_elements_equal_to(op.y, y): input_var = op.x - input_op = input_var.op else: return False @@ -100,7 +98,7 @@ def has_all_elements_equal_to(var, value): return False if op.enclosing_block.try_replace_uses_of_var_after_op( - anchor_op=input_op, + anchor_op=op, old_var=op.outputs[0], new_var=input_var, ): @@ -132,13 +130,10 @@ def remove_slice_by_index(op): if any([x < 0 for x in stride]): return 
False - input_var = op.x - input_op = input_var.op - if op.enclosing_block.try_replace_uses_of_var_after_op( - anchor_op=input_op, + anchor_op=op, old_var=op.outputs[0], - new_var=input_var, + new_var=op.x, ): op.enclosing_block.remove_ops([op]) return True @@ -151,13 +146,10 @@ def remove_same_shape(op): if input_shape != output_shape: return False - input_var = op.x - input_op = input_var.op - if op.enclosing_block.try_replace_uses_of_var_after_op( - anchor_op=input_op, + anchor_op=op, old_var=op.outputs[0], - new_var=input_var, + new_var=op.x, ): op.enclosing_block.remove_ops([op]) return True @@ -167,13 +159,10 @@ def remove_linear(op): if op.alpha.val != 1 or op.beta.val != 0: return False - input_var = op.x - input_op = input_var.op - if op.enclosing_block.try_replace_uses_of_var_after_op( - anchor_op=input_op, + anchor_op=op, old_var=op.outputs[0], - new_var=input_var, + new_var=op.x, ): op.enclosing_block.remove_ops([op]) return True @@ -185,13 +174,10 @@ def remove_transpose(op): if (perm != sorted_perm).any(): return False - input_var = op.x - input_op = input_var.op - if op.enclosing_block.try_replace_uses_of_var_after_op( - anchor_op=input_op, + anchor_op=op, old_var=op.outputs[0], - new_var=input_var, + new_var=op.x, ): op.enclosing_block.remove_ops([op]) return True @@ -218,7 +204,6 @@ def remove_transpose(op): "crop": remove_same_shape, "linear_activation": remove_linear, } - # abort if op output is a block output if op.outputs[0] in op.enclosing_block.outputs: return None @@ -234,7 +219,11 @@ def remove_transpose(op): @block_context_manager def _noop_elimination_block_wrapper(self, block): def _noop_elimination_block(block): + status = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -243,12 +232,9 @@ def _noop_elimination_block(block): continue remove_fn = noop_elimination._match_pattern(op) - if remove_fn is not None: - status = remove_fn(op) - # has to break as the downstream iterator is affected. 
- if status: - return status - return False + if remove_fn is not None and remove_fn(op): + status = True + return status block_changed = True while block_changed: diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/remove_redundant_ops.py b/coremltools/converters/mil/mil/passes/defs/cleanup/remove_redundant_ops.py index a266dd1c7..c35e5404f 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/remove_redundant_ops.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/remove_redundant_ops.py @@ -4,10 +4,11 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import collections +from typing import Dict, List import numpy as np -from coremltools.converters.mil.mil import Var +from coremltools.converters.mil.mil import Block, Operation, Var from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass from coremltools.converters.mil.mil.passes.helper import block_context_manager from coremltools.converters.mil.mil.passes.pass_registry import register_pass @@ -54,7 +55,14 @@ class remove_redundant_ops(AbstractGraphPass): _NON_REDUNDANT_OPS = tuple() + def __init__(self): + self._num_of_visited_ops: int = ( + 0 # Testing purpose, making sure the algorithm performs in O(N) + ) + self._ops_order: Dict[Block, Dict[Operation, int]] = {} + def apply(self, prog): + self._num_of_visited_ops = 0 for f in prog.functions.values(): self._remove_redundant_ops_in_block_wrapper(f) @@ -69,21 +77,20 @@ def _is_op_eligible_to_be_removed(op): else: return True - @staticmethod - def _get_candidate_ops_list(prospective_ops_list): + def _get_candidate_ops_list(self, prospective_ops_list: List[Operation]) -> List[Operation]: od = collections.OrderedDict() - enclosing_block = [op.enclosing_block for op in prospective_ops_list] - if len(set(enclosing_block)) > 1: # all candidate ops must belong to the same block + enclosing_blocks = [op.enclosing_block for op in prospective_ops_list] + if len(set(enclosing_blocks)) > 1: # all candidate ops must belong to the same block return [] for op in prospective_ops_list: if remove_redundant_ops._is_op_eligible_to_be_removed(op): - od[op] = enclosing_block[0].operations.index(op) + od[op] = self._ops_order[enclosing_blocks[0]][op] + # Sort the ops according to their index of appearing in block.operations, which is # topologically sorted return [x[0] for x in sorted(od.items(), key=lambda t: t[1])] - @staticmethod - def _get_candidate_ops_lists_from_var(var): + def _get_candidate_ops_lists_from_var(self, var: Var) -> List[List[Operation]]: """ Return a list of lists. Each element is a list of a subset of the child ops of var, which satisfies the following conditions: @@ -103,7 +110,7 @@ def _get_candidate_ops_lists_from_var(var): for v in op_types_to_ops.values(): if len(v) > 1: - candidate_ops_list = remove_redundant_ops._get_candidate_ops_list(v) + candidate_ops_list = self._get_candidate_ops_list(v) if len(candidate_ops_list) > 1: candidate_ops_lists.append(candidate_ops_list) @@ -184,6 +191,9 @@ def _try_to_remove_ops(candidate_ops_list): first_op = candidate_ops_list[0] block = first_op.enclosing_block + if block is None: + return False + # currently, we only consider the cases when the op has 1 output. # The replace var logic below only handles the single output case. 
if len(first_op.outputs) > 1: @@ -191,6 +201,8 @@ ops_to_remove = [] for op in candidate_ops_list[1:]: + if op.enclosing_block is None: + continue if op.outputs[0] not in block.outputs: # to make sure we don't remove an output op if remove_redundant_ops._are_ops_identical(first_op, op): ops_to_remove.append(op) @@ -212,25 +224,44 @@ block.remove_ops(ops_removed) return True - @staticmethod - def _try_to_transform(parent_var): + def _try_to_transform(self, parent_var: Var) -> bool: """ scan the children ops to parent_var, to find and remove identical ops, if any. Returns True, if successful in finding such redundant ops. """ - candidate_ops_lists = remove_redundant_ops._get_candidate_ops_lists_from_var(parent_var) + candidate_ops_lists = self._get_candidate_ops_lists_from_var(parent_var) block_changed = False for ops_list in candidate_ops_lists: # Iterate through the child ops list, to make sure that we check all possible combinations. for idx in range(len(ops_list)): if remove_redundant_ops._try_to_remove_ops(ops_list[idx:]): + # We should not break right away, so that we can keep + # the time complexity low. block_changed = True - break + return block_changed @block_context_manager def _remove_redundant_ops_in_block_wrapper(self, block): + def _cache_topological_order_of_ops_in_block(block: Block): + if block in self._ops_order: + return + + self._ops_order[block] = {} + for i, op in enumerate(block.operations): + for b in op.blocks: + _cache_topological_order_of_ops_in_block(b) + self._ops_order[block][op] = i + def _remove_redundant_ops_in_block(block): + # cache the topological order of the ops, + # so that we do not need to query the index every single time. + # Note that the transformation in this particular graph pass + # is going to preserve the topological order. That is the + # reason why we can build the cache at the very beginning. + _cache_topological_order_of_ops_in_block(block) + + # iterate over the block inputs if isinstance(block.inputs, dict): block_input_var_list = list(block.inputs.values()) elif isinstance(block.inputs, (list, tuple)): @@ -238,17 +269,19 @@ def _remove_redundant_ops_in_block(block): else: raise ValueError("Unrecognized type of block.inputs, its neither a list nor dict.") - # iterate over the block inputs for input_var in block_input_var_list: if len(input_var.child_ops) > 1: self._try_to_transform(input_var) # iterate over the ops in the block graph_updated = False - for op in block.operations: + for op in list(block.operations): + if op.op_type == "const": continue + self._num_of_visited_ops += 1 + for b in op.blocks: block_changed = True while block_changed: @@ -257,12 +290,13 @@ def _remove_redundant_ops_in_block(block): if len(op.outputs) > 0 and len(op.outputs[0].child_ops) > 1: # currently, we only check the first output of the op # this can be extended, if required, to check for other outputs. - graph_updated = self._try_to_transform(op.outputs[0]) - # has to break as the downstream iterator is affected. - if graph_updated: - return graph_updated + if self._try_to_transform(op.outputs[0]): + # we don't need to break right away, in order to + # keep the time complexity low. 
+ graph_updated = True return graph_updated block_changed = True while block_changed: + self._ops_order = {} block_changed = _remove_redundant_ops_in_block(block) diff --git a/coremltools/converters/mil/mil/passes/defs/cleanup/topological_reorder.py b/coremltools/converters/mil/mil/passes/defs/cleanup/topological_reorder.py index afbc88ee7..6e12a2c27 100644 --- a/coremltools/converters/mil/mil/passes/defs/cleanup/topological_reorder.py +++ b/coremltools/converters/mil/mil/passes/defs/cleanup/topological_reorder.py @@ -8,6 +8,7 @@ from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass from coremltools.converters.mil.mil.passes.helper import block_context_manager from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil.utils import CacheDoublyLinkedList @register_pass(namespace="common") @@ -67,7 +68,7 @@ class topological_reorder(AbstractGraphPass): """ def apply(self, prog): - for f_name, f in prog.functions.items(): + for f in prog.functions.values(): self._move_operations_to_the_end_block(f, ["cast", "transpose"]) @staticmethod @@ -84,9 +85,10 @@ def _move_operations_to_the_end_block(block, op_type_to_move): # - set[Var]: Set of vars consumed in block (or returned as block output) # first_use maps var to (index, op) representing the first op in block.operation that consumes this var. + block.operations = list(block.operations) first_use = {} # var -> op ops_to_remove = [] # list of ops to be deleted at the end of pass - for index, op in enumerate(reversed(block.operations[:])): + for op in reversed(block.operations): current_op = op if op.op_type in op_type_to_move: @@ -118,7 +120,7 @@ def _move_operations_to_the_end_block(block, op_type_to_move): for old_output_var, new_output_var in zip(op.outputs, new_var): block.replace_uses_of_var_after_op( - anchor_op=None, old_var=old_output_var, new_var=new_output_var + anchor_op=op, old_var=old_output_var, new_var=new_output_var ) # Collect input vars from sub-block if present @@ -161,6 +163,7 @@ def _move_operations_to_the_end_block(block, op_type_to_move): first_use[v] = current_op # Remove ops that are reordered + block.operations = CacheDoublyLinkedList(block.operations) block.remove_ops(ops_to_remove) # Returns set of vars consumed in current block diff --git a/coremltools/converters/mil/mil/passes/defs/lower_complex_dialect_ops.py b/coremltools/converters/mil/mil/passes/defs/lower_complex_dialect_ops.py index ed36d87f3..c78665870 100644 --- a/coremltools/converters/mil/mil/passes/defs/lower_complex_dialect_ops.py +++ b/coremltools/converters/mil/mil/passes/defs/lower_complex_dialect_ops.py @@ -162,8 +162,10 @@ def _calculate_dft_matrix( weight matrix consisting of only the first (n_fft // 2 + 1) values. """ n_fft = mb.cast(x=n_fft, dtype="fp32", before_op=before_op) - half = mb.floor_div(x=n_fft, y=2., before_op=before_op) - half = mb.add(x=half, y=1., before_op=before_op) + + if onesided: + half = mb.floor_div(x=n_fft, y=2.0, before_op=before_op) + half = mb.add(x=half, y=1.0, before_op=before_op) tmp_x = mb.range_1d(start=0.0, end=(half if onesided else n_fft), step=1.0, before_op=before_op) tmp_y = mb.range_1d(start=0.0, end=n_fft, step=1.0, before_op=before_op) @@ -171,15 +173,15 @@ def _calculate_dft_matrix( # Use MIL ops to calculate base = torch.outer(tmp, tmp) * (2 * torch.pi / N). 
tmp_x = mb.reshape(x=tmp_x, shape=[-1, 1], before_op=before_op) tmp_y = mb.reshape(x=tmp_y, shape=[1, -1], before_op=before_op) - + base = mb.matmul(x=tmp_x, y=tmp_y, before_op=before_op) base = mb.mul(x=base, y=2 * np.pi, before_op=before_op) base = mb.real_div(x=base, y=n_fft, before_op=before_op) - + # Get real part and imaginary part separately. cos_base = mb.cos(x=base, before_op=before_op) sin_base = mb.sin(x=base, before_op=before_op) - + return cos_base, sin_base def _fft_1d( @@ -233,7 +235,7 @@ def _fft_1d( N = transposed_input_real.shape[0] reshaped_input_real = mb.reshape(x=transposed_input_real, shape=[N, -1], before_op=before_op) reshaped_input_imag = mb.reshape(x=transposed_input_imag, shape=[N, -1], before_op=before_op) - + N = mb.cast(x=N, dtype="fp32", before_op=before_op) cos_base, sin_base = _calculate_dft_matrix(N, onesided=False, before_op=before_op) @@ -342,7 +344,7 @@ def _stft( n_fft, onesided=is_onesided, before_op=before_op) - + # create a window of centered 1s of the requested size if win_length: n_left = (n_fft.val - win_length.val) // 2 @@ -352,7 +354,7 @@ def _stft( if not window: window = mb.fill(shape=(win_length.val,), value=1., before_op=before_op) right = mb.fill(shape=(n_right,), value=0., before_op=before_op) - + # concatenate window = mb.concat(values=(left, window, right), axis=0, before_op=before_op) @@ -602,10 +604,10 @@ def _lower_complex_stft(op: Operation): raise ValueError("Onesided is only valid for real inputs") real, imag = _stft( - op.input.real if is_complex else op.input, - op.input.imag if is_complex else None, + op.input.real if is_complex else op.input, + op.input.imag if is_complex else None, op.n_fft, op.hop_length, op.win_length, op.window, op.normalized, op.onesided, before_op=op) - + return _wrap_complex_output(op.outputs[0], real, imag) @@ -637,16 +639,8 @@ def _match_and_replace_dialect_op(block, op): @block_context_manager def _lower_complex_dialect_ops_in_block(block): - def help_lower_complex_dialect_ops(block): - for op in list(block.operations): - if _match_and_replace_dialect_op(block, op): - return True - return False - - block_changed = True - while block_changed: - block_changed = help_lower_complex_dialect_ops(block) - + for op in list(block.operations): + _match_and_replace_dialect_op(block, op) @register_pass(namespace="common") class lower_complex_dialect_ops(AbstractGraphPass): diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_activation.py b/coremltools/converters/mil/mil/passes/defs/optimize_activation.py index 84f8c44ec..fb0e5991f 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_activation.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_activation.py @@ -154,19 +154,21 @@ def _try_to_transform(op, block): def _fuse_gelu_exact_block(self, block): fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: block_changed = self._fuse_gelu_exact_block(b) + if len(op.blocks) > 0: # This op can't be real_div or mul continue if op.op_type in ["mul", "real_div"]: - fusion_occurred = self._try_to_transform(op, block) - # has to break as the downstream iterator is affected. 
- if fusion_occurred: - return fusion_occurred + if self._try_to_transform(op, block): + fusion_occurred = True return fusion_occurred @@ -179,21 +181,21 @@ class fuse_gelu_tanh_approximation(AbstractGraphPass): The implementation of this pass uses the generic graph pattern matching and transform algorithm implemented in ``coremltools.converters.mil.experimental.passes.generic_pass_infrastructure`` and documented in ``coremltools/converters/mil/experimental/passes/readme.md``. - + `Graph for` ``get_gelu_pattern1()`` - + ``y = x * (0.5 * (tanh(((.0447)x^3 + x ) * sqrt(2/pi)) + 1))`` .. code-block:: - [...] -----> pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) ----> mul (0.5) -----> mul ---> [...] - | ^ ^ - | | | - |------------------------------------------------------------------------------------------------------------------------ + [...] -----> pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) ----> mul (0.5) -----> mul ---> [...] + | ^ ^ + | | | + |------------------------------------------------------------------------------------------------------------------------ `Graph for` ``get_gelu_pattern2()`` - + ``y = (0.5 * x) * (tanh(((.0447)x^3 + x ) * sqrt(2/pi)) + 1)`` .. code-block:: @@ -311,7 +313,7 @@ def get_gelu_pattern2(): | ^ ^ | | | |--------------------------------------------------------- - + """ @mb.program( @@ -339,7 +341,7 @@ class fuse_leaky_relu(AbstractGraphPass): Detect the ``mul`` ---> ``max`` pattern than can be mapped to ``leaky_relu``. `In code form - Input` - + .. code-block:: %2 = const(value = alpha) # where 0 <= alpha <= 1 @@ -348,14 +350,14 @@ class fuse_leaky_relu(AbstractGraphPass): `In code form - Output` - + .. code-block:: %4 = leaky_relu(x=%1, alpha=%2) `In graphical form - Input graph` - + .. code-block:: const (val = alpha) @@ -366,7 +368,7 @@ class fuse_leaky_relu(AbstractGraphPass): `In graphical form - Output graph` - + .. code-block:: input --------> leaky_relu ---------> output @@ -423,22 +425,24 @@ def _try_to_transform(mul_op, block): @block_context_manager def _fuse_leaky_relu_block(self, block): - fusion_status = False - for i, op in enumerate(list(block.operations)): + fusion_occurred = False + for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: block_changed = self._fuse_leaky_relu_block(b) + if len(op.blocks) > 0: continue # start pattern match if mul op is encountered if op.op_type == "mul": - fusion_status = self._try_to_transform(op, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(op, block): + fusion_occurred = True + return fusion_occurred class FusePreluPattern1: @@ -557,7 +561,7 @@ def transform_pattern(pattern): def get_prelu_pattern(): """ ``x1 = transpose(perm=(0,2,3,1))(x)`` - + ``y = a * relu(-1 * x1) + relu(x1)`` When ``x`` is rank 4, and ``a`` is of shape (``C,)``, ``(1, C)``, ``(1,1,C)``, or ``(1,1,1,C)``, @@ -585,7 +589,7 @@ class fuse_prelu(AbstractGraphPass): """ Detect the following patterns that can be mapped to a ``prelu`` op. 
Essentially, the ``prelu`` op can be broken down into the following ops: - + ``y = a * relu(-1 * x) + relu(x)`` `Pattern 1` diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_conv.py b/coremltools/converters/mil/mil/passes/defs/optimize_conv.py index 0dbbd0236..eecedf485 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_conv.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_conv.py @@ -125,7 +125,11 @@ def apply(self, prog): @block_context_manager def _compose_conv1d_block(self, block: Block): def help_compose_conv1d_block(block: Block) -> bool: + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: self._compose_conv1d_block(b) @@ -141,10 +145,9 @@ def help_compose_conv1d_block(block: Block) -> bool: # try pattern `expand_dim` -> `transpose` -> `conv2d` -> `transpose` -> `squeeze` if self._try_match_and_transform_pattern_channel_last(op, block): - # has to break as the downstream iterator is affected - return True + fusion_occurred = True - return False + return fusion_occurred block_changed = True while block_changed: @@ -168,9 +171,11 @@ def _try_match_and_transform_pattern(self, expand_op: Operation, block: Block) - return False squeeze_op = conv_op.outputs[0].child_ops[0] - # abort composition if not squeezing the dummy height + # Abort composition if not squeezing the dummy height (the extended dim_size=1 dimension) if squeeze_op.axes.rank != 1 or squeeze_op.axes.val[0] not in (-2, 2): return False + elif squeeze_op.x.shape[squeeze_op.axes.val[0]] != 1: + return False # everything looks good return self._try_apply_transform(expand_op, conv_op, squeeze_op, block) @@ -498,6 +503,9 @@ def _match_pattern(op): fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -508,10 +516,8 @@ def _match_pattern(op): bn_op = _match_pattern(op) if bn_op is not None: - fusion_occurred = self._try_to_transform(op, bn_op) - # has to break as the downstream iterator is affected. - if fusion_occurred: - return fusion_occurred + if self._try_to_transform(op, bn_op): + fusion_occurred = True return fusion_occurred @@ -796,8 +802,11 @@ def _try_to_transform(conv_op, add_op): @block_context_manager def _fuse_conv_bias_block(self, block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -809,17 +818,14 @@ def _fuse_conv_bias_block(self, block): # pattern 1 : conv + add/sub add_op = self._match_pattern(op) if add_op is not None: - fusion_status = self._try_to_transform(op, add_op) - # has to break as the downstream iterator is affected. 
- if fusion_status: - return fusion_status + if self._try_to_transform(op, add_op): + fusion_occurred = True # pattern 2 : conv + transpose + add/sub - fusion_status = self._try_to_transform_transpose_pattern(op, block) - if fusion_status: - return fusion_status + elif self._try_to_transform_transpose_pattern(op, block): + fusion_occurred = True - return fusion_status + return fusion_occurred @register_pass(namespace="common") @@ -1005,6 +1011,9 @@ def _match_pattern(op): fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -1016,10 +1025,9 @@ def _match_pattern(op): scale_op = _match_pattern(op) if scale_op is not None: - fusion_occurred = self._try_to_transform(op, scale_op) - # has to break as the downstream iterator is affected. - if fusion_occurred: - return fusion_occurred + if self._try_to_transform(op, scale_op): + fusion_occurred = True + return fusion_occurred @@ -1125,8 +1133,11 @@ def _compute_new_pad_values(transpose_op): @block_context_manager def _pad_conv_connect_block(self, block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -1137,8 +1148,6 @@ def _pad_conv_connect_block(self, block): transpose_ops = self._match_pattern(op) if transpose_ops is not None: - fusion_status = self._try_to_transform(op, transpose_ops, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(op, transpose_ops, block): + fusion_occurred = True + return fusion_occurred diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_elementwise_binary.py b/coremltools/converters/mil/mil/passes/defs/optimize_elementwise_binary.py index f0be448e4..72633392c 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_elementwise_binary.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_elementwise_binary.py @@ -170,6 +170,7 @@ def try_to_transform_const_scalar_cond(select_op: Operation, cond_val: np.ndarra result_shape = broadcast_shapes(a.shape, b.shape) # cannot simply replace with a or b if broadcasting if x.shape != result_shape: + x.op.enclosing_block.remove_ops([x.op]) return None return x @@ -332,8 +333,11 @@ def _check_shape(arr): @block_context_manager def _fuse_elementwise_to_batchnorm_block(self, block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -344,11 +348,10 @@ def _fuse_elementwise_to_batchnorm_block(self, block): add_op = self._match_pattern(op) if add_op is not None: - fusion_status = self._try_to_transform(op, add_op, block) - # has to break as the downstream iterator is affected. 
- if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(op, add_op, block): + fusion_occurred = True + + return fusion_occurred @register_pass(namespace="common") @@ -475,6 +478,9 @@ def _try_to_transform(op, block): def _rank0_expand_dims_swap(self, block): fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -484,8 +490,6 @@ def _rank0_expand_dims_swap(self, block): continue if op.op_type in ["add", "sub", "mul", "real_div", "floor_div"]: - fusion_occurred = self._try_to_transform(op, block) - # has to break as the downstream iterator is affected. - if fusion_occurred: - return fusion_occurred + if self._try_to_transform(op, block): + fusion_occurred = True return fusion_occurred diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_linear.py b/coremltools/converters/mil/mil/passes/defs/optimize_linear.py index e59103a5d..d37084b03 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_linear.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_linear.py @@ -5,8 +5,9 @@ import numpy as np +from coremltools.converters.mil.mil import Block from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Program +from coremltools.converters.mil.mil import Operation, Program, Var from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass from coremltools.converters.mil.mil.passes.helper import block_context_manager from coremltools.converters.mil.mil.passes.pass_registry import register_pass @@ -130,6 +131,9 @@ def _find_candicate_op(op): fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -140,10 +144,8 @@ def _find_candicate_op(op): add_or_sub_op = _find_candicate_op(op) if add_or_sub_op is not None: - fusion_occurred = self._try_to_transform(op, add_or_sub_op, block) - # has to break as the downstream iterator is affected. - if fusion_occurred: - return fusion_occurred + if self._try_to_transform(op, add_or_sub_op, block): + fusion_occurred = True return fusion_occurred @@ -290,8 +292,11 @@ def _try_to_transform(self, matmul_op, add_op, block): @block_context_manager def _fuse_matmul_weight_bias_block(self, block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -303,8 +308,114 @@ def _fuse_matmul_weight_bias_block(self, block): add_op = self._find_candidate_op(op) if add_op is not None: - fusion_status = self._try_to_transform(op, add_op, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(op, add_op, block): + fusion_occurred = True + return fusion_occurred + + +@register_pass(namespace="common") +class fuse_transpose_matmul(AbstractGraphPass): + """ + Fuse ``transpose + matmul`` to ``matmul`` if possible, + since ``matmul`` has args ``transpose_x`` and ``transpose_y`` to transpose last 2 dims + + .. 
code-block:: + + Positive example: + Input graph: + transpose(x=x, perm=(1, 0)) -| + |-> matmul(x=transposed_x, y=transposed_y) + transpose(x=y, perm=(1, 0)) -| + + Output graph: + matmul(x=x, y=y, transpose_x=True, transpose_y=True) + + Negative example: + Input graph: + transpose(x=x, perm=(1, 0, 2)) -| + |-> matmul(x=transposed_x, y=transposed_y) + transpose(x=y, perm=(1, 0, 2)) -| + + Output graph: + Same as input graph, nothing changes + """ + + def apply(self, prog: Program) -> None: + for f in prog.functions.values(): + self._fuse_transpose_matmul_block(f) + + @block_context_manager + def _fuse_transpose_matmul_block(self, block: Block) -> None: + # use shallow copy to hide changes on block.operations during the loop, + # since we only attempt fusion when reaching a matmul, which does not affect downstream ops + for op in list(block.operations): + for b in op.blocks: + self._fuse_transpose_matmul_block(b) + + if op.op_type == "matmul": + self._try_fuse_transpose_matmul(op, block) + + @staticmethod + def is_transposed_and_fusable_to_matmul(x: Var) -> bool: + """ + 1. check if x is transposed + 2. check if x is transposed in the last 2 dimensions, + since the transpose arg in matmul only transposes the last 2 dimensions + """ + + # x is not transposed, False + if x.op is None or x.op.op_type != "transpose": + return False + + rank = x.rank + # if transposing a rank < 2 tensor, it is a noop and will be eliminated by noop_elimination + if rank < 2: + return False + + # canonicalize the input permutation to compare with last-2-dim permutation below + perm = x.op.perm.val + perm[np.where(perm < 0)] += rank + perm[-2:] -= rank + + # permuting only last 2 dims should look like (0, 1, ..., -1, -2) + perm_only_last_2_dims = np.arange(rank) + perm_only_last_2_dims[-2] = -1 + perm_only_last_2_dims[-1] = -2 + + return np.all(perm == perm_only_last_2_dims) + + def _try_fuse_transpose_matmul(self, op: Operation, block: Block) -> None: + assert op.op_type == "matmul" + + x = op.x + y = op.y + transpose_x = False if op.transpose_x is None else op.transpose_x.val + transpose_y = False if op.transpose_y is None else op.transpose_y.val + + is_x_transposed_and_fusable_to_matmul = self.is_transposed_and_fusable_to_matmul(x) + is_y_transposed_and_fusable_to_matmul = self.is_transposed_and_fusable_to_matmul(y) + # if neither x nor y is transposed and fusable with matmul, there is nothing we need to do + if not is_x_transposed_and_fusable_to_matmul and not is_y_transposed_and_fusable_to_matmul: + return + + if is_x_transposed_and_fusable_to_matmul: + x = x.op.x + transpose_x = not transpose_x + if is_y_transposed_and_fusable_to_matmul: + y = y.op.x + transpose_y = not transpose_y + + fused_transpose_matmul = mb.matmul( + x=x, + y=y, + transpose_x=transpose_x, + transpose_y=transpose_y, + before_op=op, + name=op.name, + ) + block.replace_uses_of_var_after_op( + anchor_op=op, + old_var=op.outputs[0], + new_var=fused_transpose_matmul, + ) + op.remove_from_block() diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_normalization.py b/coremltools/converters/mil/mil/passes/defs/optimize_normalization.py index 30e430bb6..15bfb3c19 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_normalization.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_normalization.py @@ -27,7 +27,7 @@ class fuse_layernorm_or_instancenorm(AbstractGraphPass): are ``instance_norm``. Pattern 5 is ``layer_norm``. You can find these patterns in the methods for this class in the source code. 
To quickly view the source code, click the **[source]** button at the end of the class definition. - + """ _DEBUG = False # set to true to plot the block before and after the transformation @@ -93,13 +93,13 @@ def _check_child_op_types( """ Returns ``True`` for child op types matching ``child_op_types``, otherwise returns ``False``. - Parameters - ---------- + Parameters + ---------- param op : Current op. - + param child_op_type : Expected child op type. - + param check_order : Ensure child in given order, defaults to ``True``. """ if op is None or len(op.outputs) != 1: @@ -120,13 +120,13 @@ def _try_get_child_op_type( """ Returns child op if type matches, otherwise returns ``None``. - Parameters - ---------- + Parameters + ---------- param op : Current op. - + param child_op_type : Expected child op type. - + param index : Child op index. """ if op is None: @@ -185,8 +185,18 @@ def _try_apply_transform( if rank == 4 and negative_axes == [-3]: is_layernorm = (gamma_var is None and beta_var is None) or (gamma_rank == 1 and beta_rank == 1) - gamma_var = gamma_var.val if gamma_var else None - beta_var = beta_var.val if beta_var else None + + if gamma_var: + ops_to_remove.append(gamma_var.op) + gamma_var = gamma_var.val + else: + gamma_var = None + + if beta_var: + ops_to_remove.append(beta_var.op) + beta_var = beta_var.val + else: + beta_var = None if rank == 4 and (negative_axes == [-2, -1] or negative_axes == [-3, -2]): if ( @@ -219,6 +229,7 @@ def _try_apply_transform( name=out_name + "_instancenorm" if is_require_rank4_transpose else out_name, before_op=end_op, ) + ops_to_remove.extend([gamma_var.op, beta_var.op]) else: # is_layernorm x = mb.layer_norm( x=x if is_require_rank4_transpose else reduce_op.x, @@ -251,7 +262,7 @@ def _try_match_and_transform_pattern_1(self, reduce_op, block) -> bool: ``y = gamma * (x - mean) / sqrt(variance + epsilon) + beta`` ``y = x * [gamma * rsqrt(variance + eps)] + (beta - mean * [gamma * rsqrt(variance + eps)])`` - + .. code-block:: x --> reduce_mean --> sub --> square --> reduce_mean --> add(epsilon) --> rsqrt @@ -282,8 +293,8 @@ def _try_match_and_transform_pattern_1(self, reduce_op, block) -> bool: It is ``layer_norm`` if all of the following are true: - ``axes`` is either ``[-1]``, ``[-1, -2]``, or ``[-1, -2, -3]``, and so on. - ``rank`` of ``gamma`` and ``beta`` is equal to the length of the ``axes``. - - """ + + """ ops_to_remove = [] root_var = reduce_op.x @@ -398,18 +409,18 @@ def _try_match_and_transform_pattern_1(self, reduce_op, block) -> bool: def _try_match_and_transform_pattern_2(self, reduce_op, block) -> bool: """ Identify the pattern: - + ``y = (x - mean) / pow(variance + epsilon) * gamma + beta`` This pattern corresponds to, and should be fused as, ``instance_norm``. - + All of the following conditions must be satisfied: - + 1. ``input`` is rank 4 tensor. 2. ``reduce`` operates on spatial dimensions ``axes=[-2, -1]``, or ``axes=[-3, -2]`` (a channel first to channel last transpose would be inserted in such cases). 3. ``gamma`` and ``beta`` are both shape ``(C,)`` after ``squeeze``, where ``C`` is number of channels. - + .. code-block:: |----> sub -----| const (0.5) @@ -523,14 +534,14 @@ def _try_match_and_transform_pattern_3(self, reduce_op, block) -> bool: Detect ``InstanceNorm`` pattern in TensorFlow-Addons. This pattern corresponds to, and should be fused as, ``instance_norm``. - + All of the following conditions must be satisfied: - + 1. ``input`` is rank 4 tensor. 2. 
``reduce`` operates on spatial dimensions ``axes=[-2, -1]``, or ``axes=[-3, -2]`` (a channel first to channel last transpose would be inserted in such cases). 3. ``gamma`` and ``beta`` are absent. Default values for ``gamma`` and ``beta`` would be used. - + .. code-block:: |-------------------------------------------------| @@ -661,18 +672,18 @@ def _try_match_and_transform_pattern_3(self, reduce_op, block) -> bool: def _try_match_and_transform_pattern_4(self, reduce_op: Operation, block: Block) -> bool: """ Identify the pattern: - + ``y = x * [gamma * rsqrt(variance + eps)] + (beta - mean * [gamma * rsqrt(variance + eps)])`` This pattern corresponds to, and should be fused as, ``instance_norm``. - + All of the following conditions must be satisfied: - + 1. ``input`` is rank 4 tensor. 2. ``reduce`` operates on spatial dimensions ``axes=[-2, -1]`` or ``axes=[-3, -2]`` (a channel first to channel last transpose would be inserted in such cases). 3. ``gamma`` and ``beta`` are both shape ``(C,)`` after ``squeeze``, where ``C`` is number of channels. - + .. code-block:: |-----------| @@ -704,8 +715,17 @@ def _try_match_and_transform_pattern_4(self, reduce_op: Operation, block: Block) # check that root_var feeds into exactly 4 ops if len(root_var.child_ops) != 4: return False - if root_var.op is not None and not self._check_child_op_types( - root_var.op, child_op_types=["mul", "mul", "reduce_sum", "mul"] + + if ( + root_var.op is not None + and not self._check_child_op_types( + root_var.op, child_op_types=["mul", "mul", "reduce_sum", "mul"] + ) + and not self._check_child_op_types( + # The _check_child_op_types checks for the exact order of the child_ops. + root_var.op, + child_op_types=["mul", "mul", "mul", "reduce_sum"], + ) ): return False @@ -988,6 +1008,15 @@ def _try_match_and_transform_pattern_5(self, reduce_op, block) -> bool: # For simplicity don't handle this edge case. return False + if add_beta_op is None and mul_gamma_op is None: + # Gamma and beta are optional in layer_norm. + pass + elif add_beta_op is None or mul_gamma_op is None: + # If only one of gamma or beta is present, they could + # be folded into the layer_norm op. For simplicity + # don't handle this edge case. + return False + if has_beta_and_gamma: beta_var = add_beta_op.y if add_beta_op.x == mul_op.outputs[0] else add_beta_op.x @@ -1005,27 +1034,20 @@ def _try_match_and_transform_pattern_5(self, reduce_op, block) -> bool: name="_fuse_layernorm_beta" ) - ops_to_remove.append(add_beta_op) - ops_to_remove.append(mul_gamma_op) + ops_to_remove.extend([add_beta_op, mul_gamma_op]) end_op = mul_gamma_op - if add_beta_op is None and mul_gamma_op is None: - # Gamma and beta are optional in layer_norm. - pass - elif add_beta_op is None or mul_gamma_op is None: - # If only one of gamma or beta is present, they could - # be folded into the layer_norm op. For simplicity - # don't handle this edge case. 
- return False - return self._try_apply_transform( reduce_op, block, gamma_var, beta_var, epsilon_var, end_op, ops_to_remove ) @block_context_manager def _fuse_layernorm_or_instancenorm_block(self, block: Block): - fusion_status = False - for i, op in enumerate(list(block.operations)): + fusion_occurred = False + for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -1035,21 +1057,15 @@ def _fuse_layernorm_or_instancenorm_block(self, block: Block): # start pattern match if reduce_mean op is encountered if op.op_type == "reduce_mean": - if fusion_status is False: - fusion_status = self._try_match_and_transform_pattern_1(op, block) - if fusion_status is False: - fusion_status = self._try_match_and_transform_pattern_2(op, block) - if fusion_status is False: - fusion_status = self._try_match_and_transform_pattern_3(op, block) - if fusion_status is False: - fusion_status = self._try_match_and_transform_pattern_5(op, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status + if self._try_match_and_transform_pattern_1(op, block): + fusion_occurred = True + elif self._try_match_and_transform_pattern_2(op, block): + fusion_occurred = True + elif self._try_match_and_transform_pattern_3(op, block): + fusion_occurred = True + elif self._try_match_and_transform_pattern_5(op, block): + fusion_occurred = True elif op.op_type == "reduce_sum": - if fusion_status is False: - fusion_status = self._try_match_and_transform_pattern_4(op, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status - return fusion_status + if self._try_match_and_transform_pattern_4(op, block): + fusion_occurred = True + return fusion_occurred diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py b/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py index f1826b259..cd66e421e 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_quantization.py @@ -3,15 +3,15 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from typing import Tuple +from typing import List, Set, Tuple import numpy as np -import coremltools.converters.mil.mil.types as types from coremltools.converters.mil._deployment_compatibility import AvailableTarget +from coremltools.converters.mil.frontend import _utils from coremltools.converters.mil.mil import Block from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Operation, Var +from coremltools.converters.mil.mil import Operation, Var, types from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass from coremltools.converters.mil.mil.passes.helper import ( _check_child_op_type, @@ -22,11 +22,11 @@ @register_pass(namespace="common") -class merge_tensorwise_affine_dequantize_with_consecutive_ops(AbstractGraphPass): +class merge_affine_dequantize_with_consecutive_ops(AbstractGraphPass): """ This graph pass does const folding to a chain of supported ops starts with a - tensor-wise ``constexpr_affine_dequantize`` op. i.e., both ``scale`` and - ``zero_point`` are scalar (rank 0). + ``constexpr_affine_dequantize`` op. 
More op types are supported when quantization + is tensor-wise, and only a subset is supported when it is channel-wise. For example: Input graph: @@ -45,43 +45,48 @@ class merge_tensorwise_affine_dequantize_with_consecutive_ops(AbstractGraphPass) --> constexpr_affine_dequantize -> reshape -> out_2 """ - SUPPORTED_OPS = [ + SUPPORTED_OP_TYPES_PER_TENSOR = { "transpose", "reshape", "expand_dims", "squeeze", - ] + } + SUPPORTED_OP_TYPES_PER_CHANNEL = {"transpose"} + assert SUPPORTED_OP_TYPES_PER_CHANNEL.issubset( + SUPPORTED_OP_TYPES_PER_TENSOR + ), "If an op can merge with channel-wise quantization, then it must also be able to merge with tensor-wise quantization" def apply(self, prog): for f in prog.functions.values(): block_changed = True while block_changed: - block_changed = self.merge_tensorwise_affine_dequantize_with_consecutive_ops_block( - f - ) + block_changed = self.merge_affine_dequantize_with_consecutive_ops_block(f) @block_context_manager - def merge_tensorwise_affine_dequantize_with_consecutive_ops_block(self, block): - fusion_status = False + def merge_affine_dequantize_with_consecutive_ops_block(self, block: Block): + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: - block_changed = ( - self.merge_tensorwise_affine_dequantize_with_consecutive_ops_block(b) - ) + block_changed = self.merge_affine_dequantize_with_consecutive_ops_block(b) if op.op_type != "constexpr_affine_dequantize": continue - fusion_status = self._try_to_transform(op, block) - if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(op, block): + fusion_occurred = True + return fusion_occurred @staticmethod - def _apply_equivalent_transform(val, op): - if op.op_type not in merge_tensorwise_affine_dequantize_with_consecutive_ops.SUPPORTED_OPS: + def _apply_equivalent_transform(val: np.ndarray, op: Operation) -> np.ndarray: + if ( + op.op_type + not in merge_affine_dequantize_with_consecutive_ops.SUPPORTED_OP_TYPES_PER_TENSOR + ): raise ValueError(f"unsupported op_type {op.op_type}") if op.op_type == "transpose": @@ -97,15 +102,9 @@ def _apply_equivalent_transform(val, op): return np.squeeze(val, axis=tuple(op.axes.val.tolist())) @staticmethod - def _try_to_transform(op, block): - # first check if it is tensorwise quantization - if op.scale.rank != 0 or op.zero_point.rank != 0: - return False - - # first check if quantized_data only feeds into a single op - if len(op.quantized_data.child_ops) != 1: - return False - + def search_for_ops_to_fold( + op: Operation, block: Block, supported_op_types: Set[str] + ) -> List[Operation]: # traverse the graph to get a chain of applicable ops to fold ops_to_fold = [] cursor = op @@ -113,32 +112,82 @@ def _try_to_transform(op, block): prev_cursor = cursor if cursor.outputs[0] in block.outputs: break - for val in merge_tensorwise_affine_dequantize_with_consecutive_ops.SUPPORTED_OPS: - if _check_child_op_type(cursor, val): + for supported_op_type in supported_op_types: + if _check_child_op_type(cursor, supported_op_type): ops_to_fold.append(cursor.outputs[0].child_ops[0]) cursor = ops_to_fold[-1] break if prev_cursor == cursor: break + return ops_to_fold + + @staticmethod + def _try_to_transform_per_tensor(op: Operation, block: Block) -> bool: + assert ( + op.scale.rank == 0 and op.zero_point.rank == 0 + ), "The _try_to_transform_per_tensor method should only be used for per-tensor dequantization case" + + ops_to_fold = 
merge_affine_dequantize_with_consecutive_ops.search_for_ops_to_fold( + op, block, merge_affine_dequantize_with_consecutive_ops.SUPPORTED_OP_TYPES_PER_TENSOR + ) + if len(ops_to_fold) == 0: + return False + + # do the same transformation on the source quantized data + cursor = op.quantized_data.val + for op_to_fold in ops_to_fold: + cursor = merge_affine_dequantize_with_consecutive_ops._apply_equivalent_transform( + cursor, op_to_fold + ) + + # after transformation, we create a new constexpr_affine_dequantize op and do the replacement + new_var = _utils._construct_constexpr_affine_op( + cursor, + op.zero_point, + op.scale, + op.axis, + name=ops_to_fold[-1].outputs[0].name, + before_op=ops_to_fold[-1], + ) + block.replace_uses_of_var_after_op( + anchor_op=ops_to_fold[-1], + old_var=ops_to_fold[-1].outputs[0], + new_var=new_var, + force_replace=True, + ) + block.remove_ops([op] + ops_to_fold) + return True + + @staticmethod + def _try_to_transform_per_channel(op: Operation, block: Block) -> bool: + scale = op.scale + zero_point = op.zero_point + # positively canonicalize axis for easier manipulation later on + axis = op.axis.val if op.axis.val >= 0 else op.axis.val + op.quantized_data.rank + ops_to_fold = merge_affine_dequantize_with_consecutive_ops.search_for_ops_to_fold( + op, + block, + merge_affine_dequantize_with_consecutive_ops.SUPPORTED_OP_TYPES_PER_CHANNEL, + ) if len(ops_to_fold) == 0: return False # do the same transformation on the source quantized data cursor = op.quantized_data.val - for val in ops_to_fold: - cursor = ( - merge_tensorwise_affine_dequantize_with_consecutive_ops._apply_equivalent_transform( - cursor, val - ) + for op_to_fold in ops_to_fold: + cursor = merge_affine_dequantize_with_consecutive_ops._apply_equivalent_transform( + cursor, op_to_fold ) + if op_to_fold.op_type == "transpose": + axis = np.where(op_to_fold.perm.val == axis)[0][0] # after transformation, we create a new constexpr_affine_dequantize op and do the replacement new_var = mb.constexpr_affine_dequantize( quantized_data=cursor, - zero_point=op.zero_point, - scale=op.scale, - axis=op.axis, + zero_point=zero_point, + scale=scale, + axis=axis, name=ops_to_fold[-1].outputs[0].name, before_op=ops_to_fold[-1], ) @@ -151,6 +200,17 @@ def _try_to_transform(op, block): block.remove_ops([op] + ops_to_fold) return True + def _try_to_transform(self, op: Operation, block: Block) -> bool: + # make sure quantized_data only feeds into a single op + if len(op.quantized_data.child_ops) != 1: + return False + + if op.scale.rank == 0 and op.zero_point.rank == 0: + return self._try_to_transform_per_tensor(op, block) + else: + return self._try_to_transform_per_channel(op, block) + + @register_pass(namespace="common") class int_op_canonicalization(AbstractGraphPass): """ @@ -315,7 +375,11 @@ def apply(self, prog): @block_context_manager def _nullify_redundant_quantization_zero_point_block(self, block: Block): def apply_block(block: Block) -> bool: + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: self._nullify_redundant_quantization_zero_point_block(b) @@ -325,9 +389,9 @@ def apply_block(block: Block) -> bool: # has to break as the downstream iterator is affected if self.try_transform_zp128_quantize_dequantize(op): - return True + fusion_occurred = True - return False + return fusion_occurred need_transformation = True while need_transformation: @@ -507,15 +571,18 @@ def apply(self, prog): @block_context_manager def 
_dequantize_quantize_pair_elimination_block(self, block): def apply_block(block: Block) -> bool: + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: self._dequantize_quantize_pair_elimination_block(b) # has to break as the downstream iterator is affected if self.try_dequantize_quantize_pair_elimination(op): - return True - - return False + fusion_occurred = True + return fusion_occurred need_transformation = True while need_transformation: @@ -869,7 +936,7 @@ def apply_block(block): apply_block(f) def is_valid_op(self, op): - return op.op_type == "dequantize" and op.outputs[0].val is not None + return op.op_type == "dequantize" and op.can_materialize_val() def transform_op(self, op): quantized_data = op.input.val @@ -882,22 +949,15 @@ def transform_op(self, op): else: zero_point = np.int8(0) if op.input.dtype == types.int8 else np.uint8(0) - # In dequantize semantics, axis may be None: - # when scale is a scalar, axis is None - # - # In constexpr_affine_dequantize semantics, None axis is not allowed; - # since axis is not referred to when scale is a scalar, we pass a dummy - axis = 0 - if op.axis is not None: - axis = op.axis.val + axis = None if op.axis is None else op.axis.val - new_var = mb.constexpr_affine_dequantize( - quantized_data=quantized_data, - zero_point=zero_point, - scale=scale, - axis=axis, - before_op=op, + new_var = _utils._construct_constexpr_affine_op( + quantized_data, + zero_point, + scale, + axis, name=op.name + "_affine_dequantized", + before_op=op, ) block = op.enclosing_block diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_repeat_ops.py b/coremltools/converters/mil/mil/passes/defs/optimize_repeat_ops.py index aab66dd7b..76be08a6f 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_repeat_ops.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_repeat_ops.py @@ -103,11 +103,14 @@ def _replace_ops(block, padding_op, child_padding_op, final_pad): @block_context_manager def _merge_padding_block(self, block): + fusion_happens = False for op in list(block.operations): - result = self._match_pattern(block, op) - if result: - return True - return False + if op.enclosing_block is None: + continue + + if self._match_pattern(block, op): + fusion_happens = True + return fusion_happens @register_pass(namespace="common") class merge_consecutive_transposes(AbstractGraphPass): @@ -142,7 +145,9 @@ def _replace_ops(block, transpose_op, child_transpose_op): new_perm = [perm[i] for i in child_transpose_op.perm.val] x = mb.transpose(x=transpose_op.x, perm=new_perm, before_op=transpose_op) if transpose_op.enclosing_block.try_replace_uses_of_var_after_op( - anchor_op=transpose_op, old_var=child_transpose_op.outputs[0], new_var=x, + anchor_op=child_transpose_op, + old_var=child_transpose_op.outputs[0], + new_var=x, ): block.remove_ops([transpose_op, child_transpose_op]) return True @@ -151,10 +156,13 @@ def _replace_ops(block, transpose_op, child_transpose_op): @block_context_manager def _merge_transposes_in_block(self, block): def help_merge_transpose_ops(block): + fusion_happens = False for op in list(block.operations): + if op.enclosing_block is None: + continue if self._match_and_replace_pattern(block, op): - return True - return False + fusion_happens = True + return fusion_happens block_changed = True while block_changed: @@ -189,7 +197,7 @@ def _match_and_replace_pattern(self, block, relu_op): @staticmethod def _replace_ops(block, relu_op, child_relu_op): if 
relu_op.enclosing_block.try_replace_uses_of_var_after_op( - anchor_op=relu_op, old_var=child_relu_op.outputs[0], new_var=relu_op.outputs[0] + anchor_op=child_relu_op, old_var=child_relu_op.outputs[0], new_var=relu_op.outputs[0] ): block.remove_ops([child_relu_op]) return True @@ -198,10 +206,13 @@ def _replace_ops(block, relu_op, child_relu_op): @block_context_manager def _merge_relus_in_block(self, block): def help_merge_relu_ops(block): + fusion_happens = False for op in list(block.operations): + if op.enclosing_block is None: + continue if self._match_and_replace_pattern(block, op): - return True - return False + fusion_happens = True + return fusion_happens block_changed = True while block_changed: @@ -259,7 +270,11 @@ def _match_pattern(reshape_op): @block_context_manager def _merge_consecutive_reshapes_block(self, block): def help_merge_consecutive_reshapes_block(block): - for op in block.operations: + fusion_happens = False + for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -285,9 +300,9 @@ def help_merge_consecutive_reshapes_block(block): new_var=reshape_out, ) reshape_ops[-1].enclosing_block.remove_ops(reshape_ops) - return True + fusion_happens = True - return False + return fusion_happens block_changed = True while block_changed: @@ -333,11 +348,11 @@ class cast_optimization(AbstractGraphPass): This is a non-algebraic translation which assumes that the upcasting doesn't change the user's intent. (1) Example for redundant ``cast`` op removal: - .. sourcecode:: python + .. code-block:: Input graph: input(fp16) -> cast(dtype="fp16") -> relu -> out - + Output graph: input -> relu -> out @@ -345,11 +360,11 @@ class cast_optimization(AbstractGraphPass): Hence, it can be removed. (2) Example for two ``cast`` ops fusion: - .. sourcecode:: python + .. code-block:: Input graph: input(int8) -> cast(dtype="fp16") -> cast(dtype="fp32") -> out - + Output graph: input(int8) -> cast(dtype="fp32") -> out @@ -357,11 +372,11 @@ class cast_optimization(AbstractGraphPass): so the fusion is allowed. (3) Negative example for two ``cast`` ops fusion: - .. sourcecode:: python + .. code-block:: Input graph: input(fp32) -> cast(dtype="bool") -> cast(dtype="fp16") -> out - + Output graph: Same as input graph. @@ -370,11 +385,11 @@ class cast_optimization(AbstractGraphPass): If we fuse them, the output would be in the range and resolution of ``fp16`` instead. (4) Another Negative example for two ``cast`` ops fusion: - .. sourcecode:: python + .. code-block:: Input graph: input(int32) -> cast(dtype="int8") -> cast(dtype="uint8") -> out - + Output graph: Same as input graph. @@ -389,7 +404,10 @@ class cast_optimization(AbstractGraphPass): For more examples, please see the unittests that start with prefix ``TestCastOptimization`` in ``test_passes.py``. """ + _num_of_visited_ops = 0 # Testing purpose, making sure the algorithm performs in O(N) + def apply(self, prog): + self._num_of_visited_ops = 0 for f in prog.functions.values(): self._fuse_or_cancel_consecutive_casts_block_wrapper(f) @@ -507,6 +525,8 @@ def _fuse_cast_ops(self, cast_ops: List[Operation], reuse_input_var: bool = Fals def _try_to_transform(self, root_op, cast_ops_across_blocks): block = root_op.enclosing_block + if block is None: + return False # Scenario: Redundant cast when source and destination dtype are same. 
if root_op.op_type == "cast" and root_op.x.is_tensor_or_scalar_of(dtype=root_op.dtype.val): @@ -554,18 +574,27 @@ def _fuse_casts_ops_across_blocks(self, block: Block, ops_to_fused: Tuple[Operat def _fuse_or_cancel_consecutive_casts_block_wrapper(self, block): def _fuse_or_cancel_consecutive_casts_block(block, cast_ops_across_blocks): # We first make sure all the inner blocks are optimized - # It is important to do it separately in the very beginning, to ensure the last step of optimization cast ops across the block boundary is correct. - for i, op in enumerate(list(block.operations)): + # It is important to do it separately in the very beginning, to ensure the last step of optimization cast ops across the block boundary is correct. + for op in block.operations: for b in op.blocks: self._fuse_or_cancel_consecutive_casts_block_wrapper(b) - for i, op in enumerate(list(block.operations)): + fusion_happens = False + for op in list(block.operations): + self._num_of_visited_ops += 1 # start pattern match if cast op is encountered if op.op_type == "cast": if self._try_to_transform(op, cast_ops_across_blocks): - # has to break as the downstream iterator is affected. - return True - return False + # It is important not to exit the loop right away when a fusion happens, + # in order to keep the time complexity low. + # For instance, given a program of the pattern: + # relu -> relu -> cast -> cast -> cast, + # the three cast ops can be fused into a single cast op in one shot. + # On the other hand, if we break the loop right away, the + # two relu ops will be visited 3 times, which makes the overall + # time complexity O(N^2). + fusion_happens = True + return fusion_happens block_changed = True cast_ops_across_blocks = defaultdict(set) @@ -1784,7 +1813,7 @@ def _reduce_transposes_block(block): which is simpler to do when all the ops in the block are free of sub blocks. The case of transpose fusion with sub-block containing ops needs to be handled with more care and test cases. """ - for op in list(block.operations): + for op in block.operations: if len(op.blocks) > 0: return diff --git a/coremltools/converters/mil/mil/passes/defs/optimize_tensor_operation.py b/coremltools/converters/mil/mil/passes/defs/optimize_tensor_operation.py index 4466a09b5..bd3d9d62a 100644 --- a/coremltools/converters/mil/mil/passes/defs/optimize_tensor_operation.py +++ b/coremltools/converters/mil/mil/passes/defs/optimize_tensor_operation.py @@ -48,8 +48,11 @@ def apply(self, prog): @block_context_manager def fuse_squeeze_expand_dims_block(self, block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -60,11 +63,9 @@ def fuse_squeeze_expand_dims_block(self, block): squeeze_op = self._match_pattern(op) if squeeze_op is not None: - fusion_status = self._try_to_transform(squeeze_op, block) - # has to break as the downstream iterator is affected. 
- if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(squeeze_op, block): + fusion_occurred = True + return fusion_occurred @staticmethod def _match_pattern(op): @@ -268,8 +269,11 @@ def _get_prod(start, end, arr, skip_indices): @block_context_manager def expand_high_rank_reshape_and_transpose_block(self, block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -279,11 +283,9 @@ def expand_high_rank_reshape_and_transpose_block(self, block): ops = self._match_pattern(op) if ops is not None: - fusion_status = self._try_to_transform(ops, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(ops, block): + fusion_occurred = True + return fusion_occurred @register_pass(namespace="common") class concat_to_pixel_shuffle(AbstractGraphPass): @@ -545,8 +547,11 @@ def _try_to_transform(concat_op, add_op, block): @block_context_manager def _fuse_concat_interleave(self, block): - fusion_status = False + fusion_occurred = False for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -556,11 +561,9 @@ def _fuse_concat_interleave(self, block): concat_op = self._match_pattern(op) if concat_op is not None: - fusion_status = self._try_to_transform(op, concat_op, block) - # has to break as the downstream iterator is affected. - if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(op, concat_op, block): + fusion_occurred = True + return fusion_occurred @register_pass(namespace="common") @@ -653,8 +656,11 @@ def _try_to_transform(onehot_op, block): @block_context_manager def _fuse_onehot_matmul_to_gather_block(self, block): - fusion_status = False - for i, op in enumerate(list(block.operations)): + fusion_occurred = False + for op in list(block.operations): + if op.enclosing_block is None: + continue + for b in op.blocks: block_changed = True while block_changed: @@ -665,11 +671,9 @@ def _fuse_onehot_matmul_to_gather_block(self, block): # start pattern match if one_hot op is encountered if op.op_type == "one_hot": - fusion_status = self._try_to_transform(op, block) - # has to break as the downstream iterator is affected. 
- if fusion_status: - return fusion_status - return fusion_status + if self._try_to_transform(op, block): + fusion_occurred = True + return fusion_occurred @register_pass(namespace="common") diff --git a/coremltools/converters/mil/mil/passes/defs/preprocess.py b/coremltools/converters/mil/mil/passes/defs/preprocess.py index 3f9ea7b1a..e8dd6f899 100644 --- a/coremltools/converters/mil/mil/passes/defs/preprocess.py +++ b/coremltools/converters/mil/mil/passes/defs/preprocess.py @@ -9,9 +9,11 @@ from coremltools import _logger as logger from coremltools.converters.mil.input_types import EnumeratedShapes, ImageType, Shape +from coremltools.converters.mil.mil import Block from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Function, types +from coremltools.converters.mil.mil import Function, Program, types from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass +from coremltools.converters.mil.mil.passes.helper import block_context_manager from coremltools.converters.mil.mil.passes.pass_registry import register_pass @@ -48,7 +50,7 @@ def _transform_to_channel_first(shape): else: return shape[:-3] + [shape[-1]] + shape[-3:-1] - main_input_types = list(prog.main_input_types) + main_input_types = list(prog.functions["main"].input_types) for idx, input_type in enumerate(main_input_types): if isinstance(input_type, ImageType) and not input_type.channel_first: name = input_type.name @@ -88,9 +90,6 @@ def _transform_to_channel_first(shape): # Update Function input var prog.functions["main"]._input_dict[name] = placeholder_op.outputs[0] - prog.functions["main"].function_inputs = tuple( - prog.functions["main"]._input_dict.values() - ) # Add transpose into graph (Transpose from NCHW back to NHWC) curr_block = prog.functions["main"] @@ -108,7 +107,7 @@ def _transform_to_channel_first(shape): curr_block.replace_uses_of_var_after_op( anchor_op=None, old_var=old_var, new_var=new_input ) - prog.main_input_types = tuple(main_input_types) + prog.functions["main"].input_types = tuple(main_input_types) class NameSanitizer: @@ -311,26 +310,30 @@ def apply(self, prog): prog.functions["main"], sanitizer_vars, sanitizer_ops, - prog.main_input_types, + prog.functions["main"].input_types, sanitize_model_inputs_outputs_only=True, ) +# TODO: rdar://122845072 ([Infra] Refactor the transform_function_signatures, adjust_io_to_supported_types and update_output_dtypes using a shared graph pass) @register_pass(namespace="common") class update_output_dtypes(AbstractGraphPass): """ - Update the dtypes of output vars of the main block to match the dtypes - provided in ``prog.main_output_types``, which in turn is populated by the - ``outputs`` argument provided by the user in the ``coremltools.convert()`` API. - This graph pass assumes that the list of outputs in ``prog.main_output_types`` (if not ``None``), + Update the dtypes of output vars of each function block to match the dtypes + provided in ``function.output_types``. The output types for the main function + is populated by the ``outputs`` argument provided by the user in the ``coremltools.convert()`` API. + This graph pass assumes that the list of outputs in ``function.output_types`` (if not ``None``), are in the same order as the output vars. 
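For context, the ``outputs`` argument mentioned in the docstring above is what populates the main function's ``output_types``. An illustrative user-level sketch (the traced torch model and shapes are placeholders, not part of this diff):

```python
import numpy as np
import torch
import coremltools as ct

traced = torch.jit.trace(torch.nn.ReLU().eval(), torch.rand(1, 3, 8, 8))  # placeholder model
mlmodel = ct.convert(
    traced,
    inputs=[ct.TensorType(shape=(1, 3, 8, 8))],
    outputs=[ct.TensorType(dtype=np.float16)],  # ends up in functions["main"].output_types
    minimum_deployment_target=ct.target.iOS16,  # fp16 I/O needs iOS16+
)
```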
""" - def apply(self, prog): - user_provided_output_types = prog.main_output_types - main_func = prog.functions["main"] - output_vars = main_func.outputs - input_vars = list(main_func.inputs.values()) + @block_context_manager + def adjust_function_output_types(self, func: Function) -> None: + """ + Adjust output dtypes for a pymil function. + """ + user_provided_output_types = func.output_types + output_vars = func.outputs + input_vars = list(func.inputs.values()) if user_provided_output_types is None or len(user_provided_output_types) == 0: return if len(output_vars) != len(user_provided_output_types): @@ -367,11 +370,15 @@ def apply(self, prog): output_var.set_name( output_var_name + "_type_" + types.builtin_to_string(output_var.dtype) ) - with main_func: - output_var = mb.cast( - x=output_var, dtype=types.builtin_to_string(required_output_dtype) - ) - output_var.set_name(output_var_name) - new_outputs.append(output_var) + new_output_var = mb.cast( + x=output_var, dtype=types.builtin_to_string(required_output_dtype) + ) + new_output_var.set_name(output_var_name) + Block._copy_scope_info(output_var, new_output_var) + new_outputs.append(new_output_var) + + func.set_outputs(new_outputs) - main_func.set_outputs(new_outputs) + def apply(self, prog: Program): + for func in prog.functions.values(): + self.adjust_function_output_types(func) diff --git a/coremltools/converters/mil/mil/passes/defs/quantization.py b/coremltools/converters/mil/mil/passes/defs/quantization.py index fab3e1656..6c016a586 100644 --- a/coremltools/converters/mil/mil/passes/defs/quantization.py +++ b/coremltools/converters/mil/mil/passes/defs/quantization.py @@ -5,13 +5,15 @@ from abc import abstractmethod from enum import Enum as _Enum -from typing import Set, Text +from typing import Dict, Set, Text, Tuple import numpy as np from coremltools.converters.mil._deployment_compatibility import AvailableTarget +from coremltools.converters.mil.input_types import TensorType +from coremltools.converters.mil.mil import Block from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Operation, types +from coremltools.converters.mil.mil import Function, Operation, Var, types from coremltools.converters.mil.mil.block import is_current_opset_version_compatible_with from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass @@ -19,6 +21,7 @@ from coremltools.converters.mil.mil.passes.pass_registry import register_pass from coremltools.converters.mil.mil.program import Program from coremltools.converters.mil.mil.types.symbolic import is_symbolic +from coremltools.converters.mil.mil.types.type_mapping import string_to_builtin class ComputePrecision(_Enum): @@ -47,6 +50,14 @@ def __init__(self, op_selector=None): ) self.op_selector = op_selector + # Var that feeds into multiple ops will be cast once and cached into this dict + # For reference: Checkout test_single_input_to_multiple_operations in `TestFP16CastTransform`. 
+ # Note that, we make it a stack of dict to keep tracking the blocks + self._cache_vars = [] + + def current_cache_vars(self) -> Set[Var]: + return self._cache_vars[-1] + def apply(self, prog): """ Walks over each operation in the graph and performs following two steps, @@ -68,6 +79,7 @@ def apply(self, prog): @block_context_manager def apply_block(block): + self._cache_vars.append({}) for op in list(block.operations): for b in op.blocks: apply_block(b) @@ -80,6 +92,7 @@ def apply_block(block): need_transform = op.op_type not in getattr(self, "skip_ops_by_type", set()) if need_transform: self.transform_op(op) + self._cache_vars.pop() for f in prog.functions.values(): apply_block(f) @@ -135,10 +148,6 @@ class CastTypeQuantization(AbstractQuantizationPass): def __init__(self, op_selector=None): super().__init__(op_selector=op_selector) - # Var that feeds into multiple ops will be cast once and cached into this dict - # For reference: Checkout test_single_input_to_multiple_operations in `TestFP16CastTransform`. - self.cache_vars = {} - @property @abstractmethod def origin_dtype(self) -> str: @@ -151,6 +160,91 @@ def target_dtype(self) -> str: """Target dtype, such as fp16.""" raise NotImplementedError("target_dtype must be specified in subclass.") + # TODO: rdar://122845072 ([Infra] Refactor the transform_function_signatures, adjust_io_to_supported_types and update_output_dtypes using a shared graph pass) + @block_context_manager + def transform_function_signatures(self, func: Function) -> None: + """ + This utility transform a function input / output signatures from the original_dtype to + the target_dtype. + + For instance, in the add_fp16_cast class, this member function transforms the following + function: + + function(%input(fp32)) { + block0() { + % var_1 = op_1(x=%input) + ... + % output(fp32) = ... + } -> (%output) + } + + into: + + function(%input(fp16)) { + block0() { + # input_cast = cast(x=input, dtype="fp32") + % var_1 = op_1(x=%input_cast) + ... + % output(fp32) = ... + } -> (%output) + } + + and function.output_types is set to [TensorType(dtype=types.fp16)], + in which will be used in common::update_output_dtypes to upgrade the function output dtype accordingly. 
+ + """ + # reset input signatures + old_func_inputs = func.inputs + new_func_inputs = {} + cache_vars = {} + + # cast the new input into the original dtype + for k, v in old_func_inputs.items(): + if v.is_tensor_or_scalar_of(self.origin_dtype): + new_input = mb.placeholder( + shape=v.shape, + dtype=string_to_builtin(self.target_dtype), + name=v.name, + ).outputs[0] + + if v in func.outputs: + new_outputs = [] + for val in func.outputs: + new_outputs.append(new_input if val == v else val) + func.set_outputs(new_outputs) + + new_func_inputs[k] = new_input + cast_input = mb.cast( + x=new_input, + dtype=self.origin_dtype, + before_op=func.operations[0] if len(func.operations) > 0 else None, + ) + cache_vars[k] = cast_input + else: + new_func_inputs[k] = v + cache_vars[k] = v + + # replace the use of the old input vars with the new cast var + for k, v in old_func_inputs.items(): + func.replace_uses_of_var_after_op( + anchor_op=None, + old_var=v, + new_var=cache_vars[k], + ) + func._input_dict = new_func_inputs + + # reset output signatures + if func.output_types is None: + output_types = [TensorType(dtype=v.dtype) for v in func.outputs] + else: + output_types = func.output_types + + for idx, v in enumerate(output_types): + if v.dtype == string_to_builtin(self.origin_dtype): + output_types[idx] = TensorType(dtype=string_to_builtin(self.target_dtype)) + + func.output_types = output_types + def should_cast_parameter(self, op: Operation, param_name: str) -> bool: """ Determines if a param of an op should be cast to target_dtype. @@ -167,6 +261,13 @@ def should_cast_parameter(self, op: Operation, param_name: str) -> bool: return True + def _get_casted_outputs(self, op: Operation, casted_inputs: Dict[str, Var]) -> Tuple[Var]: + """ + Given an op and casted_inputs, this utility returns the new resulting outputs. 
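A tiny aside on the ``getattr`` dispatch used by ``_get_casted_outputs``: looking an op type up on the builder is equivalent to calling the builder method directly, which is what lets the casted inputs be re-fed through the same op type generically. Illustrative only:

```python
from coremltools.converters.mil.mil import Builder as mb

@mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))])
def prog(x):
    y1 = mb.relu(x=x)
    y2 = getattr(mb, "relu")(x=x)  # builds the same kind of op as the line above
    return y1, y2
```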
+ """ + return getattr(mb, op.op_type)(**casted_inputs) + + def transform_op(self, op) -> None: """Transform the input(s)/output(s) dtypes of the op.""" block = op.enclosing_block @@ -190,18 +291,23 @@ def transform_op(self, op) -> None: casted_var_name = f"{var.name}_to_{self.target_dtype}" if ( len(var._child_ops) > 1 - and casted_var_name in self.cache_vars - and (block.is_var_visible_in_block(self.cache_vars[casted_var_name])) + and casted_var_name in self.current_cache_vars() ): - casted_inputs[param][i] = self.cache_vars[casted_var_name] + casted_inputs[param][i] = self.current_cache_vars()[casted_var_name] else: - x = mb.cast(x=var, dtype=self.target_dtype, name=casted_var_name, before_op=op) + x = mb.cast( + x=var, + dtype=self.target_dtype, + name=casted_var_name, + before_op=op, + ) if self.target_dtype == "fp16": self._check_underflow_to_zero(x, var) + Block._copy_metadata(var, x) casted_inputs[param][i] = x if len(var._child_ops) > 1: - self.cache_vars[casted_var_name] = casted_inputs[param][i] + self.current_cache_vars()[casted_var_name] = casted_inputs[param][i] if not is_list_input: casted_inputs[param] = casted_inputs[param][0] @@ -210,7 +316,7 @@ def transform_op(self, op) -> None: casted_inputs.update({k: v for k, v in op.inputs.items() if k not in casted_inputs}) casted_inputs["name"] = f"{op.name}_cast_{self.target_dtype}" casted_inputs["before_op"] = op - quant_output = getattr(mb, op.op_type)(**casted_inputs) + quant_output = self._get_casted_outputs(op, casted_inputs) if not isinstance(quant_output, (list, tuple)): quant_output = [quant_output] @@ -232,6 +338,7 @@ def transform_op(self, op) -> None: force_replace=True, ) else: + op.enclosing_block.replace_uses_of_var_after_op( anchor_op=op, old_var=old_output_var, @@ -401,17 +508,20 @@ def skip_ops_by_type(self, criteria: Text): @register_pass(namespace="common") class add_int16_cast(CastTypeQuantization): """ - This transform does the following, for each op that supports int16: - - For each input of dtype int32 which actually supports int16, inject a "cast" op to change it - to int16 dtype. - - For each output of dtype int16, inject a "cast" op to change it back to int32. - It's mainly for int16 op ANE residency. + This transform does the following, for each op that supports int16/uint16: + - For each input of dtype int32 which supports int16/uint16, inject a "cast" op to change it + to int16/uint16 dtype. + - For each output of dtype int16/uint16, inject a "cast" op to change it back to int32. + Notice that the cast will not be inserted if the const value is out of int16/uint16 range. """ # Ops that prefer int16 params. _PREFER_INT16_OPS: Set[str] = {"gather", "gather_along_axis", "gather_nd"} def __init__(self, op_selector=None): super().__init__(op_selector=op_selector) + # Use variable instead of hard-coded "int16" because the target dtype could be uint16 + # depending on if the param is non-negative const and within uint16 range. 
+ self._target_dtype: str = "int16" @property def origin_dtype(self) -> str: @@ -419,38 +529,56 @@ def origin_dtype(self) -> str: @property def target_dtype(self) -> str: - return "int16" + return self._target_dtype - @staticmethod - def int16_overflow(op: Operation) -> bool: + @target_dtype.setter + def target_dtype(self, target_dtype: str): + if target_dtype not in {"int16", "uint16"}: + raise ValueError("The target_dtype in add_int16_cast must be int16 or uint16") + self._target_dtype = target_dtype + + def should_cast_parameter(self, op: Operation, param_name: str) -> bool: """ - Determines if any of the op's input will overflow when represented by int16. Constants with - values more than np.iinfo(np.int16).max or less than np.iinfo(np.int16).min overflows in int16. + Determine if a parameter should be cast or not. + If should be cast, determine whether to use int16 or uint16. """ _INT16_MAX = np.iinfo(np.int16).max _INT16_MIN = np.iinfo(np.int16).min - for _, inputs in op.inputs.items(): - is_list_input = isinstance(inputs, (list, tuple)) - if not is_list_input: - inputs = [inputs] - for var in inputs: - if var.val is not None and var.is_tensor_or_scalar_of(dtype="int32"): - if np.any(var.val > _INT16_MAX) or np.any(var.val < _INT16_MIN): - return True + _UINT16_MAX = np.iinfo(np.uint16).max + _UINT16_MIN = np.iinfo(np.uint16).min - # In `gather` and `gather_along_axis`, if the dim size of x is larger than int16 upperbound, - # the dynamic indices could overflow. - if ( - op.op_type in {"gather", "gather_along_axis"} - and op.indices.val is None - and op.x.shape is not None - ): - dim_size = op.x.shape[op.axis.val] - if not is_symbolic(dim_size) and dim_size > _INT16_MAX: - return True + input_var = op.inputs[param_name] + if not input_var.is_tensor_or_scalar_of(dtype="int32"): + return False - return False + input_op = input_var.op + if input_op is not None and input_op.op_type == "const": + if ( + input_op.outputs[0].val.min() >= _UINT16_MIN + and input_op.outputs[0].val.max() <= _UINT16_MAX + ): + self._target_dtype = "uint16" + elif ( + input_op.outputs[0].val.min() >= _INT16_MIN + and input_op.outputs[0].val.max() <= _INT16_MAX + ): + self._target_dtype = "int16" + else: + return False + + # In `gather` and `gather_along_axis`, if the dim size of x is larger than int16 + # upperbound, the dynamic indices could overflow, so it shouldn't be cast. 
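The range check sketched below restates the dtype choice described in the comments above (the helper name is hypothetical, not part of the pass): non-negative consts that fit in uint16 prefer ``uint16``, values within the signed range prefer ``int16``, and anything outside 16-bit range is left uncast:

```python
import numpy as np

def choose_16bit_dtype(values: np.ndarray):
    """Hypothetical helper mirroring the should_cast_parameter logic above."""
    if values.min() >= np.iinfo(np.uint16).min and values.max() <= np.iinfo(np.uint16).max:
        return "uint16"
    if values.min() >= np.iinfo(np.int16).min and values.max() <= np.iinfo(np.int16).max:
        return "int16"
    return None  # out of 16-bit range: do not cast

assert choose_16bit_dtype(np.array([0, 40000])) == "uint16"
assert choose_16bit_dtype(np.array([-5, 100])) == "int16"
assert choose_16bit_dtype(np.array([70000])) is None
```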
+ if op.op_type in {"gather", "gather_along_axis"} and param_name == "indices": + if op.indices.val is None and op.x.shape is not None: + dim_size = op.x.shape[op.axis.val] + if not is_symbolic(dim_size) and dim_size > _INT16_MAX: + return False + + if not super().should_cast_parameter(op, param_name): + return False + + return True def is_valid_op(self, op: Operation) -> bool: - """Determines if op is valid for int16 casting.""" - return op.op_type in self._PREFER_INT16_OPS and not self.int16_overflow(op) + """Determines if op is valid for int16/uint16 casting.""" + return op.op_type in self._PREFER_INT16_OPS diff --git a/coremltools/converters/mil/mil/passes/graph_pass.py b/coremltools/converters/mil/mil/passes/graph_pass.py index 6a39628a9..ffa61801b 100644 --- a/coremltools/converters/mil/mil/passes/graph_pass.py +++ b/coremltools/converters/mil/mil/passes/graph_pass.py @@ -7,6 +7,8 @@ from typing import Callable, List, Optional, Text, Union from coremltools.converters.mil import Operation, Program +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource class PassOption: @@ -48,7 +50,10 @@ class AbstractGraphPass(ABC): def __call__(self, prog: Program): if not prog.skip_all_passes: - self.apply(prog) + # we use the scope context manager to populate the graph pass information to the ops + # constructed by the pass. + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=[str(self)])): + self.apply(prog) def __str__(self): return type(self).__name__ diff --git a/coremltools/converters/mil/mil/passes/helper.py b/coremltools/converters/mil/mil/passes/helper.py index 1bf1e70c3..72dbde9b3 100644 --- a/coremltools/converters/mil/mil/passes/helper.py +++ b/coremltools/converters/mil/mil/passes/helper.py @@ -3,13 +3,14 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from typing import List +from typing import Callable, List, Optional import numpy as np from coremltools.converters.mil.mil import Block, Operation from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass + class classproperty(property): """ A decorator class that allow us to have a class-level property @@ -17,7 +18,8 @@ class classproperty(property): def __get__(self, owner, cls): return self.fget(cls) -def block_context_manager(func): + +def block_context_manager(_func: Optional[Callable] = None): """ This decorator executes a function under the context manager `with block`. For instance, given a function `func` with an input block and other arguments: @@ -44,6 +46,7 @@ def func(block, *args): since when the code exit `block`, an expensive _propagate_nonreplaceable_vars() is invoked. The decorator reduces the amount of calling `with block` overally. """ + def wrapper(*args): # Make it compatible with class method. if isinstance(args[0], AbstractGraphPass): @@ -56,8 +59,10 @@ def wrapper(*args): "The function decorated with block_context_manager must have a Block " "type argument as the first input." 
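For reference, a hedged usage sketch of the decorator being reworked above: decorating a pass helper whose first argument is a ``Block`` runs the whole body under a single ``with block:`` context, which is how the graph passes in this diff use it (the helper body here is a placeholder):

```python
from coremltools.converters.mil.mil.passes.helper import block_context_manager

@block_context_manager
def _rewrite_block(block):               # first argument must be a Block
    for op in list(block.operations):
        ...                               # pattern-match and rewrite; already inside `with block:`
```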
) + with block: - return func(*args) + return _func(*args) + return wrapper diff --git a/coremltools/converters/mil/mil/passes/pass_pipeline.py b/coremltools/converters/mil/mil/passes/pass_pipeline.py index eaf8c8bd9..caa1f7d4e 100644 --- a/coremltools/converters/mil/mil/passes/pass_pipeline.py +++ b/coremltools/converters/mil/mil/passes/pass_pipeline.py @@ -83,6 +83,7 @@ "common::merge_consecutive_relus", "common::merge_consecutive_reshapes", "common::merge_consecutive_transposes", + "common::fuse_transpose_matmul", # "expand_high_rank_reshape_and_transpose" must come after "common::merge_consecutive_transposes" "common::expand_high_rank_reshape_and_transpose", "common::reduce_transposes", @@ -93,6 +94,8 @@ "common::remove_redundant_ops", "common::add_fp16_cast", # Will be removed if compute precision is not FP16. "common::add_int16_cast", # Will be removed if compute precision is not FP16. + "common::update_output_dtypes", # Must run again after `add_fp16_cast` and `add_int16_cast`. + "common::const_elimination", "common::dead_code_elimination", # always end with dce ] @@ -103,8 +106,12 @@ "common::dead_code_elimination", # must follow cast_optimization "common::const_elimination", "common::const_deduplication", # after all consts have been settled - "common::dead_code_elimination", # come before merge_tensorwise_affine_dequantize_with_consecutive_ops - "common::merge_tensorwise_affine_dequantize_with_consecutive_ops", # after const_deduplication and dead_code_elimination + "common::dead_code_elimination", # come before merge_affine_dequantize_with_consecutive_ops + "common::merge_affine_dequantize_with_consecutive_ops", # after const_deduplication and dead_code_elimination + "common::expand_dynamic_linear", # if weight or bias were not merged into constexpr, then expand linear to matmul + add + "common::fuse_transpose_matmul", # there might be left over transpose that got created in hoping to use linear, but now can be fused back with matmul + "common::dead_code_elimination", # fused transposes become orphans thus can be elimianted + "common::const_deduplication", # additional consts may be introduced during merging dequantize and expanding linear "common::loop_invariant_elimination", "common::noop_elimination", "common::dedup_op_and_var_names", @@ -250,6 +257,7 @@ class PassPipeline: ) """ + # TODO: rdar://121242189 ([Infra] Have a better way to handle predefined pass pipeline) _PIPELINE_NAME_TO_PASSES = { "default": _COMMON_PASSES + _CLEANUP_PASSES, "cleanup": _CLEANUP_PASSES, @@ -453,8 +461,23 @@ def apply_pipeline(prog: Program, pass_pipeline: PassPipeline): f"The graph pass options for {pass_name} is set to {pass_options}. " f"It will change the pass behavior. Make sure the option is intended." ) + if pass_name.startswith("experimental::"): + logger.warning( + f"The graph pass {pass_name} is under experimental development, " + f"and the API could be changed in the future." 
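Illustrative only (not part of this diff): the pipeline whose pass ordering is adjusted above is the same object users can customize and hand back to the converter; the removed pass name and the traced model below are placeholders based on the public docs:

```python
import coremltools as ct

pipeline = ct.PassPipeline.DEFAULT
pipeline.remove_passes({"common::fuse_conv_batchnorm"})      # drop a pass by name
mlmodel = ct.convert(traced_model, pass_pipeline=pipeline)   # traced_model: placeholder source model
```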
+ ) graph_pass = PASS_REGISTRY[pass_name] graph_pass.set_options(pass_options) - graph_pass(prog) - prog.validate() + + try: + graph_pass(prog) + except Exception as e: + logger.error( + f"\n\nERROR - '{pass_name}' graph pass produces the following error:\n" + ) + raise e # re-raise exception + + # After dead code elimination, we should check if the program misses any essential scope info + check_essential_scope = pass_name == "common::dead_code_elimination" + prog.validate(check_essential_scope=check_essential_scope) logger.debug(f"Program after {pass_pipeline} pipeline:\n{prog}") diff --git a/coremltools/converters/mil/mil/passes/tests/test_cleanup_passes.py b/coremltools/converters/mil/mil/passes/tests/test_cleanup_passes.py new file mode 100644 index 000000000..6269a95fb --- /dev/null +++ b/coremltools/converters/mil/mil/passes/tests/test_cleanup_passes.py @@ -0,0 +1,2469 @@ +# Copyright (c) 2024, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import copy +import itertools +import unittest + +import numpy as np +import pytest +from mock import patch + +import coremltools as ct +from coremltools.converters.mil import mil +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil import Function, Symbol, get_new_symbol, types +from coremltools.converters.mil.mil.passes.defs.cleanup import topological_reorder +from coremltools.converters.mil.mil.passes.defs.cleanup.remove_redundant_ops import ( + remove_redundant_ops, +) +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +from coremltools.converters.mil.testing_utils import ( + apply_pass_and_basic_check, + assert_model_is_valid, + assert_op_count_match, + assert_same_output_names, + get_op_names_in_program, + get_op_types_in_program, +) + +from .test_passes import _VALIDATE_MODEL, CONSTEXPR_FUNCS, CONSTEXPR_OPS + + +class TestConstDeduplication: + def test_const_deduplication(self): + BATCH_DIM = 5 + SEQUENCE_LENGTH = 4 + ENCODING_DIM = 256 + EMBEDDING_DIM = 128 + weight = np.random.rand(EMBEDDING_DIM, ENCODING_DIM) + bias = np.random.rand(EMBEDDING_DIM) + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), + mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), + ] + ) + def prog(q, k): + q_e = mb.linear(x=q, weight=weight, bias=bias) + k_e = mb.linear(x=k, weight=weight, bias=bias) + attention = mb.matmul(x=q_e, y=k_e, transpose_y=True) + return attention + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") + assert_op_count_match(prev_prog, expect=6, op="const") + assert_op_count_match(prog, expect=4, op="const") + + @pytest.mark.parametrize( + "constexpr_op", + CONSTEXPR_OPS, + ) + def test_constexpr_deduplication(self, constexpr_op): + BATCH_DIM = 5 + SEQUENCE_LENGTH = 4 + ENCODING_DIM = 256 + EMBEDDING_DIM = 128 + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), + mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), + ] + ) + def prog(q, k): + weight_q = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM, ENCODING_DIM), seed=19) + weight_k = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM, ENCODING_DIM), seed=19) + bias_q = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM,), seed=29) + bias_k = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM,), seed=29) + q_e = mb.linear(x=q, 
weight=weight_q, bias=bias_q) + k_e = mb.linear(x=k, weight=weight_k, bias=bias_k) + attention = mb.matmul(x=q_e, y=k_e, transpose_y=True) + return attention + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") + assert_op_count_match(prev_prog, expect=4, op=constexpr_op) + assert_op_count_match(prog, expect=2, op=constexpr_op) + + def test_const_deduplication_as_outputs(self): + """ + If the duplicated constants are block outputs, we should not remove them. + """ + # case 1: + # const_2 can be eliminated since it is not block output + const = np.random.rand(40, 20, 30) + + @mb.program( + input_specs=[ + mb.TensorSpec( + shape=( + 40, + 20, + 30, + ) + ) + ] + ) + def prog(x): + const_1 = mb.const(val=const, name="const_1") + const_2 = mb.const(val=const, name="const_2") + x = mb.relu(x=x) + x = mb.add(x=x, y=const_2) + return x, const_1 + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") + assert_op_count_match(prev_prog, expect=2, op="const") + assert_op_count_match(prog, expect=1, op="const") + assert prog.functions["main"].outputs[1].name == "const_1" + + # case 2: + # const_2 can not be eliminated since it is a block output + const = np.random.rand(40, 20, 30) + + @mb.program( + input_specs=[ + mb.TensorSpec( + shape=( + 40, + 20, + 30, + ) + ) + ] + ) + def prog(x): + const_1 = mb.const(val=const, name="const_1") + const_2 = mb.const(val=const, name="const_2") + x = mb.relu(x=x) + x = mb.add(x=x, y=const_2) + return x, const_1, const_2 + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") + assert_op_count_match(prev_prog, expect=2, op="const") + assert_op_count_match(prog, expect=2, op="const") + assert prog.functions["main"].outputs[1].name == "const_1" + assert prog.functions["main"].outputs[2].name == "const_2" + + @pytest.mark.skip("rdar://109374995 consts are not shared across blocks") + def test_const_deduplication_multiple_blocks(self): + weight = np.random.rand(5, 3, 2, 2) + + @mb.program(input_specs=[mb.TensorSpec(shape=(4, 3, 8, 8))]) + def prog(x): + def _true_fn(): + return mb.conv(x=x, weight=weight, pad_type="valid") + + def _false_fn(): + y = mb.mul(x=x, y=2.0) + return mb.conv(x=y, weight=weight, pad_type="valid") + + x_gt_0_tensor = mb.greater(x=x, y=0.0) + x_gt_0 = mb.slice_by_index(x=x_gt_0_tensor, begin=(0, 0, 0, 0), end=(1, 1, 1, 1)) + return mb.cond(pred=x_gt_0, _true_fn=_true_fn, _false_fn=_false_fn) + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") + assert_op_count_match(prev_prog, expect=8, op="const") + assert_op_count_match(prog, expect=6, op="const") + + +class TestConstElimination: + def test_const_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + a = np.random.rand(2, 4).astype(np.float32) + double_a = mb.add(x=a, y=a) + return mb.add(x=x, y=double_a) + + assert_op_count_match(prog, expect=2, op="const") + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::const_elimination"](prog) + assert_same_output_names(prev_prog, prog) + assert_op_count_match(prog, expect=3, op="const") + + if _VALIDATE_MODEL: + assert_model_is_valid(prog, {"x": (2, 4)}) + + def test_const_elimination_nonreplaceable(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + a = np.random.rand(2, 4).astype(np.float16) + constexpr_a = mb.constexpr_cast(source_val=a, output_dtype="fp32") + double_a = mb.add(x=constexpr_a, y=a.astype(np.float32)) + return mb.add(x=x, y=double_a) + 
+ prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_elimination") + assert get_op_types_in_program(prev_prog) == ["constexpr_cast", "add", "add"] + # Not fold into const because the upstream constexpr_cast op is non-replaceable. + assert get_op_types_in_program(prog) == ["constexpr_cast", "add", "add"] + + def test_force_const_eliminate_nonreplaceable_ops(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(3,), dtype=types.int32)]) + def prog(x): + a = np.random.rand(2, 3, 5).astype(np.float16) + constexpr_a = mb.constexpr_cast(source_val=a, output_dtype="fp32") + double_a = mb.add(x=constexpr_a, y=a.astype(np.float32)) + a_shape = mb.shape(x=double_a) + return mb.add(x=x, y=a_shape) + + assert get_op_types_in_program(prog) == ["constexpr_cast", "add", "shape", "add"] + + apply_pass_and_basic_check(prog, "common::const_elimination") + # still fold shape into const regardless the non-replaceable upstream + # constexpr_cast op, since it only provides a shape + assert get_op_types_in_program(prog) == ["constexpr_cast", "add", "add"] + + apply_pass_and_basic_check(prog, "common::dead_code_elimination") + # constexpr_cast(a) and add(a, a) no longer contributes to output, + # so they should get dead code eliminated + assert get_op_types_in_program(prog) == ["add"] + + def test_force_const_eliminate_nonreplaceable_ops_case_2(self): + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(1,), dtype=types.int32), + mb.TensorSpec(shape=(2,), dtype=types.int32), + ], + opset_version=ct.target.iOS17, + ) + def prog(x, y): + a = np.random.rand(2, 3, 5).astype(np.float16) + constexpr_a = mb.constexpr_cast(source_val=a, output_dtype="fp32") + + reshape_shape = mb.concat(values=[y, [5]], axis=0) + reshape = mb.reshape(x=constexpr_a, shape=reshape_shape) + a_shape = mb.shape(x=reshape) + a_shape_int16 = mb.cast(x=a_shape, dtype="int16") + + # Even though the gather ops has constexpr_cast op as upstream, + # it can still be removed by const elimination. + gather = mb.gather( + x=a_shape, + indices=[ + 2, + ], + axis=0, + ) + gather_int32 = mb.cast(x=gather, dtype="int32") + return mb.add(x=x, y=gather) + + assert get_op_types_in_program(prog) == [ + "constexpr_cast", + "concat", + "reshape", + "shape", + "cast", + "gather", + "cast", + "add", + ] + + apply_pass_and_basic_check(prog, "common::const_elimination") + # still const-folding gather into const regardless the non-replaceable upstream + # constexpr_cast op, since it only provides the meta data (shape) + assert get_op_types_in_program(prog) == [ + "constexpr_cast", + "concat", + "reshape", + "shape", + "cast", + "add", + ] + + apply_pass_and_basic_check(prog, "common::dead_code_elimination") + assert get_op_types_in_program(prog) == ["add"] + + @patch( + "coremltools.converters.mil.mil.passes.defs.cleanup.const_elimination._skip_const_by_size", + 1000, + ) + def test_const_elimination_larger_than_threshold(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + # Construct a 10 x 10 matrix (100 elements) which is smaller than the threshold (1000). + tmp = mb.range_1d(start=0, end=10, step=1) + tmp_x = mb.reshape(x=tmp, shape=[-1, 1]) + tmp_y = mb.reshape(x=tmp, shape=[1, -1]) + return mb.matmul(x=tmp_x, y=tmp_y) + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog_large_const_size(x): + # Construct a 100 x 100 matrix (10000 elements) which is larger than the threshold (1000). 
+ tmp = mb.range_1d(start=0, end=100, step=1) + tmp_x = mb.reshape(x=tmp, shape=[-1, 1]) + tmp_y = mb.reshape(x=tmp, shape=[1, -1]) + return mb.matmul(x=tmp_x, y=tmp_y) + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_elimination") + assert get_op_types_in_program(prev_prog) == [ + "range_1d", + "reshape", + "reshape", + "matmul", + ] + # All ops (range_1d, reshape, matmul) constructing that 10x10 matrix is folded into a const. + assert get_op_types_in_program(prog) == [] + + prev_prog_large_const_size, _, _ = apply_pass_and_basic_check( + prog_large_const_size, "common::const_elimination" + ) + assert get_op_types_in_program(prev_prog_large_const_size) == [ + "range_1d", + "reshape", + "reshape", + "matmul", + ] + # The matmul op constructing the large matrix is kept due to size larger than threshold. + assert get_op_types_in_program(prog_large_const_size) == ["matmul"] + + +class TestDeadCodeElimination: + def test_dead_code_elimination(self): + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(2, 4)), + mb.TensorSpec(shape=(2, 4)), + ] + ) + def program0(x, y): + # following three unused op should be eliminated + a = mb.const(val=np.zeros(shape=(1,))) + b = mb.const(val=np.zeros(shape=(1,))) + _ = mb.add(x=a, y=b) + return mb.add(x=x, y=y) + + assert_op_count_match(program0, expect=4) + prev_prog = copy.deepcopy(program0) + PASS_REGISTRY["common::dead_code_elimination"](program0) + assert_same_output_names(prev_prog, program0) + assert_op_count_match(program0, expect=1) + + if _VALIDATE_MODEL: + assert_model_is_valid(program0, {"x": (2, 4), "y": (2, 4)}) + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def program1(x): + weights_val = np.random.rand(4, 2).T.astype(np.float32) + weights = mb.const(val=weights_val) + bias_val = np.random.rand(2).astype(np.float32) + bias = mb.const(val=bias_val) + + # unused op and its inputs should be eliminated + weights_for_matmul = mb.transpose(x=weights, perm=[1, 0]) + mb.matmul(x=x, y=weights_for_matmul) + + return mb.linear(x=x, weight=weights, bias=bias) + + assert_op_count_match(program1, expect=8) + prev_prog = copy.deepcopy(program1) + PASS_REGISTRY["common::dead_code_elimination"](program1) + assert_same_output_names(prev_prog, program1) + assert_op_count_match(program1, expect=3) + + if _VALIDATE_MODEL: + assert_model_is_valid(program1, {"x": (2, 4)}) + + +class TestDedupOpAndVarNames(unittest.TestCase): + def test_unchanged(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + x = mb.reshape(x=x, shape=(1, 8), name="reshape") + return x + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") + + self.assertEqual(get_op_types_in_program(prev_prog), ["reshape"]) + self.assertEqual(get_op_names_in_program(prev_prog), ["reshape"]) + + self.assertEqual(get_op_types_in_program(prog), ["reshape"]) + self.assertEqual(get_op_names_in_program(prog), ["reshape"]) + + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (1, 8)}, + ) + + def test_op_name_duplicated_once(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + x = mb.cast(x=x, dtype="fp16", name="castop") + x = mb.cast(x=x, dtype="fp32", name="castop") + x = mb.square(x=x, name="square_last") + return x + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") + + self.assertEqual(get_op_types_in_program(prev_prog), ["cast", "cast", "square"]) + 
self.assertEqual(get_op_names_in_program(prev_prog), ["castop", "castop", "square_last"]) + + self.assertEqual(get_op_types_in_program(prog), ["cast", "cast", "square"]) + self.assertEqual(get_op_names_in_program(prog), ["castop", "castop_1", "square_last"]) + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={block.outputs[0].name: (10, 20)}, + ) + + def test_op_name_duplicated_many(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + x = mb.cast(x=x, dtype="fp16", name="castop") + x = mb.cast(x=x, dtype="fp16", name="castop") + x = mb.cast(x=x, dtype="int32", name="castop_2") + x = mb.cast(x=x, dtype="fp16", name="castop") + x = mb.cast(x=x, dtype="fp32", name="castop_2") + x = mb.square(x=x, name="square") + return x + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") + + self.assertEqual( + get_op_types_in_program(prev_prog), ["cast", "cast", "cast", "cast", "cast", "square"] + ) + self.assertEqual( + get_op_names_in_program(prev_prog), + ["castop", "castop", "castop_2", "castop", "castop_2", "square"], + ) + + self.assertEqual( + get_op_types_in_program(prog), ["cast", "cast", "cast", "cast", "cast", "square"] + ) + self.assertEqual( + get_op_names_in_program(prog), + ["castop", "castop_1", "castop_2", "castop_3", "castop_2_1", "square"], + ) + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={block.outputs[0].name: (10, 20)}, + ) + + def test_input_name_shadow(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + # op name "x" results in output var name "x", which shadows prog + # input var name "x" + x = mb.transpose(x=x, perm=[1, 0], name="x") + x = mb.relu(x=x, name="relu") + return x + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") + self.assertEqual(get_op_types_in_program(prev_prog), ["transpose", "relu"]) + self.assertEqual(get_op_names_in_program(prev_prog), ["x", "relu"]) + + self.assertEqual(get_op_types_in_program(prog), ["transpose", "relu"]) + self.assertEqual(get_op_names_in_program(prog), ["x", "relu"]) + + op = prog["main"].find_ops(op_type="transpose")[0] + self.assertEqual("x_1", op.outputs[0].name) + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={block.outputs[0].name: (20, 10)}, + ) + + def test_nested_block(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) + def prog(x): + def true_fn(): + # returns var with name x shadows input 'x' + return mb.add(x=x, y=1.0, name="x") + + def false_fn(): + # two ops with name "x" + return mb.add(x=x, y=-1.0, name="x") + + pred = mb.equal(x=mb.squeeze(x=x), y=1.0) + return mb.cond(pred=pred, _true_fn=true_fn, _false_fn=false_fn) + + cond_op = prog.functions["main"].operations[-1] + assert cond_op.blocks[0].outputs[0].name == "x" + assert cond_op.blocks[1].outputs[0].name == "x" + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") + cond_op = prog.functions["main"].operations[-1] + assert cond_op.blocks[0].outputs[0].name == "x_1" + assert cond_op.blocks[1].outputs[0].name == "x_2" + + assert_model_is_valid( + prog, + {"x": (1,)}, + expected_output_shapes={block.outputs[0].name: (1,)}, + ) + + +class TestExpandDynamicLinear: + def test_keep_static_weight_static_bias(self): + X_SHAPE = (2, 5) + WEIGHT_SHAPE = (3, X_SHAPE[-1]) + + bias_shape = (WEIGHT_SHAPE[0],) + output_shape = (X_SHAPE[0], WEIGHT_SHAPE[0]) + + quantized_weight = np.random.randint(-127, 
128, WEIGHT_SHAPE, np.int8) + quantized_bias = np.random.randint(-127, 128, bias_shape, np.int8) + + @mb.program( + input_specs=[mb.TensorSpec(shape=X_SHAPE)], + opset_version=ct.target.iOS16, + ) + def prog(x): + weight = mb.constexpr_affine_dequantize( + quantized_data=quantized_weight, + scale=1.2, + zero_point=np.int8(3), + axis=0, + ) + bias = mb.constexpr_affine_dequantize( + quantized_data=quantized_bias, + scale=4.5, + zero_point=np.int8(6), + axis=0, + ) + return mb.linear(x=x, weight=weight, bias=bias) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_dynamic_linear") + assert get_op_types_in_program(prev_prog) == [ + "constexpr_affine_dequantize", + "constexpr_affine_dequantize", + "linear", + ] + assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) + assert_model_is_valid( + prog, + {"x": X_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + backend=("mlprogram", "fp16"), + minimum_deployment_target=ct.target.iOS16, + ) + + def test_expand_static_weight_dynamic_bias(self): + X_SHAPE = (2, 5) + WEIGHT_SHAPE = (3, X_SHAPE[-1]) + + bias_shape = (WEIGHT_SHAPE[0],) + output_shape = (X_SHAPE[0], WEIGHT_SHAPE[0]) + + weight = np.random.rand(*WEIGHT_SHAPE) + quantized_bias = np.random.randint(-127, 128, bias_shape, np.int8) + + @mb.program( + input_specs=[mb.TensorSpec(shape=X_SHAPE)], + opset_version=ct.target.iOS16, + ) + def prog(x): + bias = mb.constexpr_affine_dequantize( + quantized_data=quantized_bias, + scale=1.2, + zero_point=np.int8(3), + axis=0, + ) + screwed_bias = mb.exp(x=bias) + return mb.linear(x=x, weight=weight, bias=screwed_bias) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_dynamic_linear") + assert get_op_types_in_program(prev_prog) == [ + "constexpr_affine_dequantize", + "exp", + "linear", + ] + assert get_op_types_in_program(prog) == [ + "constexpr_affine_dequantize", + "exp", + "linear", + "add", + ] + assert_model_is_valid( + prog, + {"x": X_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + backend=("mlprogram", "fp16"), + minimum_deployment_target=ct.target.iOS16, + ) + + def test_expand_dynamic_weight_static_zero_bias(self): + X_SHAPE = (2, 5) + WEIGHT_SHAPE = (3, X_SHAPE[-1]) + + output_shape = (X_SHAPE[0], WEIGHT_SHAPE[0]) + + quantized_weight = np.random.randint(-127, 128, WEIGHT_SHAPE, np.int8) + + @mb.program( + input_specs=[mb.TensorSpec(shape=X_SHAPE)], + opset_version=ct.target.iOS16, + ) + def prog(x): + weight = mb.constexpr_affine_dequantize( + quantized_data=quantized_weight, + scale=1.2, + zero_point=np.int8(3), + axis=0, + ) + screwed_weight = mb.exp(x=weight) + return mb.linear(x=x, weight=screwed_weight) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_dynamic_linear") + assert get_op_types_in_program(prev_prog) == [ + "constexpr_affine_dequantize", + "exp", + "linear", + ] + assert get_op_types_in_program(prog) == [ + "constexpr_affine_dequantize", + "exp", + "matmul", + ] + assert_model_is_valid( + prog, + {"x": X_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + backend=("mlprogram", "fp16"), + minimum_deployment_target=ct.target.iOS16, + ) + + def test_expand_dynamic_weight_static_compressed_zero_bias(self): + X_SHAPE = (2, 5) + WEIGHT_SHAPE = (3, X_SHAPE[-1]) + + bias_shape = (WEIGHT_SHAPE[0],) + output_shape = (X_SHAPE[0], WEIGHT_SHAPE[0]) + + quantized_weight = np.random.randint(-127, 128, WEIGHT_SHAPE, np.int8) + quantized_bias = np.random.randint(-127, 128, 
bias_shape, np.int8) + + @mb.program( + input_specs=[mb.TensorSpec(shape=X_SHAPE)], + opset_version=ct.target.iOS16, + ) + def prog(x): + weight = mb.constexpr_affine_dequantize( + quantized_data=quantized_weight, + scale=1.2, + zero_point=np.int8(3), + axis=0, + ) + bias = mb.constexpr_affine_dequantize( + quantized_data=quantized_bias, + scale=np.random.rand(*bias_shape), + zero_point=quantized_bias, + axis=0, + ) + screwed_weight = mb.exp(x=weight) + return mb.linear(x=x, weight=screwed_weight, bias=bias) + + original_prog, _, _ = apply_pass_and_basic_check(prog, "common::expand_dynamic_linear") + expanded_prog, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + assert get_op_types_in_program(original_prog) == [ + "constexpr_affine_dequantize", + "constexpr_affine_dequantize", + "exp", + "linear", + ] + assert get_op_types_in_program(expanded_prog) == [ + "constexpr_affine_dequantize", + "constexpr_affine_dequantize", + "exp", + "matmul", + ] + assert get_op_types_in_program(prog) == [ + "constexpr_affine_dequantize", + "exp", + "matmul", + ] + + assert_model_is_valid( + prog, + {"x": X_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + backend=("mlprogram", "fp16"), + minimum_deployment_target=ct.target.iOS16, + ) + + def test_expand_dynamic_weight_static_nonzero_bias(self): + X_SHAPE = (2, 5) + WEIGHT_SHAPE = (3, X_SHAPE[-1]) + + bias_shape = (WEIGHT_SHAPE[0],) + output_shape = (X_SHAPE[0], WEIGHT_SHAPE[0]) + + quantized_weight = np.random.randint(-127, 128, WEIGHT_SHAPE, np.int8) + bias = np.random.rand(*bias_shape) + + @mb.program( + input_specs=[mb.TensorSpec(shape=X_SHAPE)], + opset_version=ct.target.iOS16, + ) + def prog(x): + weight = mb.constexpr_affine_dequantize( + quantized_data=quantized_weight, + scale=1.2, + zero_point=np.int8(3), + axis=0, + ) + screwed_weight = mb.exp(x=weight) + return mb.linear(x=x, weight=screwed_weight, bias=bias) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_dynamic_linear") + assert get_op_types_in_program(prev_prog) == [ + "constexpr_affine_dequantize", + "exp", + "linear", + ] + assert get_op_types_in_program(prog) == [ + "constexpr_affine_dequantize", + "exp", + "matmul", + "add", + ] + assert_model_is_valid( + prog, + {"x": X_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + backend=("mlprogram", "fp16"), + minimum_deployment_target=ct.target.iOS16, + ) + + def test_expand_dynamic_weight_dynamic_bias(self): + X_SHAPE = (2, 5) + WEIGHT_SHAPE = (3, X_SHAPE[-1]) + + bias_shape = (WEIGHT_SHAPE[0],) + output_shape = (X_SHAPE[0], WEIGHT_SHAPE[0]) + + quantized_weight = np.random.randint(-127, 128, WEIGHT_SHAPE, np.int8) + quantized_bias = np.random.randint(-127, 128, bias_shape, np.int8) + + @mb.program( + input_specs=[mb.TensorSpec(shape=X_SHAPE)], + opset_version=ct.target.iOS16, + ) + def prog(x): + weight = mb.constexpr_affine_dequantize( + quantized_data=quantized_weight, + scale=1.2, + zero_point=np.int8(3), + axis=0, + ) + bias = mb.constexpr_affine_dequantize( + quantized_data=quantized_bias, + scale=1.2, + zero_point=np.int8(3), + axis=0, + ) + screwed_weight = mb.exp(x=weight) + screwed_bias = mb.exp(x=bias) + return mb.linear(x=x, weight=screwed_weight, bias=screwed_bias) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::expand_dynamic_linear") + assert get_op_types_in_program(prev_prog) == [ + "constexpr_affine_dequantize", + "constexpr_affine_dequantize", + "exp", + "exp", + "linear", + ] + assert 
get_op_types_in_program(prog) == [ + "constexpr_affine_dequantize", + "constexpr_affine_dequantize", + "exp", + "exp", + "matmul", + "add", + ] + assert_model_is_valid( + prog, + {"x": X_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + backend=("mlprogram", "fp16"), + minimum_deployment_target=ct.target.iOS16, + ) + + +class TestReduceMeanFusion: + def test_valid_pattern1(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) + def prog(x): + x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) + x1 = mb.mul(x=1.0 / 30, y=x1) + return x1 + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_reduce_mean") + assert get_op_types_in_program(prev_prog) == ["reduce_sum", "mul"] + assert get_op_types_in_program(prog) == ["reduce_mean"] + assert_model_is_valid( + prog, + {"x": (3, 5, 6)}, + expected_output_shapes={block.outputs[0].name: (3, 1, 1)}, + ) + + def test_valid_pattern2(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(4, 5))]) + def prog(x): + x1 = mb.reduce_sum(x=x, axes=[0], keep_dims=False) + x1 = mb.real_div(x=x1, y=4.0) + return x1 + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_reduce_mean") + assert get_op_types_in_program(prev_prog) == ["reduce_sum", "real_div"] + assert get_op_types_in_program(prog) == ["reduce_mean"] + assert_model_is_valid( + prog, + {"x": (4, 5)}, + expected_output_shapes={block.outputs[0].name: (5,)}, + ) + + def test_invalid_pattern1(self): + """ + The mul does not correspond to "1/count" + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) + def prog(x): + x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) + x1 = mb.mul(x=5.0, y=x1) + return x1 + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_reduce_mean") + assert get_op_types_in_program(prog) == ["reduce_sum", "mul"] + + def test_invalid_pattern2(self): + """ + The div does not correspond to "count" + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) + def prog(x): + x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) + x1 = mb.real_div(x=x1, y=31.0) + return x1 + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_reduce_mean") + assert get_op_types_in_program(prog) == ["reduce_sum", "real_div"] + + def test_invalid_pattern3(self): + """ + One of the reduction dim is symbolic + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3, get_new_symbol(), 6))]) + def prog(x): + x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) + x1 = mb.real_div(x=x1, y=30.0) + return x1 + + pass_name = "common::fuse_reduce_mean" + PASS_REGISTRY[pass_name](prog) + assert get_op_types_in_program(prog) == ["reduce_sum", "real_div"] + + def test_invalid_pattern4(self): + """ + output of reduce_sum is model output + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) + def prog(x): + x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) + y1 = mb.real_div(x=x1, y=30.0) + return y1, x1 + + pass_name = "common::fuse_reduce_mean" + PASS_REGISTRY[pass_name](prog) + assert get_op_types_in_program(prog) == ["reduce_sum", "real_div"] + + def test_invalid_pattern5(self): + """ + output of reduce_sum is feeding into another op + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) + def prog(x): + x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) + y1 = mb.real_div(x=x1, y=30.0) + y2 = mb.mul(x=x1, y=10.0) + y3 = mb.add(x=y1, y=y2) + return y3 + + pass_name = "common::fuse_reduce_mean" + 
PASS_REGISTRY[pass_name](prog) + assert get_op_types_in_program(prog) == ["reduce_sum", "real_div", "mul", "add"] + + +class TestLoopInvariantElimination: + def test_loop_invariant_elimination1(self): + """ + Invariant pattern: Block input vars are returned as block output vars. + """ + + def body(a, b): + return mb.add(x=a, y=b), b + + def cond(a, b): + a_mean = mb.reduce_mean(x=a, axes=[0, 1]) + b_mean = mb.reduce_mean(x=b, axes=[0, 1]) + return mb.less(x=a_mean, y=b_mean) + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(1, 2)), + mb.TensorSpec(shape=(1, 2)), + ] + ) + def prog(a, b): + # b is loop invariant + return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b)) + + while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] + assert len(while_op.blocks[0].inputs) == 2 + assert len(while_op.outputs) == 2 + assert len(while_op.loop_vars) == 2 + assert while_op.blocks[0].inputs[0].name == "a_x0" + assert while_op.blocks[0].inputs[1].name == "b_x0" + + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::loop_invariant_elimination"](prog) + assert_same_output_names(prev_prog, prog) + + while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] + assert len(while_op.blocks[0].inputs) == 1 + assert len(while_op.outputs) == 1 + assert len(while_op.loop_vars) == 1 + assert while_op.blocks[0].inputs[0].name == "a_x0" + + if _VALIDATE_MODEL: + assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)}) + + def test_loop_invariant_elimination2(self): + """ + Invariant pattern: Block outputs var from outside of the block + """ + + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(1, 2)), + mb.TensorSpec(shape=(1, 2)), + ] + ) + def prog(a, b): + def body(a, bx): + return mb.add(x=a, y=b), b + + def cond(a, bx): + a_mean = mb.reduce_mean(x=a, axes=[0, 1]) + b_mean = mb.reduce_mean(x=bx, axes=[0, 1]) + return mb.less(x=a_mean, y=b_mean) + + # b is loop invariant + return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b)) + + while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] + assert len(while_op.blocks[0].inputs) == 2 + assert len(while_op.outputs) == 2 + assert len(while_op.loop_vars) == 2 + assert while_op.blocks[0].inputs[0].name == "a_x0" + assert while_op.blocks[0].inputs[1].name == "b_x0" + + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::loop_invariant_elimination"](prog) + assert_same_output_names(prev_prog, prog) + + while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] + assert len(while_op.blocks[0].inputs) == 1 + assert len(while_op.outputs) == 1 + assert len(while_op.loop_vars) == 1 + assert while_op.blocks[0].inputs[0].name == "a_x0" + + if _VALIDATE_MODEL: + assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)}) + + +class TestNoopElimination: + @pytest.mark.parametrize("is_block_output", ((True, False))) + def test_identity(self, is_block_output): + """ + Input graph: + + input -> identity -> (add 1.0 if not is_block_output) -> output + + Output graph: + + if is_block_output: + input -> identity -> output + else: + input -> add 1.0 -> output + """ + SHAPE = (2, 3) + + @mb.program(input_specs=[mb.TensorSpec(shape=SHAPE)]) + def prog(x): + y = mb.identity(x=x) + if not is_block_output: + y = mb.add(x=y, y=1.0) + return y + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + if is_block_output: + assert get_op_types_in_program(prev_prog) == ["identity"] + assert get_op_types_in_program(prog) == ["identity"] + else: + assert get_op_types_in_program(prev_prog) == 
["identity", "add"] + assert get_op_types_in_program(prog) == ["add"] + + output_name = block.outputs[0].name + assert_model_is_valid( + prog, + {"x": SHAPE}, + expected_output_shapes={output_name: SHAPE}, + ) + + @pytest.mark.parametrize( + "op_type, pos, val", + itertools.product( + ["add", "mul", "floor_div", "pow", "real_div", "sub"], + ["x", "y"], + [0.0, 1.0, [0.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]], + ), + ) + def test_elementwise_elimination(self, op_type, pos, val): + if "div" in op_type and np.prod(val) == 0: + return + if "pow" in op_type and (val != 0 or val != 1): + return + + test_op = getattr(mb, op_type) + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + if pos == "x": + r1 = test_op(x=val, y=x) + else: + r1 = test_op(x=x, y=val) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + original_program = [op_type, "relu"] + new_program = original_program + if op_type in {"add"}: + if val == 0.0 or val == [0.0, 0.0, 0.0, 0.0]: + new_program = ["relu"] + elif op_type in {"mul"}: + if val == 1.0 or val == [1.0, 1.0, 1.0, 1.0]: + new_program = ["relu"] + elif op_type in {"real_div"}: + if pos == "y" and (val == 1.0 or val == [1.0, 1.0, 1.0, 1.0]): + new_program = ["relu"] + elif op_type in {"pow", "floor_div"}: + if pos == "y" and (val == 1.0 or val == [1.0, 1.0, 1.0, 1.0]): + new_program = ["relu"] + elif op_type in {"sub"}: + if pos == "y" and (val == 0.0 or val == [0.0, 0.0, 0.0, 0.0]): + new_program = ["relu"] + + assert get_op_types_in_program(prev_prog) == original_program + assert get_op_types_in_program(prog) == new_program + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_elementwise_broadcast(self): + @mb.program(input_specs=[mb.TensorSpec(shape=[4])]) + def prog(x): + r1 = mb.add(x=x, y=[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + original_program = ["add", "relu"] + + assert get_op_types_in_program(prev_prog) == original_program + assert get_op_types_in_program(prog) == original_program + assert_model_is_valid( + prog, + {"x": [4]}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_elementwise_elimination_fill(self): + """ + When fill layer with dynamic shape is fed to elementwise-binary operation, + even though the tensor can't be materialized at conversion time but no-op + elimination can still be performed based on fill-value + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, get_new_symbol()))]) + def prog(x): + shape = mb.shape(x=x) + y = mb.fill(value=0.0, shape=shape) + x = mb.add(x=x, y=y) + return mb.relu(x=x) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["shape", "fill", "add", "relu"] + assert get_op_types_in_program(prog) == ["shape", "fill", "relu"] + + apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == ["relu"] + + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_reshape_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.reshape(x=x, shape=[1, 8]) + mb.reshape(x=r1, shape=[1, 8]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = 
apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["reshape", "reshape", "relu"] + assert get_op_types_in_program(prog) == ["reshape", "relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (1, 8)}, + ) + + def test_oneway_split_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.split(x=x, num_splits=1, axis=-1) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["split", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_full_split_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.split(x=x, split_sizes=[4], axis=-1) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["split", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_slicebysize_full_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.slice_by_size(x=x, begin=[0, 0], size=[2, 4]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["slice_by_size", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_slicebysize_to_end_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.slice_by_size(x=x, begin=[0, 0], size=[-1, -1]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["slice_by_size", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_slicebyindex_full_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.slice_by_index(x=x, begin=[0, 0], end=[2, 4]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["slice_by_index", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_slicebyindex_negative_stride(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.slice_by_index( + x=x, + begin=[0, 0], + end=[0, 0], + stride=[1, -1], + begin_mask=[True, True], + end_mask=[True, True], + ) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["slice_by_index", "relu"] + assert get_op_types_in_program(prog) == ["slice_by_index", "relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 
4)}, + ) + + @pytest.mark.parametrize( + "begin_mask, end_mask", + itertools.product( + itertools.product([True, False], [True, False]), + itertools.product([True, False], [True, False]), + ), + ) + def test_slicebyindex_mask_elimination(self, begin_mask, end_mask): + @mb.program(input_specs=[mb.TensorSpec(shape=(4, 4))]) + def prog(x): + begin = [1, 1] + end = [1, 1] + for i in range(2): + if not begin_mask[i]: + begin[i] = 0 + if not end_mask[i]: + end[i] = 4 + r1 = mb.slice_by_index( + x=x, begin=begin, end=end, begin_mask=begin_mask, end_mask=end_mask + ) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["slice_by_index", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (4, 4)}, + expected_output_shapes={block.outputs[0].name: (4, 4)}, + ) + + def test_pad_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.pad(x=x, pad=[0, 0, 0, 0]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["pad", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_keep_pad(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.pad(x=x, pad=[4, 4, 2, 2]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["pad", "relu"] + assert get_op_types_in_program(prog) == ["pad", "relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (10, 8)}, + ) + + def test_tile_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.tile(x=x, reps=[1, 1]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["tile", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_keep_tile(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.tile(x=x, reps=[2, 2]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["tile", "relu"] + assert get_op_types_in_program(prog) == ["tile", "relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (4, 8)}, + ) + + def test_upsample_nearest_neighbor_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 2, 4))]) + def prog(x): + r1 = mb.upsample_nearest_neighbor(x=x) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["upsample_nearest_neighbor", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (3, 2, 4)}, + expected_output_shapes={block.outputs[0].name: (3, 2, 4)}, + ) + + def test_upsample_bilinear_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 2, 4))]) + def prog(x): + 
r1 = mb.upsample_bilinear(x=x) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["upsample_bilinear", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (3, 2, 4)}, + expected_output_shapes={block.outputs[0].name: (3, 2, 4)}, + ) + + def test_resize_bilinear_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 2, 4))]) + def prog(x): + r1 = mb.resize_bilinear(x=x, target_size_height=2, target_size_width=4) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["resize_bilinear", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (3, 2, 4)}, + expected_output_shapes={block.outputs[0].name: (3, 2, 4)}, + ) + + def test_crop_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 2, 4))]) + def prog(x): + r1 = mb.crop(x=x, crop_height=[0, 0], crop_width=[0, 0]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["crop", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (3, 2, 4)}, + expected_output_shapes={block.outputs[0].name: (3, 2, 4)}, + ) + + def test_linear_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + r1 = mb.linear_activation(x=x, alpha=1.0, beta=0.0) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["linear_activation", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 4)}, + ) + + def test_transpose_elimination(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 4))]) + def prog(x): + r1 = mb.transpose(x=x, perm=[0, 1, 2]) + return mb.relu(x=r1) + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") + assert get_op_types_in_program(prev_prog) == ["transpose", "relu"] + assert get_op_types_in_program(prog) == ["relu"] + assert_model_is_valid( + prog, + {"x": (2, 3, 4)}, + expected_output_shapes={block.outputs[0].name: (2, 3, 4)}, + ) + + +class TestRemoveRedundantOps: + def test_redundant_ops_just_after_input_valid_pattern_1(self): + """ + Input graph: + input----->transpose(perm=[0, 2, 1])--->add---> add ---> out + | ^ ^ + | | | + |---->transpose(perm=[0, 2, 1])---- | + | | + | | + |---->transpose(perm=[0, 2, 1])------------ + + Output graph: + input----->transpose(perm=[0, 2, 1])--->add---> add ----> out + | ^ ^ + | | | + |------------- | + | | + |-------------------- + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 1]) + x2 = mb.transpose(x=x, perm=[0, 2, 1]) + x3 = mb.transpose(x=x, perm=[0, 2, 1]) + z = mb.add(x=x1, y=x2) + z = mb.add(x=z, y=x3) + return z + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == [ + "transpose", + "transpose", + "transpose", + "add", + "add", + ] + assert get_op_types_in_program(prog) == ["transpose", "add", "add"] + assert_model_is_valid( + prog, + {"x": (2, 3, 
5)}, + expected_output_shapes={block.outputs[0].name: (2, 5, 3)}, + ) + + def test_redundant_ops_just_after_input_valid_pattern_2(self): + """ + Input graph: + input----->leaky_relu(alpha=0.3)--->add---> add ---> out + | ^ ^ + | | | + |----->leaky_relu(alpha=0.3)--- | + | | + | | + |---->leaky_relu(alpha=0.3)------------ + + Output graph: + input--------->leaky_relu(alpha=0.3)--->add---> add ----> out + | ^ ^ + | | | + |------------- | + | | + |--------------------- + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x1 = mb.leaky_relu(x=x, alpha=0.3) + x2 = mb.leaky_relu(x=x, alpha=0.3) + x3 = mb.leaky_relu(x=x, alpha=0.3) + z = mb.add(x=x1, y=x2) + z = mb.add(x=z, y=x3) + return z + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == [ + "leaky_relu", + "leaky_relu", + "leaky_relu", + "add", + "add", + ] + assert get_op_types_in_program(prog) == ["leaky_relu", "add", "add"] + assert_model_is_valid( + prog, + {"x": (2, 3, 5)}, + expected_output_shapes={block.outputs[0].name: (2, 3, 5)}, + ) + + def test_redundant_ops_just_after_input_valid_pattern_3(self): + """ + Input graph: + input----->leaky_relu(alpha=0.4)--->add---> add ---> out + | ^ ^ + | | | + |----->leaky_relu(alpha=0.3)--- | + | | + | | + |---->leaky_relu(alpha=0.3)------------ + + Output graph: + input----->leaky_relu(alpha=0.4)--->add---> add ---> out + | ^ ^ + | | | + |----->leaky_relu(alpha=0.3)---------- + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x1 = mb.leaky_relu(x=x, alpha=0.4) + x2 = mb.leaky_relu(x=x, alpha=0.3) + x3 = mb.leaky_relu(x=x, alpha=0.3) + z = mb.add(x=x1, y=x2) + z = mb.add(x=z, y=x3) + return z + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == [ + "leaky_relu", + "leaky_relu", + "leaky_relu", + "add", + "add", + ] + assert get_op_types_in_program(prog) == ["leaky_relu", "leaky_relu", "add", "add"] + + leaky_relu_ops = block.find_ops(op_type="leaky_relu") + assert leaky_relu_ops[0].alpha.val == np.float32(0.4) + assert leaky_relu_ops[1].alpha.val == np.float32(0.3) + + def test_redundant_ops_just_after_input_invalid_pattern_1(self): + """ + input----->transpose(perm=[0, 2, 1])---> reshape(shape=[-1]) -----> add ---> out + | ^ + | | + |---->transpose(perm=[1, 0, 2])----> reshape(shape=[-1])------ + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 1]) + x2 = mb.transpose(x=x, perm=[1, 0, 2]) + x1 = mb.reshape(x=x1, shape=[-1]) + x2 = mb.reshape(x=x2, shape=[-1]) + z = mb.add(x=x1, y=x2) + return z + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == [ + "transpose", + "transpose", + "reshape", + "reshape", + "add", + ] + assert get_op_types_in_program(prog) == [ + "transpose", + "transpose", + "reshape", + "reshape", + "add", + ] + assert_model_is_valid( + prog, + {"x": (2, 3, 5)}, + expected_output_shapes={block.outputs[0].name: (30,)}, + ) + + def test_redundant_ops_just_after_input_invalid_pattern_2(self): + """ + input----->leaky_relu(alpha=0.3) -----> add ---> out + | ^ + | | + |---->leaky_relu(alpha=0.4)------- + + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x1 = mb.leaky_relu(x=x, alpha=0.3) + x2 = mb.leaky_relu(x=x, alpha=0.4) + z = mb.add(x=x1, y=x2) + return z + + 
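# The two leaky_relu ops use different alpha values (0.3 vs 0.4), so they are not redundant and the pass must keep both of them. +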
prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == ["leaky_relu", "leaky_relu", "add"] + assert get_op_types_in_program(prog) == ["leaky_relu", "leaky_relu", "add"] + assert_model_is_valid( + prog, + {"x": (2, 3, 5)}, + expected_output_shapes={block.outputs[0].name: (2, 3, 5)}, + ) + + def test_redundant_ops_just_after_input_invalid_pattern_3(self): + """ + test case, when inputs of 1 op is a subset of the inputs of the other op + + input----->layer_norm1 -----> add ---> out + | ^ + | | + |---->layer_norm2------- + + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 3, 2))]) + def prog(x): + x1 = mb.layer_norm(x=x, axes=[2], epsilon=1e-4) + gamma_val = np.array([1.0, 1.0], dtype=np.float32) + beta_val = np.array([1.0, 0.0], dtype=np.float32) + x2 = mb.layer_norm(x=x, axes=[2], epsilon=1e-4, gamma=gamma_val, beta=beta_val) + z = mb.add(x=x1, y=x2) + return z + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == ["layer_norm", "layer_norm", "add"] + assert get_op_types_in_program(prog) == ["layer_norm", "layer_norm", "add"] + assert_model_is_valid( + prog, + {"x": (1, 3, 2)}, + expected_output_shapes={block.outputs[0].name: (1, 3, 2)}, + ) + + @staticmethod + def _make_repeated_conv_prog(redundant_conv=True, out_channel=2): + prog = mil.Program() + func_inputs = {"x": mb.placeholder(shape=[1, 4, 5, 5])} + with Function(func_inputs) as ssa_fun: + x = ssa_fun.inputs["x"] + x = mb.relu(x=x) + W = np.random.rand(out_channel, 4, 3, 3) + if redundant_conv: + bias = np.random.rand(out_channel) + x1 = mb.conv(x=x, weight=W, bias=bias, pad_type="same", strides=[1, 1]) + x2 = mb.conv(x=x, weight=W, bias=bias, pad_type="same", strides=[1, 1]) + else: + x1 = mb.conv( + x=x, weight=W, bias=np.random.rand(out_channel), pad_type="same", strides=[1, 1] + ) + x2 = mb.conv( + x=x, weight=W, bias=np.random.rand(out_channel), pad_type="same", strides=[1, 1] + ) + x1 = mb.relu(x=x1) + x2 = mb.relu(x=x2) + x1 = mb.avg_pool(x=x1, kernel_sizes=[2, 2], strides=[1, 1], pad_type="same") + z = mb.concat(values=(x1, x2), axis=-3) + ssa_fun.set_outputs([z]) + prog.add_function("main", ssa_fun) + return prog + + def test_redundant_ops_inside_graph_valid_pattern(self): + """ + Input graph: + input--> relu--------->conv------>relu----> pool ---> concat ---> out + | ^ + | | + |---->conv---->relu---------------------------- + + Output graph: + input-> relu--->conv------>relu----> pool ---> concat ---> out + | ^ + | | + |------------------- + """ + prog = self._make_repeated_conv_prog(redundant_conv=True) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == [ + "relu", + "conv", + "conv", + "relu", + "relu", + "avg_pool", + "concat", + ] + assert get_op_types_in_program(prog) == ["relu", "conv", "relu", "avg_pool", "concat"] + assert_model_is_valid( + prog, + {"x": (1, 4, 5, 5)}, + expected_output_shapes={block.outputs[0].name: (1, 4, 5, 5)}, + ) + + def test_redundant_ops_inside_graph_with_large_const(self): + """ + For the large constants, they need to be deduplicated by the const_deduplication first. + This test is making sure the converter is not doing any "brutal force" comparison. 
+ + Input graph: + input--> relu--------->conv------>relu----> pool ---> concat ---> out + | ^ + | | + |---->conv---->relu---------------------------- + + Output graph: + input-> relu--->conv------>relu----> pool ---> concat ---> out + | ^ + | | + |------------------- + """ + # The remove_redundant_ops is not doing brutal force array comparison + prog = self._make_repeated_conv_prog(redundant_conv=True, out_channel=10) + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + ops_in_prev_prog = [ + "relu", + "conv", + "conv", + "relu", + "relu", + "avg_pool", + "concat", + ] + assert get_op_types_in_program(prev_prog) == ops_in_prev_prog + assert get_op_types_in_program(prog) == ops_in_prev_prog + + # We need to first run the const_deduplication pass. + prog = self._make_repeated_conv_prog(redundant_conv=True, out_channel=10) + _, _, block = apply_pass_and_basic_check(prog, "common::const_deduplication") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + _, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + + assert get_op_types_in_program(prog) == ["relu", "conv", "relu", "avg_pool", "concat"] + assert_model_is_valid( + prog, + {"x": (1, 4, 5, 5)}, + expected_output_shapes={block.outputs[0].name: (1, 20, 5, 5)}, + ) + + def test_redundant_ops_inside_graph_invalid_pattern(self): + """ + input--->relu--------->conv1------>relu----> pool ---> concat ---> out + | ^ + | | + |---->conv2---->relu--------------------------- + """ + prog = self._make_repeated_conv_prog(redundant_conv=False) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == [ + "relu", + "conv", + "conv", + "relu", + "relu", + "avg_pool", + "concat", + ] + assert get_op_types_in_program(prog) == [ + "relu", + "conv", + "conv", + "relu", + "relu", + "avg_pool", + "concat", + ] + assert_model_is_valid( + prog, + {"x": (1, 4, 5, 5)}, + expected_output_shapes={block.outputs[0].name: (1, 4, 5, 5)}, + ) + + def test_redundant_op_as_output_valid_pattern_1(self): + """ + Input graph: + input--------->relu------> out1 + | + | + |---->relu---->tanh---> out2 + + Output graph: + input--------->relu------> out1 + | + | + |---->tanh---> out2 + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x1 = mb.relu(x=x) + x2 = mb.relu(x=x) + return x1, mb.tanh(x=x2) + + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") + assert get_op_types_in_program(prev_prog) == ["relu", "relu", "tanh"] + assert get_op_types_in_program(prog) == ["relu", "tanh"] + assert_model_is_valid( + prog, + {"x": (2, 3, 5)}, + expected_output_shapes={ + block.outputs[0].name: (2, 3, 5), + block.outputs[1].name: (2, 3, 5), + }, + ) + + def test_redundant_op_as_output_invalid_pattern_1(self): + """ + Input graph: + input--------->relu------> out1 + | + | + |---->relu---> out2 + + "common::remove_redundant_ops" pass does not remove ops if their outputs + are block outputs. 
+ """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x1 = mb.relu(x=x) + x2 = mb.relu(x=x) + return x1, x2 + + prev_prog, _, block = apply_pass_and_basic_check( + prog, + "common::remove_redundant_ops", + ) + assert get_op_types_in_program(prev_prog) == ["relu", "relu"] + assert get_op_types_in_program(prog) == ["relu", "relu"] + assert_model_is_valid( + prog, + {"x": (2, 3, 5)}, + expected_output_shapes={ + block.outputs[0].name: (2, 3, 5), + block.outputs[1].name: (2, 3, 5), + }, + ) + + def test_cond_block_program(self): + """ + - Test identical ops within different blocks are not removed. The "relu" op inside true and + false blocks are not removed since they are in different blocks. + - Test ops that have blocks inside them are not removed. There are two cond ops here, + with identical inputs but they are not removed, since they are ops that have nested block + inside them. + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) + def prog(x): + x1 = mb.cast(x=x, dtype="bool") + + def true_fn(): + x = mb.shape(x=x1) + x = mb.cast(x=x, dtype="fp32") + return mb.add(x=x, y=1.0) + + def false_fn(): + x = mb.shape(x=x1) + x = mb.cast(x=x, dtype="fp32") + return mb.add(x=x, y=-1.0) + + z1 = mb.cond(pred=x1, _true_fn=true_fn, _false_fn=false_fn) + z2 = mb.cond(pred=x1, _true_fn=true_fn, _false_fn=false_fn) + z = mb.add(x=z1, y=z2) + return z + + prev_prog, _, block = apply_pass_and_basic_check( + prog, + "common::remove_redundant_ops", + ) + assert get_op_types_in_program(prev_prog) == ["cast", "cond", "cond", "add"] + assert get_op_types_in_program(prog) == ["cast", "cond", "cond", "add"] + cond_op = prog.find_ops(op_type="cond")[0] + assert cond_op.blocks[0].operations[0].op_type == "shape" + assert cond_op.blocks[1].operations[0].op_type == "shape" + assert_model_is_valid( + prog, + {"x": (1,)}, + expected_output_shapes={block.outputs[0].name: (1,)}, + ) + + def test_concat_op_pattern(self): + """ + Input graph: + ---------------> concat ------> log ------> out1 + | ^ + | | + input--------->relu------> concat ------> relu----> out2 + | ^ | + | | | + |---->tanh-------------------- + + Output graph: + |------>log ------> out1 + | + | + input--------->relu------> concat ------> relu----> out2 + | ^ + | | + |---->tanh--------- + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 5))]) + def prog(x): + x1 = mb.relu(x=x) + x2 = mb.tanh(x=x) + c1 = mb.concat(values=(x1, x2), axis=0) + c2 = mb.concat(values=(x1, x2), axis=0) + z1 = mb.log(x=c1) + z2 = mb.relu(x=c2) + return z1, z2 + + prev_prog, _, block = apply_pass_and_basic_check( + prog, + "common::remove_redundant_ops", + ) + assert get_op_types_in_program(prev_prog) == [ + "relu", + "tanh", + "concat", + "concat", + "log", + "relu", + ] + assert get_op_types_in_program(prog) == ["relu", "tanh", "concat", "log", "relu"] + assert_model_is_valid( + prog, + {"x": (10, 5)}, + expected_output_shapes={block.outputs[0].name: (20, 5), block.outputs[1].name: (20, 5)}, + ) + + def test_multiple_redundant_child_ops_pattern(self): + """ + Input graph + + input -------------> reshape ----------> add ---------> out1 + | ^ + | | + |-------> reshape --------------- + | + |------> slice_by_size-----> add ----------> out2 + | ^ + | | + |------> slice_by_size ------- + + Output graph + + input -------------> reshape ----------> add ------------> out1 + | | ^ + | | | + | |--------- + | + |------> slice_by_size----------> add -----------------> out2 + | ^ + | | + |--------------------- + + """ + + 
@mb.program(input_specs=[mb.TensorSpec(shape=(10, 5, 4))]) + def prog(x): + x1 = mb.reshape(x=x, shape=[5, 2, -1]) + x2 = mb.reshape(x=x, shape=[5, 2, -1]) + x3 = mb.slice_by_size(x=x, begin=[0, 0, 1], size=[2, 4, 3]) + x4 = mb.slice_by_size(x=x, begin=[0, 0, 1], size=[2, 4, 3]) + z1 = mb.add(x=x1, y=x2) + z2 = mb.add(x=x3, y=x4) + return z1, z2 + + prev_prog, _, block = apply_pass_and_basic_check( + prog, + "common::remove_redundant_ops", + ) + assert get_op_types_in_program(prev_prog) == [ + "reshape", + "reshape", + "slice_by_size", + "slice_by_size", + "add", + "add", + ] + assert get_op_types_in_program(prog) == ["reshape", "slice_by_size", "add", "add"] + assert_model_is_valid( + prog, + {"x": (10, 5, 4)}, + expected_output_shapes={ + block.outputs[0].name: (5, 2, 20), + block.outputs[1].name: (2, 4, 3), + }, + ) + + def test_random_distribution_op_invalid_pattern(self): + """ + Identical random ops are not removed + + input----->cast---->random_uniform------> add ---> out + | ^ + | | + |---->random_uniform------------ + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3,))]) + def prog(shape): + shape = mb.cast(x=shape, dtype="int32") + x1 = mb.random_uniform(shape=shape, low=0.0, high=1.0, seed=11) + x2 = mb.random_uniform(shape=shape, low=0.0, high=1.0, seed=11) + return mb.add(x=x1, y=x2) + + prev_prog, _, block = apply_pass_and_basic_check( + prog, + "common::remove_redundant_ops", + ) + assert get_op_types_in_program(prev_prog) == [ + "cast", + "random_uniform", + "random_uniform", + "add", + ] + assert get_op_types_in_program(prog) == ["cast", "random_uniform", "random_uniform", "add"] + + def test_nonreplaceable_vars(self): + """ + Nonreplaceable vars shouldn't be removed, e.g. palettized weights + + const_1----->add---->add_1------| + | | + input add---->output + | | + const_2----->add---->add_2------| + """ + + def _constexpr_lut_to_dense(): + lut_data = np.array( + [-19.0, 4.0, 0.0, -1.0, 1.0, 3.0, 5.0, -8.0, 19, 13, 42, 4.5, 5.4, 2.0, -6, -7] + ).astype(np.float32) + indices = np.array([212, 21]).astype(np.uint8) + shape = np.array([4, 1]).astype(np.uint32) + return mb.constexpr_lut_to_dense(lut=lut_data, indices=indices, shape=shape) + + @mb.program(input_specs=[mb.TensorSpec(shape=(4, 1))]) + def prog(x): + constexpr_1 = _constexpr_lut_to_dense() + constexpr_2 = _constexpr_lut_to_dense() + c = mb.add(x=constexpr_1, y=x) + d = mb.add(x=constexpr_2, y=x) + return mb.add(x=c, y=d) + + prev_prog, _, _ = apply_pass_and_basic_check( + prog, + "common::remove_redundant_ops", + ) + assert get_op_types_in_program(prev_prog) == get_op_types_in_program(prog) + + def test_redundant_ops_time_complexity(self): + """ + Test the graph pass doesn't re-run right away after detecting a redundant pattern, + in order to keep time complexity low. + + In this example, a program with 26 ops is first traversed, and 5 relu ops are removed. + At the time of second traversal, there are only 21 remaining ops. + As the result, the total ops of visited is 26 + 21 = 47. 
+ """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x = mb.cos(x=x) + for i in range(5): + x1 = mb.relu(x=x) + x2 = mb.relu(x=x) + z = mb.add(x=x1, y=x2) + z = mb.add(x=z, y=x2) + x = mb.sin(x=x) + return x + + graph_pass = remove_redundant_ops() + graph_pass.apply(prog) + + assert get_op_types_in_program(prog) == ["cos"] + ["relu", "add", "add", "sin"] * 5 + assert graph_pass._num_of_visited_ops == 47 + + def test_redundant_ops_time_complexity_pattern_2(self): + """ + Test the graph pass doesn't re-run right away after detecting a redundant pattern, + in order to keep time complexity low. + + In this example, there are three groups of identical leaky_relu ops can be removed, + and the algorithm should be run in the fashion that only goes through the + program twice. As the result, the total ops visited is: + + 8 + (8 - 3) = 13 + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) + def prog(x): + x = mb.cos(x=x) + x1 = mb.leaky_relu(x=x, alpha=0.2) + x2 = mb.leaky_relu(x=x, alpha=0.2) + x3 = mb.leaky_relu(x=x, alpha=0.3) + x4 = mb.leaky_relu(x=x, alpha=0.3) + x5 = mb.leaky_relu(x=x, alpha=0.4) + x6 = mb.leaky_relu(x=x, alpha=0.4) + return mb.sin(x=x6) + + graph_pass = remove_redundant_ops() + graph_pass.apply(prog) + + assert get_op_types_in_program(prog) == ["cos"] + ["leaky_relu"] * 3 + ["sin"] + assert graph_pass._num_of_visited_ops == 13 + + +class TestRemoveSymbolicReshape: + def test_remove_symbolic_reshape(self): + sym_b = Symbol("s0") + original_shape = (sym_b, Symbol("s1"), 2) + reshape_name = "reshape" + + @mb.program(input_specs=[mb.TensorSpec(shape=(sym_b, 4))]) + def prog(x): + # const cannot represent symbolic values. Use _const_symbolic + shape = mb._const_symbolic(val=original_shape) + return mb.reshape(x=x, shape=shape, name=reshape_name) + + reshape_op = prog.find_ops(prefix=reshape_name, op_type="reshape", exactly_one=True)[0] + shape_var = reshape_op.shape + reshaped_var = reshape_op.outputs[0] + assert np.all(shape_var.sym_val == original_shape) + assert np.all(reshaped_var.shape == (sym_b, 2, 2)) + + # Note: we cannot deepcopy prog with symbol. 
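+ # Instead of deep-copying and calling assert_same_output_names, capture the output names and compare them after the pass.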
+ prev_outputs = [o.name for o in prog["main"].outputs] + PASS_REGISTRY["common::remove_symbolic_reshape"](prog) + curr_outputs = [o.name for o in prog["main"].outputs] + assert curr_outputs == prev_outputs + + reshape_op = prog.find_ops(prefix=reshape_name, op_type="reshape", exactly_one=True)[0] + shape_var = reshape_op.shape + reshaped_var = reshape_op.outputs[0] + # shape param cannot be symbolic after the pass + assert np.all(shape_var.sym_val == (-1, 2, 2)) + # output shape is still symbolic + assert np.all(reshaped_var.shape == (sym_b, 2, 2)) + + if _VALIDATE_MODEL: + assert_model_is_valid(prog, {"x": (3, 4)}) + + +class TestTopologicalReorder: + def test_move_sink_casts_to_the_end(self): + """ + Input graph: + x (input) ---> square ---> cast (output) + | + | -----------> log ------> cast (output) + | + | -----------> relu -----> cast ----> relu (output) + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + x = mb.cast(x=x, dtype="fp16") + x1 = mb.square(x=x) + x2 = mb.cast(x=x1, dtype="fp32") + x3 = mb.log(x=x) + x4 = mb.cast(x=x3, dtype="fp32") + x5 = mb.relu(x=x) + x6 = mb.cast(x=x5, dtype="fp32") + x7 = mb.relu(x=x6) + return x2, x4, x7 + + assert get_op_types_in_program(prog) == [ + "cast", + "square", + "cast", + "log", + "cast", + "relu", + "cast", + "relu", + ] + + apply_pass_and_basic_check(prog, "common::topological_reorder") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == [ + "cast", + "square", + "log", + "relu", + "cast", + "relu", + "cast", + "cast", + ] + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={ + block.outputs[0].name: (10, 20), + block.outputs[1].name: (10, 20), + block.outputs[2].name: (10, 20), + }, + ) + + def test_move_sink_cast_transpose_to_the_end(self): + """ + Input graph: + x (input) ---> square ---> transpose ---> cast (output) + | + | -----------> log ------> transpose ---> cast (output) + | + | -----------> relu -----> cast ----> relu (output) + | + | -----------> relu (output) + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + x = mb.cast(x=x, dtype="fp16") + x1 = mb.square(x=x) + x1_t = mb.transpose(x=x1, perm=[1, 0]) + x2 = mb.cast(x=x1_t, dtype="fp32") + x3 = mb.log(x=x) + x3_t = mb.transpose(x=x3, perm=[1, 0]) + x4 = mb.cast(x=x3_t, dtype="fp32") + x5 = mb.relu(x=x) + x6 = mb.cast(x=x5, dtype="fp32") + x7 = mb.relu(x=x6) + x8 = mb.relu(x=x) + return x2, x4, x7, x8 + + assert get_op_types_in_program(prog) == [ + "cast", + "square", + "transpose", + "cast", + "log", + "transpose", + "cast", + "relu", + "cast", + "relu", + "relu", + ] + + apply_pass_and_basic_check(prog, "common::topological_reorder") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == [ + "cast", + "square", + "log", + "relu", + "cast", + "relu", + "relu", + "transpose", + "cast", + "transpose", + "cast", + ] + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={ + block.outputs[0].name: (20, 10), + block.outputs[1].name: (20, 10), + block.outputs[2].name: (10, 20), + block.outputs[3].name: (10, 20), + }, + ) + + def test_move_multiple_uses_overlapping(self): + """ + Input graph: + x (input) ---> cast ---> cast (output) + | + |-------> transpose ---> transpose (output) + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + x1 = mb.cast(x=x, dtype="fp16") + x2 = mb.cast(x=x1, 
dtype="fp32") + x3 = mb.transpose(x=x1, perm=[1, 0]) + x4 = mb.transpose(x=x3, perm=[1, 0]) + return x2, x4 + + assert get_op_types_in_program(prog) == ["cast", "cast", "transpose", "transpose"] + + apply_pass_and_basic_check(prog, "common::topological_reorder") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == ["cast", "transpose", "transpose", "cast"] + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={ + block.outputs[0].name: (10, 20), + block.outputs[1].name: (10, 20), + }, + ) + + def test_move_split_to_first_use(self): + """ + Input graph: + x (input) ---> split ---> square ---> add (output) + | | | + | | --------------------| + | + | -----------> square --------------> relu (output) + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + s1, s2 = mb.split(x=x, num_splits=2, axis=0) + x2 = mb.square(x=x) + x3 = mb.relu(x=x2) + s1_1 = mb.square(x=s1) + s3 = mb.add(x=s1_1, y=s2) + return x3, s3 + + assert get_op_types_in_program(prog) == ["split", "square", "relu", "square", "add"] + + block = prog.functions["main"] + # Reorder `split` op to test op with multiple output case + topological_reorder._move_operations_to_the_end_block(block, ["split"]) + assert get_op_types_in_program(prog) == ["square", "relu", "split", "square", "add"] + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={ + block.outputs[0].name: (10, 20), + block.outputs[1].name: (5, 20), + }, + ) + + def test_move_transpose_before_subblock(self): + """ + Input graph: + x (input) ---> cast ---> transpose ---> cast (output) + | + | -----------> square ------> transpose (x1_t) ---> cast (output) + | + | -----------> squeeze ----> equal ----> squeeze + | + (true) <--- / \ ---> (false) + | | + | /<-(x1_t)->\ | + add <-/ \--> add + |---------> | <---------| + | + add ---> cast (output) + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + x = mb.cast(x=x, dtype="fp16") + x1 = mb.square(x=x) + x1_t = mb.transpose(x=x1, perm=[1, 0]) + + def true_fn(): + return mb.add(x=x1_t, y=np.float16(1), name="x2") + + def false_fn(): + return mb.add(x=x1_t, y=np.float16(2), name="x2") + + is_one = mb.equal(x=mb.squeeze(x=x), y=np.float16(1.0)) + pred = mb.squeeze(x=is_one) + x3 = mb.cond(pred=pred, _true_fn=true_fn, _false_fn=false_fn) + x4 = mb.add(x=x1_t, y=x3) + x5 = mb.cast(x=x4, dtype="fp32") + return x5 + + apply_pass_and_basic_check(prog, "common::topological_reorder") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == [ + "cast", + "square", + "squeeze", + "equal", + "squeeze", + "transpose", + "cond", + "add", + "cast", + ] + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={block.outputs[0].name: (20, 10)}, + ) + + def test_cast_transpose_already_at_the_end(self): + """ + Input graph: + x (input) ---> square ---> transpose ---> cast (output) + | + | -----------> log ------> transpose ---> cast (output) + | + | -----------> relu -----> cast ----> relu (output) + | + | -----------> relu (output) + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + x = mb.cast(x=x, dtype="fp16") + x1 = mb.square(x=x) + x3 = mb.log(x=x) + x5 = mb.relu(x=x) + x6 = mb.cast(x=x5, dtype="fp32") + x7 = mb.relu(x=x6) + x8 = mb.relu(x=x) + x1_t = mb.transpose(x=x1, perm=[1, 0]) + x2 = mb.cast(x=x1_t, dtype="fp32") + x3_t = 
mb.transpose(x=x3, perm=[1, 0]) + x4 = mb.cast(x=x3_t, dtype="fp32") + return x2, x4, x7, x8 + + assert get_op_types_in_program(prog) == [ + "cast", + "square", + "log", + "relu", + "cast", + "relu", + "relu", + "transpose", + "cast", + "transpose", + "cast", + ] + + apply_pass_and_basic_check(prog, "common::topological_reorder") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == [ + "cast", + "square", + "log", + "relu", + "cast", + "relu", + "relu", + "transpose", + "cast", + "transpose", + "cast", + ] + + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={ + block.outputs[0].name: (20, 10), + block.outputs[1].name: (20, 10), + block.outputs[2].name: (10, 20), + block.outputs[3].name: (10, 20), + }, + ) diff --git a/coremltools/converters/mil/mil/passes/tests/test_lower_complex_dialect_ops.py b/coremltools/converters/mil/mil/passes/tests/test_lower_complex_dialect_ops.py index cf6b5f409..e43809fbd 100644 --- a/coremltools/converters/mil/mil/passes/tests/test_lower_complex_dialect_ops.py +++ b/coremltools/converters/mil/mil/passes/tests/test_lower_complex_dialect_ops.py @@ -3,12 +3,17 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +import copy + import numpy as np import pytest -from coremltools import ComputeUnit +from coremltools import ComputeUnit from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil.passes.defs.lower_complex_dialect_ops import _calculate_dft_matrix +from coremltools.converters.mil.mil.passes.defs.lower_complex_dialect_ops import ( + _calculate_dft_matrix, +) +from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource from coremltools.converters.mil.testing_utils import ( apply_pass_and_basic_check, assert_model_is_valid, @@ -38,6 +43,33 @@ def prog(x): expected_output_shapes={block.outputs[0].name: (1, 2, 3)}, ) + def test_lower_fft_with_scope(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3))]) + def prog(x): + with mb.scope(ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["m1"])): + fft_res = mb.complex_fft(data=x) + with mb.scope(ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["m2"])): + return mb.complex_real(data=fft_res) + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE) + + apply_pass_and_basic_check( + prog, + "common::lower_complex_dialect_ops", + skip_essential_scope_check=True, # this graph pass introduces two subgraphs, while only one of them is used. 
+ ) + apply_pass_and_basic_check( + prog, + "common::dead_code_elimination", + ) + + # since the _replace_var is operated on the output of complex_real, so the scope info should be "m2" + block = prog.functions["main"] + for op in block.operations: + assert op.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["m2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["lower_complex_dialect_ops"], + } + def test_lower_fft(self): @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3))]) def prog(x): @@ -45,13 +77,34 @@ def prog(x): real_data = mb.complex_real(data=fft_res) return real_data - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::lower_complex_dialect_ops") + # Test the apply_pass_and_basic_check utils has the ability to catch errors regarding incomplete scope information + with pytest.raises( + ValueError, match="is missing essential scopes ScopeSource.TORCHSCRIPT_MODULE_TYPE" + ): + prev_prog, _, block = apply_pass_and_basic_check( + copy.deepcopy(prog), + "common::lower_complex_dialect_ops", + ) + + prev_prog, _, block = apply_pass_and_basic_check( + prog, + "common::lower_complex_dialect_ops", + skip_essential_scope_check=True, # this graph pass introduces two subgraphs, while only one of them is used. + ) assert get_op_types_in_program(prev_prog) == ["complex_fft", "complex_real"] after_pass_op_types_set = set(get_op_types_in_program(prog)) # Verifies that the complex dialect ops got lowered to core ops. assert "complex_fft" not in after_pass_op_types_set assert "complex_real" not in after_pass_op_types_set + apply_pass_and_basic_check( + prog, + "common::dead_code_elimination", + ) + # Verifies that the complex dialect ops got lowered to core ops. + assert "complex_fft" not in after_pass_op_types_set + assert "complex_real" not in after_pass_op_types_set + inputs = {"x": (1, 2, 3)} assert_model_is_valid( prog, @@ -59,10 +112,7 @@ def prog(x): expected_output_shapes={block.outputs[0].name: (1, 2, 3)}, ) - @pytest.mark.parametrize( - "onesided", - [True, False] - ) + @pytest.mark.parametrize("onesided", [True, False]) def test_calculate_dft_matrix(self, onesided): expected_C = np.zeros((16, 16)) expected_S = np.zeros((16, 16)) @@ -71,7 +121,7 @@ def test_calculate_dft_matrix(self, onesided): for k in range(16): expected_C[k, :] = np.cos(2 * np.pi * k * _range / 16) expected_S[k, :] = np.sin(2 * np.pi * k * _range / 16) - + if onesided: expected_C = expected_C[:9] expected_S = expected_S[:9] @@ -80,7 +130,9 @@ def test_calculate_dft_matrix(self, onesided): def prog(x): return _calculate_dft_matrix(x, onesided=onesided) - model = ct_convert(program=prog, convert_to=("neuralnetwork", "fp32"), compute_units=ComputeUnit.CPU_ONLY) + model = ct_convert( + program=prog, convert_to=("neuralnetwork", "fp32"), compute_units=ComputeUnit.CPU_ONLY + ) p = model.predict({"x": np.array([16.0])}) cos_matrix, sin_matrix = p["cos_0"], p["sin_0"] diff --git a/coremltools/converters/mil/mil/passes/tests/test_optimize_linear_passes.py b/coremltools/converters/mil/mil/passes/tests/test_optimize_linear_passes.py new file mode 100644 index 000000000..9a90104a3 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/tests/test_optimize_linear_passes.py @@ -0,0 +1,324 @@ +# Copyright (c) 2024, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import copy +import itertools + +import numpy as np +import pytest + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +from coremltools.converters.mil.testing_reqs import backends +from coremltools.converters.mil.testing_utils import ( + apply_pass_and_basic_check, + assert_model_is_valid, + assert_op_count_match, + assert_same_output_names, + get_op_types_in_program, +) + +from .test_passes import _VALIDATE_MODEL + + +class TestFuseLinearBias: + @staticmethod + def _apply_transform(inputs, func, is_first_input, has_bias): + """ + Utility function to test the weight/bias transform function in linear bias fusion pass. + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 4))]) + def prog(x): + + if has_bias: + linear = mb.linear( + x=x, + weight=inputs["linear_weight"], + bias=inputs["linear_bias"], + ) + else: + linear = mb.linear( + x=x, + weight=inputs["linear_weight"], + ) + + if is_first_input: + kwargs = { + "x": linear, + "y": inputs["bias"], + } + else: + kwargs = { + "x": inputs["bias"], + "y": linear, + } + + x = func(**kwargs) + return x + + apply_pass_and_basic_check( + prog, + "common::fuse_linear_bias", + ) + + # get the updated weight from the prog + linear_op = [] + for op in prog["main"].operations: + if op.op_type == "const": + continue + linear_op.append(op) + assert len(linear_op) == 1, "should only have one linear layer." + + return linear_op[0].weight.val, linear_op[0].bias.val + + @pytest.mark.parametrize( + "op_type, is_first_input, has_bias, broadcast", + itertools.product( + ["add", "sub"], + [True, False], + [True, False], + [True, False], + ), + ) + def test_transform_linear(self, op_type, is_first_input, has_bias, broadcast): + """ + Test the weight / bias transform function in the linear bias fusion pass + """ + weight = np.reshape(np.arange(8), (2, 4)).astype(np.float32) + linear_bias = ( + np.array([1, 2]).astype(np.float32) if has_bias else np.array([0, 0]).astype(np.float32) + ) + bias = np.array([3, 4]).astype(np.float32) + if broadcast: + bias = np.reshape(bias, (1, 2)) + + inputs = { + "linear_weight": weight, + "linear_bias": linear_bias, + "bias": bias, + } + + if op_type == "add": + func = mb.add + elif op_type == "sub": + func = mb.sub + + new_weight, new_bias = self._apply_transform( + inputs, + func, + is_first_input, + has_bias, + ) + if broadcast: + bias = np.reshape(bias, (2,)) + + if op_type == "sub" and not is_first_input: + expected_weight = -weight + else: + expected_weight = weight + + if op_type == "sub": + if is_first_input: + expected_bias = linear_bias - bias + else: + expected_bias = bias - linear_bias + else: + expected_bias = linear_bias + bias + + np.testing.assert_almost_equal(new_weight, expected_weight) + np.testing.assert_almost_equal(new_bias, expected_bias) + + @pytest.mark.parametrize( + "rank, op_type, is_first_input, broadcast, backend", + itertools.product([1, 2, 3], ["add", "sub"], [True, False], [True, False], backends), + ) + def test_linear_bias_fusion(self, rank, op_type, is_first_input, broadcast, backend): + """ + Input graph: + Const + | + V + input -----> linear -----> add/sub ---> out + + Output graph: + input -----> linear ----> out + """ + input_shape = [1, 2, 3] + input_shape = input_shape[-rank:] + input_shape = tuple(input_shape) + + 
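# Build a program in which a constant bias is added to (or subtracted from) the linear output; fuse_linear_bias should fold it into the linear op itself. +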
@mb.program(input_specs=[mb.TensorSpec(shape=input_shape)]) + def prog(x): + linear_weight = np.reshape(np.arange(6), (2, 3)).astype(np.float32) + linear_bias = np.array([1.0, 2.0]) + bias = np.array([3.0, 4.0]) + if broadcast: + if rank >= 2: + bias = np.reshape(bias, (1, 2)) + + x = mb.linear( + x=x, + weight=linear_weight, + bias=linear_bias, + ) + + func = mb.add if op_type == "add" else mb.sub + if is_first_input: + kwargs = { + "x": x, + "y": bias, + } + else: + kwargs = { + "x": bias, + "y": x, + } + x = func(**kwargs) + return x + + prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_linear_bias") + + assert get_op_types_in_program(prev_prog) == ["linear", op_type] + assert get_op_types_in_program(prog) == ["linear"] + + # validate graph pass + output_shape = [1, 2, 2] + output_shape = tuple(output_shape[-rank:]) + assert_model_is_valid( + prog, + {"x": input_shape}, + expected_output_shapes={block.outputs[0].name: output_shape}, + backend=backend, + ) + + +class TestFuseMatmulWeightBias: + def test_fuse_matmul_weight_bias(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + weights_val = np.random.rand(2, 4).T.astype(np.float32) + weights = mb.const(val=weights_val) + bias_val = np.random.rand(2).astype(np.float32) + bias = mb.const(val=bias_val) + + matmul = mb.matmul(x=x, y=weights) + return mb.add(x=matmul, y=bias) + + assert_op_count_match(prog, expect=1, op="matmul") + assert_op_count_match(prog, expect=0, op="linear") + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::fuse_matmul_weight_bias"](prog) + assert_same_output_names(prev_prog, prog) + assert_op_count_match(prog, expect=0, op="matmul") + assert_op_count_match(prog, expect=1, op="linear") + + if _VALIDATE_MODEL: + assert_model_is_valid(prog, {"x": (2, 4)}) + + +class TestFuseTransposeMatmul: + def test_fuse_transposes(self): + X_SHAPE = (3, 2) + Y_SHAPE = (5, 2) + + output_shape = (X_SHAPE[0], Y_SHAPE[0]) + + @mb.program(input_specs=[mb.TensorSpec(shape=X_SHAPE), mb.TensorSpec(shape=Y_SHAPE)]) + def prog(x, y): + transposed_x = mb.transpose(x=x, perm=(1, 0)) + transposed_y = mb.transpose(x=y, perm=(1, 0)) + z = mb.matmul(x=transposed_x, y=transposed_y, transpose_x=True, transpose_y=False) + return z + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::fuse_transpose_matmul") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + assert get_op_types_in_program(prev_prog) == ["transpose", "transpose", "matmul"] + assert get_op_types_in_program(prog) == ["matmul"] + + matmul = prog.find_ops(op_type="matmul")[0] + assert not matmul.transpose_x.val + assert matmul.transpose_y.val + + assert_model_is_valid( + prog, + {"x": X_SHAPE, "y": Y_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + ) + + def test_fuse_transpose_y(self): + X_SHAPE = (3, 2) + Y_SHAPE = (2, 5) + + output_shape = (X_SHAPE[0], Y_SHAPE[1]) + + @mb.program(input_specs=[mb.TensorSpec(shape=X_SHAPE), mb.TensorSpec(shape=Y_SHAPE)]) + def prog(x, y): + transposed_y = mb.transpose(x=y, perm=(1, 0)) + z = mb.matmul(x=x, y=transposed_y, transpose_y=True) + return z + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::fuse_transpose_matmul") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + assert get_op_types_in_program(prev_prog) == ["transpose", "matmul"] + assert get_op_types_in_program(prog) == ["matmul"] + + matmul = prog.find_ops(op_type="matmul")[0] + assert not matmul.transpose_x.val + 
assert not matmul.transpose_y.val + + assert_model_is_valid( + prog, + {"x": X_SHAPE, "y": Y_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + ) + + def test_fuse_transpose_x_but_unfuseable_transpose_y(self): + X_SHAPE = (4, 2, 5, 3) + Y_SHAPE = (4, 5, 2, 7) + + output_shape = (X_SHAPE[0], X_SHAPE[1], X_SHAPE[3], Y_SHAPE[3]) + + @mb.program(input_specs=[mb.TensorSpec(shape=X_SHAPE), mb.TensorSpec(shape=Y_SHAPE)]) + def prog(x, y): + transposed_x = mb.transpose(x=x, perm=(0, 1, 3, 2)) + transposed_y = mb.transpose(x=y, perm=(0, 2, 1, 3)) + z = mb.matmul(x=transposed_x, y=transposed_y) + return z + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::fuse_transpose_matmul") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + assert get_op_types_in_program(prev_prog) == ["transpose", "transpose", "matmul"] + assert get_op_types_in_program(prog) == ["transpose", "matmul"] + + assert_model_is_valid( + prog, + {"x": X_SHAPE, "y": Y_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + ) + + def test_unfuseable_transposes(self): + X_SHAPE = (3, 2, 5) + Y_SHAPE = (5, 2, 7) + + output_shape = (X_SHAPE[1], X_SHAPE[0], Y_SHAPE[2]) + + @mb.program(input_specs=[mb.TensorSpec(shape=X_SHAPE), mb.TensorSpec(shape=Y_SHAPE)]) + def prog(x, y): + transposed_x = mb.transpose(x=x, perm=(1, 0, 2)) + transposed_y = mb.transpose(x=y, perm=(1, 0, 2)) + z = mb.matmul(x=transposed_x, y=transposed_y) + return z + + prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::fuse_transpose_matmul") + _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + assert get_op_types_in_program(prev_prog) == ["transpose", "transpose", "matmul"] + assert get_op_types_in_program(prev_prog) == get_op_types_in_program(prog) + + assert_model_is_valid( + prog, + {"x": X_SHAPE, "y": Y_SHAPE}, + expected_output_shapes={block.outputs[0].name: output_shape}, + ) diff --git a/coremltools/converters/mil/mil/passes/tests/test_passes.py b/coremltools/converters/mil/mil/passes/tests/test_passes.py index 98fa61a66..1187cd915 100644 --- a/coremltools/converters/mil/mil/passes/tests/test_passes.py +++ b/coremltools/converters/mil/mil/passes/tests/test_passes.py @@ -9,21 +9,22 @@ import numpy as np import pytest -from mock import patch import coremltools as ct import coremltools.optimize as cto from coremltools._deps import _IS_MACOS +from coremltools.converters.mil import mil from coremltools.converters.mil.experimental.passes.generic_pass_infrastructure import ( register_generic_pass, ) from coremltools.converters.mil.mil import Builder as mb -from coremltools.converters.mil.mil import Function, Program, Symbol, get_new_symbol, types +from coremltools.converters.mil.mil import Function, types from coremltools.converters.mil.mil.ops.defs.iOS15.elementwise_unary import cast as _cast_iOS14 from coremltools.converters.mil.mil.ops.defs.iOS17.elementwise_unary import cast as _cast_iOS17 -from coremltools.converters.mil.mil.passes.defs.cleanup import topological_reorder +from coremltools.converters.mil.mil.passes.defs.optimize_repeat_ops import cast_optimization from coremltools.converters.mil.mil.passes.helper import _check_var_scalar_value from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource from coremltools.converters.mil.mil.types import numpy_type_to_builtin_type from coremltools.converters.mil.mil.types.type_mapping import 
builtin_to_string from coremltools.converters.mil.testing_reqs import backends @@ -32,7 +33,6 @@ assert_model_is_valid, assert_op_count_match, assert_same_output_names, - get_op_names_in_program, get_op_types_in_block, get_op_types_in_program, ) @@ -113,140 +113,6 @@ def _get_constexpr_val(constexpr_var): "constexpr_affine_dequantize", ] -class TestConstDeduplication: - def test_const_deduplication(self): - BATCH_DIM = 5 - SEQUENCE_LENGTH = 4 - ENCODING_DIM = 256 - EMBEDDING_DIM = 128 - weight = np.random.rand(EMBEDDING_DIM, ENCODING_DIM) - bias = np.random.rand(EMBEDDING_DIM) - - @mb.program( - input_specs=[ - mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), - mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), - ] - ) - def prog(q, k): - q_e = mb.linear(x=q, weight=weight, bias=bias) - k_e = mb.linear(x=k, weight=weight, bias=bias) - attention = mb.matmul(x=q_e, y=k_e, transpose_y=True) - return attention - - prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") - assert_op_count_match(prev_prog, expect=6, op="const") - assert_op_count_match(prog, expect=4, op="const") - - @pytest.mark.parametrize( - "constexpr_op", - CONSTEXPR_OPS, - ) - def test_constexpr_deduplication(self, constexpr_op): - BATCH_DIM = 5 - SEQUENCE_LENGTH = 4 - ENCODING_DIM = 256 - EMBEDDING_DIM = 128 - @mb.program( - input_specs=[ - mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), - mb.TensorSpec(shape=(BATCH_DIM, SEQUENCE_LENGTH, ENCODING_DIM)), - ] - ) - def prog(q, k): - weight_q = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM, ENCODING_DIM), seed=19) - weight_k = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM, ENCODING_DIM), seed=19) - bias_q = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM,), seed=29) - bias_k = CONSTEXPR_FUNCS[constexpr_op]((EMBEDDING_DIM,), seed=29) - q_e = mb.linear(x=q, weight=weight_q, bias=bias_q) - k_e = mb.linear(x=k, weight=weight_k, bias=bias_k) - attention = mb.matmul(x=q_e, y=k_e, transpose_y=True) - return attention - - prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") - assert_op_count_match(prev_prog, expect=4, op=constexpr_op) - assert_op_count_match(prog, expect=2, op=constexpr_op) - - def test_const_deduplication_as_outputs(self): - """ - If the duplicated constants are block outputs, we should not remove them. 
- """ - # case 1: - # const_2 can be eliminated since it is not block output - const = np.random.rand(40, 20, 30) - - @mb.program( - input_specs=[ - mb.TensorSpec( - shape=( - 40, - 20, - 30, - ) - ) - ] - ) - def prog(x): - const_1 = mb.const(val=const, name="const_1") - const_2 = mb.const(val=const, name="const_2") - x = mb.relu(x=x) - x = mb.add(x=x, y=const_2) - return x, const_1 - - prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") - assert_op_count_match(prev_prog, expect=2, op="const") - assert_op_count_match(prog, expect=1, op="const") - assert prog.functions["main"].outputs[1].name == "const_1" - - # case 2: - # const_2 can not be eliminated since it is a block output - const = np.random.rand(40, 20, 30) - - @mb.program( - input_specs=[ - mb.TensorSpec( - shape=( - 40, - 20, - 30, - ) - ) - ] - ) - def prog(x): - const_1 = mb.const(val=const, name="const_1") - const_2 = mb.const(val=const, name="const_2") - x = mb.relu(x=x) - x = mb.add(x=x, y=const_2) - return x, const_1, const_2 - - prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") - assert_op_count_match(prev_prog, expect=2, op="const") - assert_op_count_match(prog, expect=2, op="const") - assert prog.functions["main"].outputs[1].name == "const_1" - assert prog.functions["main"].outputs[2].name == "const_2" - - @pytest.mark.skip("rdar://109374995 consts are not shared across blocks") - def test_const_deduplication_multiple_blocks(self): - weight = np.random.rand(5, 3, 2, 2) - - @mb.program(input_specs=[mb.TensorSpec(shape=(4, 3, 8, 8))]) - def prog(x): - def _true_fn(): - return mb.conv(x=x, weight=weight, pad_type="valid") - - def _false_fn(): - y = mb.mul(x=x, y=2.0) - return mb.conv(x=y, weight=weight, pad_type="valid") - - x_gt_0_tensor = mb.greater(x=x, y=0.0) - x_gt_0 = mb.slice_by_index(x=x_gt_0_tensor, begin=(0, 0, 0, 0), end=(1, 1, 1, 1)) - return mb.cond(pred=x_gt_0, _true_fn=_true_fn, _false_fn=_false_fn) - - prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_deduplication") - assert_op_count_match(prev_prog, expect=8, op="const") - assert_op_count_match(prog, expect=6, op="const") - class TestFuseSqueezeExpandDims: @pytest.mark.parametrize( @@ -283,1973 +149,74 @@ def prog(x): # noop_elimination can further remove the identity op apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prog) == ["relu"] - - def test_fuse_squeeze_expand_dims_negative(self): - """ - If squeeze and expand_dims cannot cancel each other, - the graph pass does nothing - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 1, 4, 1, 1))]) - def prog(x): - x = mb.squeeze(x=x, axes=(1, 2)) - x = mb.expand_dims(x=x, axes=(1, 3)) - return mb.relu(x=x) - - apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") - assert get_op_types_in_program(prog) == ["squeeze", "expand_dims", "relu"] - - def test_fuse_squeeze_expand_dims_connected_output(self): - """ - If squeeze is connected to block output, it cannot be removed. - However, the expand_dims can be a block output. - """ - # squeeze connected to output. Nothing happens. - @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) - def prog(x): - squeeze = mb.squeeze(x=x, axes=(0,)) - expand_dims = mb.expand_dims(x=squeeze, axes=(0,)) - return mb.relu(x=expand_dims), squeeze - - apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") - assert get_op_types_in_program(prog) == ["squeeze", "expand_dims", "relu"] - - # expand_dims connected to output. 
Still good to fuse. - @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) - def prog(x): - squeeze = mb.squeeze(x=x, axes=(0,)) - expand_dims = mb.expand_dims(x=squeeze, axes=(0,)) - return mb.relu(x=expand_dims), expand_dims - - apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") - assert get_op_types_in_program(prog) == ["identity", "relu"] - -class TestConstElimination: - def test_const_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - a = np.random.rand(2, 4).astype(np.float32) - double_a = mb.add(x=a, y=a) - return mb.add(x=x, y=double_a) - - assert_op_count_match(prog, expect=2, op="const") - prev_prog = copy.deepcopy(prog) - PASS_REGISTRY["common::const_elimination"](prog) - assert_same_output_names(prev_prog, prog) - assert_op_count_match(prog, expect=3, op="const") - - if _VALIDATE_MODEL: - assert_model_is_valid(prog, {"x": (2, 4)}) - - def test_const_elimination_nonreplaceable(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - a = np.random.rand(2, 4).astype(np.float16) - constexpr_a = mb.constexpr_cast(source_val=a, output_dtype="fp32") - double_a = mb.add(x=constexpr_a, y=a.astype(np.float32)) - return mb.add(x=x, y=double_a) - - prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_elimination") - assert get_op_types_in_program(prev_prog) == ["constexpr_cast", "add", "add"] - # Not fold into const because the upstream constexpr_cast op is non-replaceable. - assert get_op_types_in_program(prog) == ["constexpr_cast", "add", "add"] - - def test_force_const_eliminate_nonreplaceable_ops(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(3,), dtype=types.int32)]) - def prog(x): - a = np.random.rand(2, 3, 5).astype(np.float16) - constexpr_a = mb.constexpr_cast(source_val=a, output_dtype="fp32") - double_a = mb.add(x=constexpr_a, y=a.astype(np.float32)) - a_shape = mb.shape(x=double_a) - return mb.add(x=x, y=a_shape) - - assert get_op_types_in_program(prog) == ["constexpr_cast", "add", "shape", "add"] - - apply_pass_and_basic_check(prog, "common::const_elimination") - # still fold shape into const regardless the non-replaceable upstream - # constexpr_cast op, since it only provides a shape - assert get_op_types_in_program(prog) == ["constexpr_cast", "add", "add"] - - apply_pass_and_basic_check(prog, "common::dead_code_elimination") - # constexpr_cast(a) and add(a, a) no longer contributes to output, - # so they should get dead code eliminated - assert get_op_types_in_program(prog) == ["add"] - - @patch( - "coremltools.converters.mil.mil.passes.defs.cleanup.const_elimination._skip_const_by_size", - 1000, - ) - def test_const_elimination_larger_than_threshold(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) - def prog(x): - # Construct a 10 x 10 matrix (100 elements) which is smaller than the threshold (1000). - tmp = mb.range_1d(start=0, end=10, step=1) - tmp_x = mb.reshape(x=tmp, shape=[-1, 1]) - tmp_y = mb.reshape(x=tmp, shape=[1, -1]) - return mb.matmul(x=tmp_x, y=tmp_y) - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) - def prog_large_const_size(x): - # Construct a 100 x 100 matrix (10000 elements) which is larger than the threshold (1000). 
- tmp = mb.range_1d(start=0, end=100, step=1) - tmp_x = mb.reshape(x=tmp, shape=[-1, 1]) - tmp_y = mb.reshape(x=tmp, shape=[1, -1]) - return mb.matmul(x=tmp_x, y=tmp_y) - - prev_prog, _, _ = apply_pass_and_basic_check(prog, "common::const_elimination") - assert get_op_types_in_program(prev_prog) == [ - "range_1d", - "reshape", - "reshape", - "matmul", - ] - # All ops (range_1d, reshape, matmul) constructing that 10x10 matrix is folded into a const. - assert get_op_types_in_program(prog) == [] - - prev_prog_large_const_size, _, _ = apply_pass_and_basic_check( - prog_large_const_size, "common::const_elimination" - ) - assert get_op_types_in_program(prev_prog_large_const_size) == [ - "range_1d", - "reshape", - "reshape", - "matmul", - ] - # The matmul op constructing the large matrix is kept due to size larger than threshold. - assert get_op_types_in_program(prog_large_const_size) == ["matmul"] - - -class TestDeadCodeElimination: - def test_dead_code_elimination(self): - @mb.program( - input_specs=[ - mb.TensorSpec(shape=(2, 4)), - mb.TensorSpec(shape=(2, 4)), - ] - ) - def program0(x, y): - # following three unused op should be eliminated - a = mb.const(val=np.zeros(shape=(1,))) - b = mb.const(val=np.zeros(shape=(1,))) - _ = mb.add(x=a, y=b) - return mb.add(x=x, y=y) - - assert_op_count_match(program0, expect=4) - prev_prog = copy.deepcopy(program0) - PASS_REGISTRY["common::dead_code_elimination"](program0) - assert_same_output_names(prev_prog, program0) - assert_op_count_match(program0, expect=1) - - if _VALIDATE_MODEL: - assert_model_is_valid(program0, {"x": (2, 4), "y": (2, 4)}) - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def program1(x): - weights_val = np.random.rand(4, 2).T.astype(np.float32) - weights = mb.const(val=weights_val) - bias_val = np.random.rand(2).astype(np.float32) - bias = mb.const(val=bias_val) - - # unused op and its inputs should be eliminated - weights_for_matmul = mb.transpose(x=weights, perm=[1, 0]) - mb.matmul(x=x, y=weights_for_matmul) - - return mb.linear(x=x, weight=weights, bias=bias) - - assert_op_count_match(program1, expect=8) - prev_prog = copy.deepcopy(program1) - PASS_REGISTRY["common::dead_code_elimination"](program1) - assert_same_output_names(prev_prog, program1) - assert_op_count_match(program1, expect=3) - - if _VALIDATE_MODEL: - assert_model_is_valid(program1, {"x": (2, 4)}) - - -class TestDedupOpAndVarNames(unittest.TestCase): - def test_unchanged(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - x = mb.reshape(x=x, shape=(1, 8), name="reshape") - return x - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") - - self.assertEqual(get_op_types_in_program(prev_prog), ["reshape"]) - self.assertEqual(get_op_names_in_program(prev_prog), ["reshape"]) - - self.assertEqual(get_op_types_in_program(prog), ["reshape"]) - self.assertEqual(get_op_names_in_program(prog), ["reshape"]) - - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (1, 8)}, - ) - - def test_op_name_duplicated_once(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) - def prog(x): - x = mb.cast(x=x, dtype="fp16", name="castop") - x = mb.cast(x=x, dtype="fp32", name="castop") - x = mb.square(x=x, name="square_last") - return x - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") - - self.assertEqual(get_op_types_in_program(prev_prog), ["cast", "cast", "square"]) - 
self.assertEqual(get_op_names_in_program(prev_prog), ["castop", "castop", "square_last"]) - - self.assertEqual(get_op_types_in_program(prog), ["cast", "cast", "square"]) - self.assertEqual(get_op_names_in_program(prog), ["castop", "castop_1", "square_last"]) - - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={block.outputs[0].name: (10, 20)}, - ) - - def test_op_name_duplicated_many(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) - def prog(x): - x = mb.cast(x=x, dtype="fp16", name="castop") - x = mb.cast(x=x, dtype="fp16", name="castop") - x = mb.cast(x=x, dtype="int32", name="castop_2") - x = mb.cast(x=x, dtype="fp16", name="castop") - x = mb.cast(x=x, dtype="fp32", name="castop_2") - x = mb.square(x=x, name="square") - return x - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") - - self.assertEqual( - get_op_types_in_program(prev_prog), ["cast", "cast", "cast", "cast", "cast", "square"] - ) - self.assertEqual( - get_op_names_in_program(prev_prog), - ["castop", "castop", "castop_2", "castop", "castop_2", "square"], - ) - - self.assertEqual( - get_op_types_in_program(prog), ["cast", "cast", "cast", "cast", "cast", "square"] - ) - self.assertEqual( - get_op_names_in_program(prog), - ["castop", "castop_1", "castop_2", "castop_3", "castop_2_1", "square"], - ) - - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={block.outputs[0].name: (10, 20)}, - ) - - def test_input_name_shadow(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) - def prog(x): - # op name "x" results in output var name "x", which shadows prog - # input var name "x" - x = mb.transpose(x=x, perm=[1, 0], name="x") - x = mb.relu(x=x, name="relu") - return x - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") - self.assertEqual(get_op_types_in_program(prev_prog), ["transpose", "relu"]) - self.assertEqual(get_op_names_in_program(prev_prog), ["x", "relu"]) - - self.assertEqual(get_op_types_in_program(prog), ["transpose", "relu"]) - self.assertEqual(get_op_names_in_program(prog), ["x", "relu"]) - - op = prog["main"].find_ops(op_type="transpose")[0] - self.assertEqual("x_1", op.outputs[0].name) - - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={block.outputs[0].name: (20, 10)}, - ) - - def test_nested_block(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) - def prog(x): - def true_fn(): - # returns var with name x shadows input 'x' - return mb.add(x=x, y=1.0, name="x") - - def false_fn(): - # two ops with name "x" - return mb.add(x=x, y=-1.0, name="x") - - pred = mb.equal(x=mb.squeeze(x=x), y=1.0) - return mb.cond(pred=pred, _true_fn=true_fn, _false_fn=false_fn) - - cond_op = prog.functions["main"].operations[-1] - assert cond_op.blocks[0].outputs[0].name == "x" - assert cond_op.blocks[1].outputs[0].name == "x" - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::dedup_op_and_var_names") - cond_op = prog.functions["main"].operations[-1] - assert cond_op.blocks[0].outputs[0].name == "x_1" - assert cond_op.blocks[1].outputs[0].name == "x_2" - - assert_model_is_valid( - prog, - {"x": (1,)}, - expected_output_shapes={block.outputs[0].name: (1,)}, - ) - - -class TestAddConvTransposeOutputShape: - def test_add_conv_transpose_output_shape(self): - """ - Given: - %1: (1, 5, 39, fp32) = conv_transpose(...) # no output_shape input. 
- - Result: - %2: (3, i32) = const(val=[1,5,39]) - %3: (1, 5, 39, fp32) = conv_transpose(..., output_shape=%2) - """ - N, C_in, C_out, D1 = 1, 3, 5, 20 - - @mb.program(input_specs=[mb.TensorSpec(shape=(N, C_in, D1))]) - def prog(x): - weight = np.random.rand(C_in, C_out, D1).astype(np.float32) - return mb.conv_transpose(x=x, weight=weight) - - prev_prog, prev_block, block = apply_pass_and_basic_check( - prog, "common::add_conv_transpose_output_shape" - ) - assert get_op_types_in_program(prev_prog) == ["conv_transpose"] - assert get_op_types_in_program(prog) == ["conv_transpose"] - prev_conv_transpose_op = prev_prog.find_ops(op_type="conv_transpose", exactly_one=True)[0] - conv_transpose_op = prog.find_ops(op_type="conv_transpose", exactly_one=True)[0] - assert np.all(conv_transpose_op.output_shape.val == prev_conv_transpose_op.outputs[0].shape) - - -class TestNoopElimination: - @pytest.mark.parametrize("is_block_output", ((True, False))) - def test_identity(self, is_block_output): - """ - Input graph: - - input -> identity -> (add 1.0 if not is_block_output) -> output - - Output graph: - - if is_block_output: - input -> identity -> output - else: - input -> add 1.0 -> output - """ - SHAPE = (2, 3) - - @mb.program(input_specs=[mb.TensorSpec(shape=SHAPE)]) - def prog(x): - y = mb.identity(x=x) - if not is_block_output: - y = mb.add(x=y, y=1.0) - return y - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - if is_block_output: - assert get_op_types_in_program(prev_prog) == ["identity"] - assert get_op_types_in_program(prog) == ["identity"] - else: - assert get_op_types_in_program(prev_prog) == ["identity", "add"] - assert get_op_types_in_program(prog) == ["add"] - - output_name = block.outputs[0].name - assert_model_is_valid( - prog, - {"x": SHAPE}, - expected_output_shapes={output_name: SHAPE}, - ) - - @pytest.mark.parametrize( - "op_type, pos, val", - itertools.product( - ["add", "mul", "floor_div", "pow", "real_div", "sub"], - ["x", "y"], - [0.0, 1.0, [0.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]], - ), - ) - def test_elementwise_elimination(self, op_type, pos, val): - if "div" in op_type and np.prod(val) == 0: - return - if "pow" in op_type and (val != 0 or val != 1): - return - - test_op = getattr(mb, op_type) - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - if pos == "x": - r1 = test_op(x=val, y=x) - else: - r1 = test_op(x=x, y=val) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - original_program = [op_type, "relu"] - new_program = original_program - if op_type in {"add"}: - if val == 0.0 or val == [0.0, 0.0, 0.0, 0.0]: - new_program = ["relu"] - elif op_type in {"mul"}: - if val == 1.0 or val == [1.0, 1.0, 1.0, 1.0]: - new_program = ["relu"] - elif op_type in {"real_div"}: - if pos == "y" and (val == 1.0 or val == [1.0, 1.0, 1.0, 1.0]): - new_program = ["relu"] - elif op_type in {"pow", "floor_div"}: - if pos == "y" and (val == 1.0 or val == [1.0, 1.0, 1.0, 1.0]): - new_program = ["relu"] - elif op_type in {"sub"}: - if pos == "y" and (val == 0.0 or val == [0.0, 0.0, 0.0, 0.0]): - new_program = ["relu"] - - assert get_op_types_in_program(prev_prog) == original_program - assert get_op_types_in_program(prog) == new_program - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_elementwise_broadcast(self): - @mb.program(input_specs=[mb.TensorSpec(shape=[4])]) - def prog(x): - r1 = 
mb.add(x=x, y=[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - original_program = ["add", "relu"] - - assert get_op_types_in_program(prev_prog) == original_program - assert get_op_types_in_program(prog) == original_program - assert_model_is_valid( - prog, - {"x": [4]}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_elementwise_elimination_fill(self): - """ - When fill layer with dynamic shape is fed to elementwise-binary operation, - even though the tensor can't be materialized at conversion time but no-op - elimination can still be performed based on fill-value - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, get_new_symbol()))]) - def prog(x): - shape = mb.shape(x=x) - y = mb.fill(value=0.0, shape=shape) - x = mb.add(x=x, y=y) - return mb.relu(x=x) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["shape", "fill", "add", "relu"] - assert get_op_types_in_program(prog) == ["shape", "fill", "relu"] - - apply_pass_and_basic_check(prog, "common::dead_code_elimination") - - assert get_op_types_in_program(prog) == ["relu"] - - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_reshape_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.reshape(x=x, shape=[1, 8]) - mb.reshape(x=r1, shape=[1, 8]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["reshape", "reshape", "relu"] - assert get_op_types_in_program(prog) == ["reshape", "relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (1, 8)}, - ) - - def test_oneway_split_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.split(x=x, num_splits=1, axis=-1) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["split", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_full_split_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.split(x=x, split_sizes=[4], axis=-1) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["split", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_slicebysize_full_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.slice_by_size(x=x, begin=[0, 0], size=[2, 4]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["slice_by_size", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_slicebysize_to_end_elimination(self): - 
@mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.slice_by_size(x=x, begin=[0, 0], size=[-1, -1]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["slice_by_size", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_slicebyindex_full_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.slice_by_index(x=x, begin=[0, 0], end=[2, 4]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["slice_by_index", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_slicebyindex_negative_stride(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.slice_by_index( - x=x, - begin=[0, 0], - end=[0, 0], - stride=[1, -1], - begin_mask=[True, True], - end_mask=[True, True], - ) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["slice_by_index", "relu"] - assert get_op_types_in_program(prog) == ["slice_by_index", "relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - @pytest.mark.parametrize( - "begin_mask, end_mask", - itertools.product( - itertools.product([True, False], [True, False]), - itertools.product([True, False], [True, False]), - ), - ) - def test_slicebyindex_mask_elimination(self, begin_mask, end_mask): - @mb.program(input_specs=[mb.TensorSpec(shape=(4, 4))]) - def prog(x): - begin = [1, 1] - end = [1, 1] - for i in range(2): - if not begin_mask[i]: - begin[i] = 0 - if not end_mask[i]: - end[i] = 4 - r1 = mb.slice_by_index( - x=x, begin=begin, end=end, begin_mask=begin_mask, end_mask=end_mask - ) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["slice_by_index", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (4, 4)}, - expected_output_shapes={block.outputs[0].name: (4, 4)}, - ) - - def test_pad_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.pad(x=x, pad=[0, 0, 0, 0]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["pad", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_keep_pad(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.pad(x=x, pad=[4, 4, 2, 2]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["pad", "relu"] - assert get_op_types_in_program(prog) == ["pad", "relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (10, 8)}, - ) - - def 
test_tile_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.tile(x=x, reps=[1, 1]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["tile", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_keep_tile(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.tile(x=x, reps=[2, 2]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["tile", "relu"] - assert get_op_types_in_program(prog) == ["tile", "relu"] - assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (4, 8)}, - ) - - def test_upsample_nearest_neighbor_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 2, 4))]) - def prog(x): - r1 = mb.upsample_nearest_neighbor(x=x) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["upsample_nearest_neighbor", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (3, 2, 4)}, - expected_output_shapes={block.outputs[0].name: (3, 2, 4)}, - ) - - def test_upsample_bilinear_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 2, 4))]) - def prog(x): - r1 = mb.upsample_bilinear(x=x) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["upsample_bilinear", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (3, 2, 4)}, - expected_output_shapes={block.outputs[0].name: (3, 2, 4)}, - ) - - def test_resize_bilinear_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 2, 4))]) - def prog(x): - r1 = mb.resize_bilinear(x=x, target_size_height=2, target_size_width=4) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["resize_bilinear", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (3, 2, 4)}, - expected_output_shapes={block.outputs[0].name: (3, 2, 4)}, - ) - - def test_crop_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 2, 4))]) - def prog(x): - r1 = mb.crop(x=x, crop_height=[0, 0], crop_width=[0, 0]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["crop", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (3, 2, 4)}, - expected_output_shapes={block.outputs[0].name: (3, 2, 4)}, - ) - - def test_linear_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) - def prog(x): - r1 = mb.linear_activation(x=x, alpha=1.0, beta=0.0) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["linear_activation", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - 
assert_model_is_valid( - prog, - {"x": (2, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 4)}, - ) - - def test_transpose_elimination(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 4))]) - def prog(x): - r1 = mb.transpose(x=x, perm=[0, 1, 2]) - return mb.relu(x=r1) - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::noop_elimination") - assert get_op_types_in_program(prev_prog) == ["transpose", "relu"] - assert get_op_types_in_program(prog) == ["relu"] - assert_model_is_valid( - prog, - {"x": (2, 3, 4)}, - expected_output_shapes={block.outputs[0].name: (2, 3, 4)}, - ) - - -class TestRemoveSymbolicReshape: - def test_remove_symbolic_reshape(self): - sym_b = Symbol("s0") - original_shape = (sym_b, Symbol("s1"), 2) - reshape_name = "reshape" - - @mb.program(input_specs=[mb.TensorSpec(shape=(sym_b, 4))]) - def prog(x): - # const cannot represent symbolic values. Use _const_symbolic - shape = mb._const_symbolic(val=original_shape) - return mb.reshape(x=x, shape=shape, name=reshape_name) - - reshape_op = prog.find_ops(prefix=reshape_name, op_type="reshape", exactly_one=True)[0] - shape_var = reshape_op.shape - reshaped_var = reshape_op.outputs[0] - assert np.all(shape_var.sym_val == original_shape) - assert np.all(reshaped_var.shape == (sym_b, 2, 2)) - - # Note: we cannot deepcopy prog with symbol. - prev_outputs = [o.name for o in prog["main"].outputs] - PASS_REGISTRY["common::remove_symbolic_reshape"](prog) - curr_outputs = [o.name for o in prog["main"].outputs] - assert curr_outputs == prev_outputs - - reshape_op = prog.find_ops(prefix=reshape_name, op_type="reshape", exactly_one=True)[0] - shape_var = reshape_op.shape - reshaped_var = reshape_op.outputs[0] - # shape param cannot be symbolic after the pass - assert np.all(shape_var.sym_val == (-1, 2, 2)) - # output shape is still symbolic - assert np.all(reshaped_var.shape == (sym_b, 2, 2)) - - if _VALIDATE_MODEL: - assert_model_is_valid(prog, {"x": (3, 4)}) - - -class TestLoopInvariantElimination: - def test_loop_invariant_elimination1(self): - """ - Invariant pattern: Block input vars are returned as block output vars. 
- """ - - def body(a, b): - return mb.add(x=a, y=b), b - - def cond(a, b): - a_mean = mb.reduce_mean(x=a, axes=[0, 1]) - b_mean = mb.reduce_mean(x=b, axes=[0, 1]) - return mb.less(x=a_mean, y=b_mean) - - @mb.program( - input_specs=[ - mb.TensorSpec(shape=(1, 2)), - mb.TensorSpec(shape=(1, 2)), - ] - ) - def prog(a, b): - # b is loop invariant - return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b)) - - while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] - assert len(while_op.blocks[0].inputs) == 2 - assert len(while_op.outputs) == 2 - assert len(while_op.loop_vars) == 2 - assert while_op.blocks[0].inputs[0].name == "a_x0" - assert while_op.blocks[0].inputs[1].name == "b_x0" - - prev_prog = copy.deepcopy(prog) - PASS_REGISTRY["common::loop_invariant_elimination"](prog) - assert_same_output_names(prev_prog, prog) - - while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] - assert len(while_op.blocks[0].inputs) == 1 - assert len(while_op.outputs) == 1 - assert len(while_op.loop_vars) == 1 - assert while_op.blocks[0].inputs[0].name == "a_x0" - - if _VALIDATE_MODEL: - assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)}) - - def test_loop_invariant_elimination2(self): - """ - Invariant pattern: Block outputs var from outside of the block - """ - - @mb.program( - input_specs=[ - mb.TensorSpec(shape=(1, 2)), - mb.TensorSpec(shape=(1, 2)), - ] - ) - def prog(a, b): - def body(a, bx): - return mb.add(x=a, y=b), b - - def cond(a, bx): - a_mean = mb.reduce_mean(x=a, axes=[0, 1]) - b_mean = mb.reduce_mean(x=bx, axes=[0, 1]) - return mb.less(x=a_mean, y=b_mean) - - # b is loop invariant - return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b)) - - while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] - assert len(while_op.blocks[0].inputs) == 2 - assert len(while_op.outputs) == 2 - assert len(while_op.loop_vars) == 2 - assert while_op.blocks[0].inputs[0].name == "a_x0" - assert while_op.blocks[0].inputs[1].name == "b_x0" - - prev_prog = copy.deepcopy(prog) - PASS_REGISTRY["common::loop_invariant_elimination"](prog) - assert_same_output_names(prev_prog, prog) - - while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] - assert len(while_op.blocks[0].inputs) == 1 - assert len(while_op.outputs) == 1 - assert len(while_op.loop_vars) == 1 - assert while_op.blocks[0].inputs[0].name == "a_x0" - - if _VALIDATE_MODEL: - assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)}) - - -class TestReduceMeanFusion: - def test_valid_pattern1(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) - def prog(x): - x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) - x1 = mb.mul(x=1.0 / 30, y=x1) - return x1 - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_reduce_mean") - assert get_op_types_in_program(prev_prog) == ["reduce_sum", "mul"] - assert get_op_types_in_program(prog) == ["reduce_mean"] - assert_model_is_valid( - prog, - {"x": (3, 5, 6)}, - expected_output_shapes={block.outputs[0].name: (3, 1, 1)}, - ) - - def test_valid_pattern2(self): - @mb.program(input_specs=[mb.TensorSpec(shape=(4, 5))]) - def prog(x): - x1 = mb.reduce_sum(x=x, axes=[0], keep_dims=False) - x1 = mb.real_div(x=x1, y=4.0) - return x1 - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_reduce_mean") - assert get_op_types_in_program(prev_prog) == ["reduce_sum", "real_div"] - assert get_op_types_in_program(prog) == ["reduce_mean"] - assert_model_is_valid( - prog, - {"x": (4, 5)}, - 
expected_output_shapes={block.outputs[0].name: (5,)}, - ) - - def test_invalid_pattern1(self): - """ - The mul does not correspond to "1/count" - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) - def prog(x): - x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) - x1 = mb.mul(x=5.0, y=x1) - return x1 - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_reduce_mean") - assert get_op_types_in_program(prog) == ["reduce_sum", "mul"] - - def test_invalid_pattern2(self): - """ - The div does not correspond to "count" - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) - def prog(x): - x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) - x1 = mb.real_div(x=x1, y=31.0) - return x1 - - prev_prog, prev_block, block = apply_pass_and_basic_check(prog, "common::fuse_reduce_mean") - assert get_op_types_in_program(prog) == ["reduce_sum", "real_div"] - - def test_invalid_pattern3(self): - """ - One of the reduction dim is symbolic - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(3, get_new_symbol(), 6))]) - def prog(x): - x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) - x1 = mb.real_div(x=x1, y=30.0) - return x1 - - pass_name = "common::fuse_reduce_mean" - PASS_REGISTRY[pass_name](prog) - assert get_op_types_in_program(prog) == ["reduce_sum", "real_div"] - - def test_invalid_pattern4(self): - """ - output of reduce_sum is model output - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) - def prog(x): - x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) - y1 = mb.real_div(x=x1, y=30.0) - return y1, x1 - - pass_name = "common::fuse_reduce_mean" - PASS_REGISTRY[pass_name](prog) - assert get_op_types_in_program(prog) == ["reduce_sum", "real_div"] - - def test_invalid_pattern5(self): - """ - output of reduce_sum is feeding into another op - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) - def prog(x): - x1 = mb.reduce_sum(x=x, axes=[-1, -2], keep_dims=True) - y1 = mb.real_div(x=x1, y=30.0) - y2 = mb.mul(x=x1, y=10.0) - y3 = mb.add(x=y1, y=y2) - return y3 - - pass_name = "common::fuse_reduce_mean" - PASS_REGISTRY[pass_name](prog) - assert get_op_types_in_program(prog) == ["reduce_sum", "real_div", "mul", "add"] - - -class TestRemoveRedundantOps: - def test_redundant_ops_just_after_input_valid_pattern_1(self): - """ - Input graph: - input----->transpose(perm=[0, 2, 1])--->add---> add ---> out - | ^ ^ - | | | - |---->transpose(perm=[0, 2, 1])---- | - | | - | | - |---->transpose(perm=[0, 2, 1])------------ - - Output graph: - input----->transpose(perm=[0, 2, 1])--->add---> add ----> out - | ^ ^ - | | | - |------------- | - | | - |-------------------- - """ - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) - def prog(x): - x1 = mb.transpose(x=x, perm=[0, 2, 1]) - x2 = mb.transpose(x=x, perm=[0, 2, 1]) - x3 = mb.transpose(x=x, perm=[0, 2, 1]) - z = mb.add(x=x1, y=x2) - z = mb.add(x=z, y=x3) - return z - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == [ - "transpose", - "transpose", - "transpose", - "add", - "add", - ] - assert get_op_types_in_program(prog) == ["transpose", "add", "add"] - assert_model_is_valid( - prog, - {"x": (2, 3, 5)}, - expected_output_shapes={block.outputs[0].name: (2, 5, 3)}, - ) - - def test_redundant_ops_just_after_input_valid_pattern_2(self): - """ - Input graph: - input----->leaky_relu(alpha=0.3)--->add---> add ---> out - | ^ ^ - | | | - |----->leaky_relu(alpha=0.3)--- | 
- | | - | | - |---->leaky_relu(alpha=0.3)------------ - - Output graph: - input--------->leaky_relu(alpha=0.3)--->add---> add ----> out - | ^ ^ - | | | - |------------- | - | | - |--------------------- - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) - def prog(x): - x1 = mb.leaky_relu(x=x, alpha=0.3) - x2 = mb.leaky_relu(x=x, alpha=0.3) - x3 = mb.leaky_relu(x=x, alpha=0.3) - z = mb.add(x=x1, y=x2) - z = mb.add(x=z, y=x3) - return z - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == [ - "leaky_relu", - "leaky_relu", - "leaky_relu", - "add", - "add", - ] - assert get_op_types_in_program(prog) == ["leaky_relu", "add", "add"] - assert_model_is_valid( - prog, - {"x": (2, 3, 5)}, - expected_output_shapes={block.outputs[0].name: (2, 3, 5)}, - ) - - def test_redundant_ops_just_after_input_valid_pattern_3(self): - """ - Input graph: - input----->leaky_relu(alpha=0.4)--->add---> add ---> out - | ^ ^ - | | | - |----->leaky_relu(alpha=0.3)--- | - | | - | | - |---->leaky_relu(alpha=0.3)------------ - - Output graph: - input----->leaky_relu(alpha=0.4)--->add---> add ---> out - | ^ ^ - | | | - |----->leaky_relu(alpha=0.3)---------- - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) - def prog(x): - x1 = mb.leaky_relu(x=x, alpha=0.4) - x2 = mb.leaky_relu(x=x, alpha=0.3) - x3 = mb.leaky_relu(x=x, alpha=0.3) - z = mb.add(x=x1, y=x2) - z = mb.add(x=z, y=x3) - return z - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == [ - "leaky_relu", - "leaky_relu", - "leaky_relu", - "add", - "add", - ] - assert get_op_types_in_program(prog) == ["leaky_relu", "leaky_relu", "add", "add"] - - leaky_relu_ops = block.find_ops(op_type="leaky_relu") - assert leaky_relu_ops[0].alpha.val == np.float32(0.4) - assert leaky_relu_ops[1].alpha.val == np.float32(0.3) - - def test_redundant_ops_just_after_input_invalid_pattern_1(self): - """ - input----->transpose(perm=[0, 2, 1])---> reshape(shape=[-1]) -----> add ---> out - | ^ - | | - |---->transpose(perm=[1, 0, 2])----> reshape(shape=[-1])------ - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) - def prog(x): - x1 = mb.transpose(x=x, perm=[0, 2, 1]) - x2 = mb.transpose(x=x, perm=[1, 0, 2]) - x1 = mb.reshape(x=x1, shape=[-1]) - x2 = mb.reshape(x=x2, shape=[-1]) - z = mb.add(x=x1, y=x2) - return z - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == [ - "transpose", - "transpose", - "reshape", - "reshape", - "add", - ] - assert get_op_types_in_program(prog) == [ - "transpose", - "transpose", - "reshape", - "reshape", - "add", - ] - assert_model_is_valid( - prog, - {"x": (2, 3, 5)}, - expected_output_shapes={block.outputs[0].name: (30,)}, - ) - - def test_redundant_ops_just_after_input_invalid_pattern_2(self): - """ - input----->leaky_relu(alpha=0.3) -----> add ---> out - | ^ - | | - |---->leaky_relu(alpha=0.4)------- - - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) - def prog(x): - x1 = mb.leaky_relu(x=x, alpha=0.3) - x2 = mb.leaky_relu(x=x, alpha=0.4) - z = mb.add(x=x1, y=x2) - return z - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == ["leaky_relu", "leaky_relu", "add"] - assert get_op_types_in_program(prog) == ["leaky_relu", "leaky_relu", "add"] - 
assert_model_is_valid( - prog, - {"x": (2, 3, 5)}, - expected_output_shapes={block.outputs[0].name: (2, 3, 5)}, - ) - - def test_redundant_ops_just_after_input_invalid_pattern_3(self): - """ - test case, when inputs of 1 op is a subset of the inputs of the other op - - input----->layer_norm1 -----> add ---> out - | ^ - | | - |---->layer_norm2------- - - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(1, 3, 2))]) - def prog(x): - x1 = mb.layer_norm(x=x, axes=[2], epsilon=1e-4) - gamma_val = np.array([1.0, 1.0], dtype=np.float32) - beta_val = np.array([1.0, 0.0], dtype=np.float32) - x2 = mb.layer_norm(x=x, axes=[2], epsilon=1e-4, gamma=gamma_val, beta=beta_val) - z = mb.add(x=x1, y=x2) - return z - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == ["layer_norm", "layer_norm", "add"] - assert get_op_types_in_program(prog) == ["layer_norm", "layer_norm", "add"] - assert_model_is_valid( - prog, - {"x": (1, 3, 2)}, - expected_output_shapes={block.outputs[0].name: (1, 3, 2)}, - ) - - @staticmethod - def _make_repeated_conv_prog(redundant_conv=True, out_channel=2): - prog = Program() - func_inputs = {"x": mb.placeholder(shape=[1, 4, 5, 5])} - with Function(func_inputs) as ssa_fun: - x = ssa_fun.inputs["x"] - x = mb.relu(x=x) - W = np.random.rand(out_channel, 4, 3, 3) - if redundant_conv: - bias = np.random.rand(out_channel) - x1 = mb.conv(x=x, weight=W, bias=bias, pad_type="same", strides=[1, 1]) - x2 = mb.conv(x=x, weight=W, bias=bias, pad_type="same", strides=[1, 1]) - else: - x1 = mb.conv( - x=x, weight=W, bias=np.random.rand(out_channel), pad_type="same", strides=[1, 1] - ) - x2 = mb.conv( - x=x, weight=W, bias=np.random.rand(out_channel), pad_type="same", strides=[1, 1] - ) - x1 = mb.relu(x=x1) - x2 = mb.relu(x=x2) - x1 = mb.avg_pool(x=x1, kernel_sizes=[2, 2], strides=[1, 1], pad_type="same") - z = mb.concat(values=(x1, x2), axis=-3) - ssa_fun.set_outputs([z]) - prog.add_function("main", ssa_fun) - return prog - - def test_redundant_ops_inside_graph_valid_pattern(self): - """ - Input graph: - input--> relu--------->conv------>relu----> pool ---> concat ---> out - | ^ - | | - |---->conv---->relu---------------------------- - - Output graph: - input-> relu--->conv------>relu----> pool ---> concat ---> out - | ^ - | | - |------------------- - """ - prog = self._make_repeated_conv_prog(redundant_conv=True) - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == [ - "relu", - "conv", - "conv", - "relu", - "relu", - "avg_pool", - "concat", - ] - assert get_op_types_in_program(prog) == ["relu", "conv", "relu", "avg_pool", "concat"] - assert_model_is_valid( - prog, - {"x": (1, 4, 5, 5)}, - expected_output_shapes={block.outputs[0].name: (1, 4, 5, 5)}, - ) - - def test_redundant_ops_inside_graph_with_large_const(self): - """ - For the large constants, they need to be deduplicated by the const_deduplication first. - This test is making sure the converter is not doing any "brutal force" comparison. 
- - Input graph: - input--> relu--------->conv------>relu----> pool ---> concat ---> out - | ^ - | | - |---->conv---->relu---------------------------- - - Output graph: - input-> relu--->conv------>relu----> pool ---> concat ---> out - | ^ - | | - |------------------- - """ - # The remove_redundant_ops is not doing brutal force array comparison - prog = self._make_repeated_conv_prog(redundant_conv=True, out_channel=10) - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - ops_in_prev_prog = [ - "relu", - "conv", - "conv", - "relu", - "relu", - "avg_pool", - "concat", - ] - assert get_op_types_in_program(prev_prog) == ops_in_prev_prog - assert get_op_types_in_program(prog) == ops_in_prev_prog - - # We need to first run the const_deduplication pass. - prog = self._make_repeated_conv_prog(redundant_conv=True, out_channel=10) - _, _, block = apply_pass_and_basic_check(prog, "common::const_deduplication") - _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") - _, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - - assert get_op_types_in_program(prog) == ["relu", "conv", "relu", "avg_pool", "concat"] - assert_model_is_valid( - prog, - {"x": (1, 4, 5, 5)}, - expected_output_shapes={block.outputs[0].name: (1, 20, 5, 5)}, - ) - - def test_redundant_ops_inside_graph_invalid_pattern(self): - """ - input--->relu--------->conv1------>relu----> pool ---> concat ---> out - | ^ - | | - |---->conv2---->relu--------------------------- - """ - prog = self._make_repeated_conv_prog(redundant_conv=False) - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == [ - "relu", - "conv", - "conv", - "relu", - "relu", - "avg_pool", - "concat", - ] - assert get_op_types_in_program(prog) == [ - "relu", - "conv", - "conv", - "relu", - "relu", - "avg_pool", - "concat", - ] - assert_model_is_valid( - prog, - {"x": (1, 4, 5, 5)}, - expected_output_shapes={block.outputs[0].name: (1, 4, 5, 5)}, - ) - - def test_redundant_op_as_output_valid_pattern_1(self): - """ - Input graph: - input--------->relu------> out1 - | - | - |---->relu---->tanh---> out2 - - Output graph: - input--------->relu------> out1 - | - | - |---->tanh---> out2 - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) - def prog(x): - x1 = mb.relu(x=x) - x2 = mb.relu(x=x) - return x1, mb.tanh(x=x2) - - prev_prog, _, block = apply_pass_and_basic_check(prog, "common::remove_redundant_ops") - assert get_op_types_in_program(prev_prog) == ["relu", "relu", "tanh"] - assert get_op_types_in_program(prog) == ["relu", "tanh"] - assert_model_is_valid( - prog, - {"x": (2, 3, 5)}, - expected_output_shapes={ - block.outputs[0].name: (2, 3, 5), - block.outputs[1].name: (2, 3, 5), - }, - ) - - def test_redundant_op_as_output_invalid_pattern_1(self): - """ - Input graph: - input--------->relu------> out1 - | - | - |---->relu---> out2 - - "common::remove_redundant_ops" pass does not remove ops if their outputs - are block outputs. 
- """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3, 5))]) - def prog(x): - x1 = mb.relu(x=x) - x2 = mb.relu(x=x) - return x1, x2 - - prev_prog, _, block = apply_pass_and_basic_check( - prog, - "common::remove_redundant_ops", - ) - assert get_op_types_in_program(prev_prog) == ["relu", "relu"] - assert get_op_types_in_program(prog) == ["relu", "relu"] - assert_model_is_valid( - prog, - {"x": (2, 3, 5)}, - expected_output_shapes={ - block.outputs[0].name: (2, 3, 5), - block.outputs[1].name: (2, 3, 5), - }, - ) - - def test_cond_block_program(self): - """ - - Test identical ops within different blocks are not removed. The "relu" op inside true and - false blocks are not removed since they are in different blocks. - - Test ops that have blocks inside them are not removed. There are two cond ops here, - with identical inputs but they are not removed, since they are ops that have nested block - inside them. - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) - def prog(x): - x1 = mb.cast(x=x, dtype="bool") - - def true_fn(): - x = mb.shape(x=x1) - x = mb.cast(x=x, dtype="fp32") - return mb.add(x=x, y=1.0) - - def false_fn(): - x = mb.shape(x=x1) - x = mb.cast(x=x, dtype="fp32") - return mb.add(x=x, y=-1.0) - - z1 = mb.cond(pred=x1, _true_fn=true_fn, _false_fn=false_fn) - z2 = mb.cond(pred=x1, _true_fn=true_fn, _false_fn=false_fn) - z = mb.add(x=z1, y=z2) - return z - - prev_prog, _, block = apply_pass_and_basic_check( - prog, - "common::remove_redundant_ops", - ) - assert get_op_types_in_program(prev_prog) == ["cast", "cond", "cond", "add"] - assert get_op_types_in_program(prog) == ["cast", "cond", "cond", "add"] - cond_op = prog.find_ops(op_type="cond")[0] - assert cond_op.blocks[0].operations[0].op_type == "shape" - assert cond_op.blocks[1].operations[0].op_type == "shape" - assert_model_is_valid( - prog, - {"x": (1,)}, - expected_output_shapes={block.outputs[0].name: (1,)}, - ) - - def test_concat_op_pattern(self): - """ - Input graph: - ---------------> concat ------> log ------> out1 - | ^ - | | - input--------->relu------> concat ------> relu----> out2 - | ^ | - | | | - |---->tanh-------------------- - - Output graph: - |------>log ------> out1 - | - | - input--------->relu------> concat ------> relu----> out2 - | ^ - | | - |---->tanh--------- - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 5))]) - def prog(x): - x1 = mb.relu(x=x) - x2 = mb.tanh(x=x) - c1 = mb.concat(values=(x1, x2), axis=0) - c2 = mb.concat(values=(x1, x2), axis=0) - z1 = mb.log(x=c1) - z2 = mb.relu(x=c2) - return z1, z2 - - prev_prog, _, block = apply_pass_and_basic_check( - prog, - "common::remove_redundant_ops", - ) - assert get_op_types_in_program(prev_prog) == [ - "relu", - "tanh", - "concat", - "concat", - "log", - "relu", - ] - assert get_op_types_in_program(prog) == ["relu", "tanh", "concat", "log", "relu"] - assert_model_is_valid( - prog, - {"x": (10, 5)}, - expected_output_shapes={block.outputs[0].name: (20, 5), block.outputs[1].name: (20, 5)}, - ) - - def test_multiple_redundant_child_ops_pattern(self): - """ - Input graph - - input -------------> reshape ----------> add ---------> out1 - | ^ - | | - |-------> reshape --------------- - | - |------> slice_by_size-----> add ----------> out2 - | ^ - | | - |------> slice_by_size ------- - - Output graph - - input -------------> reshape ----------> add ------------> out1 - | | ^ - | | | - | |--------- - | - |------> slice_by_size----------> add -----------------> out2 - | ^ - | | - |--------------------- - - """ - - 
@mb.program(input_specs=[mb.TensorSpec(shape=(10, 5, 4))]) - def prog(x): - x1 = mb.reshape(x=x, shape=[5, 2, -1]) - x2 = mb.reshape(x=x, shape=[5, 2, -1]) - x3 = mb.slice_by_size(x=x, begin=[0, 0, 1], size=[2, 4, 3]) - x4 = mb.slice_by_size(x=x, begin=[0, 0, 1], size=[2, 4, 3]) - z1 = mb.add(x=x1, y=x2) - z2 = mb.add(x=x3, y=x4) - return z1, z2 - - prev_prog, _, block = apply_pass_and_basic_check( - prog, - "common::remove_redundant_ops", - ) - assert get_op_types_in_program(prev_prog) == [ - "reshape", - "reshape", - "slice_by_size", - "slice_by_size", - "add", - "add", - ] - assert get_op_types_in_program(prog) == ["reshape", "slice_by_size", "add", "add"] - assert_model_is_valid( - prog, - {"x": (10, 5, 4)}, - expected_output_shapes={ - block.outputs[0].name: (5, 2, 20), - block.outputs[1].name: (2, 4, 3), - }, - ) - - def test_random_distribution_op_invalid_pattern(self): - """ - Identical random ops are not removed - - input----->cast---->random_uniform------> add ---> out - | ^ - | | - |---->random_uniform------------ - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(3,))]) - def prog(shape): - shape = mb.cast(x=shape, dtype="int32") - x1 = mb.random_uniform(shape=shape, low=0.0, high=1.0, seed=11) - x2 = mb.random_uniform(shape=shape, low=0.0, high=1.0, seed=11) - return mb.add(x=x1, y=x2) - - prev_prog, _, block = apply_pass_and_basic_check( - prog, - "common::remove_redundant_ops", - ) - assert get_op_types_in_program(prev_prog) == [ - "cast", - "random_uniform", - "random_uniform", - "add", - ] - assert get_op_types_in_program(prog) == ["cast", "random_uniform", "random_uniform", "add"] - - def test_nonreplaceable_vars(self): - """ - Nonreplaceable vars shouldn't be removed, e.g. palettized weights - - const_1----->add---->add_1------| - | | - input add---->output - | | - const_2----->add---->add_2------| - """ - def _constexpr_lut_to_dense(): - lut_data = np.array( - [-19.0, 4.0, 0.0, -1.0, 1.0, 3.0, 5.0, -8.0, 19, 13, 42, 4.5, 5.4, 2.0, -6, -7] - ).astype(np.float32) - indices = np.array([212, 21]).astype(np.uint8) - shape = np.array([4, 1]).astype(np.uint32) - return mb.constexpr_lut_to_dense(lut=lut_data, indices=indices, shape=shape) - - @mb.program(input_specs=[mb.TensorSpec(shape=(4, 1))]) - def prog(x): - constexpr_1 = _constexpr_lut_to_dense() - constexpr_2 = _constexpr_lut_to_dense() - c = mb.add(x=constexpr_1, y=x) - d = mb.add(x=constexpr_2, y=x) - return mb.add(x=c, y=d) - - prev_prog, _, _ = apply_pass_and_basic_check( - prog, - "common::remove_redundant_ops", - ) - assert get_op_types_in_program(prev_prog) == get_op_types_in_program(prog) - - -class TestTopologicalReorder: - def test_move_sink_casts_to_the_end(self): - """ - Input graph: - x (input) ---> square ---> cast (output) - | - | -----------> log ------> cast (output) - | - | -----------> relu -----> cast ----> relu (output) - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) - def prog(x): - x = mb.cast(x=x, dtype="fp16") - x1 = mb.square(x=x) - x2 = mb.cast(x=x1, dtype="fp32") - x3 = mb.log(x=x) - x4 = mb.cast(x=x3, dtype="fp32") - x5 = mb.relu(x=x) - x6 = mb.cast(x=x5, dtype="fp32") - x7 = mb.relu(x=x6) - return x2, x4, x7 - - assert get_op_types_in_program(prog) == [ - "cast", - "square", - "cast", - "log", - "cast", - "relu", - "cast", - "relu", - ] - - apply_pass_and_basic_check(prog, "common::topological_reorder") - _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") - - assert get_op_types_in_program(prog) == [ - "cast", - "square", - "log", - 
"relu", - "cast", - "relu", - "cast", - "cast", - ] - - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={ - block.outputs[0].name: (10, 20), - block.outputs[1].name: (10, 20), - block.outputs[2].name: (10, 20), - }, - ) - - def test_move_sink_cast_transpose_to_the_end(self): - """ - Input graph: - x (input) ---> square ---> transpose ---> cast (output) - | - | -----------> log ------> transpose ---> cast (output) - | - | -----------> relu -----> cast ----> relu (output) - | - | -----------> relu (output) - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) - def prog(x): - x = mb.cast(x=x, dtype="fp16") - x1 = mb.square(x=x) - x1_t = mb.transpose(x=x1, perm=[1, 0]) - x2 = mb.cast(x=x1_t, dtype="fp32") - x3 = mb.log(x=x) - x3_t = mb.transpose(x=x3, perm=[1, 0]) - x4 = mb.cast(x=x3_t, dtype="fp32") - x5 = mb.relu(x=x) - x6 = mb.cast(x=x5, dtype="fp32") - x7 = mb.relu(x=x6) - x8 = mb.relu(x=x) - return x2, x4, x7, x8 - - assert get_op_types_in_program(prog) == [ - "cast", - "square", - "transpose", - "cast", - "log", - "transpose", - "cast", - "relu", - "cast", - "relu", - "relu", - ] - - apply_pass_and_basic_check(prog, "common::topological_reorder") - _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") - - assert get_op_types_in_program(prog) == [ - "cast", - "square", - "log", - "relu", - "cast", - "relu", - "relu", - "transpose", - "cast", - "transpose", - "cast", - ] - - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={ - block.outputs[0].name: (20, 10), - block.outputs[1].name: (20, 10), - block.outputs[2].name: (10, 20), - block.outputs[3].name: (10, 20), - }, - ) - - def test_move_multiple_uses_overlapping(self): - """ - Input graph: - x (input) ---> cast ---> cast (output) - | - |-------> transpose ---> transpose (output) - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) - def prog(x): - x1 = mb.cast(x=x, dtype="fp16") - x2 = mb.cast(x=x1, dtype="fp32") - x3 = mb.transpose(x=x1, perm=[1, 0]) - x4 = mb.transpose(x=x3, perm=[1, 0]) - return x2, x4 - - assert get_op_types_in_program(prog) == ["cast", "cast", "transpose", "transpose"] - - apply_pass_and_basic_check(prog, "common::topological_reorder") - _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") - - assert get_op_types_in_program(prog) == ["cast", "transpose", "transpose", "cast"] - - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={ - block.outputs[0].name: (10, 20), - block.outputs[1].name: (10, 20), - }, - ) - - def test_move_split_to_first_use(self): - """ - Input graph: - x (input) ---> split ---> square ---> add (output) - | | | - | | --------------------| - | - | -----------> square --------------> relu (output) - """ - - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) - def prog(x): - s1, s2 = mb.split(x=x, num_splits=2, axis=0) - x2 = mb.square(x=x) - x3 = mb.relu(x=x2) - s1_1 = mb.square(x=s1) - s3 = mb.add(x=s1_1, y=s2) - return x3, s3 - - assert get_op_types_in_program(prog) == ["split", "square", "relu", "square", "add"] - - block = prog.functions["main"] - # Reorder `split` op to test op with multiple output case - topological_reorder._move_operations_to_the_end_block(block, ["split"]) - assert get_op_types_in_program(prog) == ["square", "relu", "split", "square", "add"] - - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={ - block.outputs[0].name: (10, 20), - block.outputs[1].name: (5, 20), - }, - ) + assert 
get_op_types_in_program(prog) == ["relu"] - def test_move_transpose_before_subblock(self): + def test_fuse_squeeze_expand_dims_negative(self): """ - Input graph: - x (input) ---> cast ---> transpose ---> cast (output) - | - | -----------> square ------> transpose (x1_t) ---> cast (output) - | - | -----------> squeeze ----> equal ----> squeeze - | - (true) <--- / \ ---> (false) - | | - | /<-(x1_t)->\ | - add <-/ \--> add - |---------> | <---------| - | - add ---> cast (output) + If squeeze and expand_dims cannot cancel each other, + the graph pass does nothing """ - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 1, 4, 1, 1))]) def prog(x): - x = mb.cast(x=x, dtype="fp16") - x1 = mb.square(x=x) - x1_t = mb.transpose(x=x1, perm=[1, 0]) + x = mb.squeeze(x=x, axes=(1, 2)) + x = mb.expand_dims(x=x, axes=(1, 3)) + return mb.relu(x=x) - def true_fn(): - return mb.add(x=x1_t, y=np.float16(1), name="x2") + apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") + assert get_op_types_in_program(prog) == ["squeeze", "expand_dims", "relu"] - def false_fn(): - return mb.add(x=x1_t, y=np.float16(2), name="x2") + def test_fuse_squeeze_expand_dims_connected_output(self): + """ + If squeeze is connected to block output, it cannot be removed. + However, the expand_dims can be a block output. + """ + # squeeze connected to output. Nothing happens. + @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) + def prog(x): + squeeze = mb.squeeze(x=x, axes=(0,)) + expand_dims = mb.expand_dims(x=squeeze, axes=(0,)) + return mb.relu(x=expand_dims), squeeze - is_one = mb.equal(x=mb.squeeze(x=x), y=np.float16(1.0)) - pred = mb.squeeze(x=is_one) - x3 = mb.cond(pred=pred, _true_fn=true_fn, _false_fn=false_fn) - x4 = mb.add(x=x1_t, y=x3) - x5 = mb.cast(x=x4, dtype="fp32") - return x5 + apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") + assert get_op_types_in_program(prog) == ["squeeze", "expand_dims", "relu"] - apply_pass_and_basic_check(prog, "common::topological_reorder") - _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") + # expand_dims connected to output. Still good to fuse. + @mb.program(input_specs=[mb.TensorSpec(shape=(1,))]) + def prog(x): + squeeze = mb.squeeze(x=x, axes=(0,)) + expand_dims = mb.expand_dims(x=squeeze, axes=(0,)) + return mb.relu(x=expand_dims), expand_dims - assert get_op_types_in_program(prog) == [ - "cast", - "square", - "squeeze", - "equal", - "squeeze", - "transpose", - "cond", - "add", - "cast", - ] + apply_pass_and_basic_check(prog, "common::fuse_squeeze_expand_dims") + assert get_op_types_in_program(prog) == ["identity", "relu"] - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={block.outputs[0].name: (20, 10)}, - ) - def test_cast_transpose_already_at_the_end(self): +class TestAddConvTransposeOutputShape: + def test_add_conv_transpose_output_shape(self): """ - Input graph: - x (input) ---> square ---> transpose ---> cast (output) - | - | -----------> log ------> transpose ---> cast (output) - | - | -----------> relu -----> cast ----> relu (output) - | - | -----------> relu (output) + Given: + %1: (1, 5, 39, fp32) = conv_transpose(...) # no output_shape input. 
+ + Result: + %2: (3, i32) = const(val=[1,5,39]) + %3: (1, 5, 39, fp32) = conv_transpose(..., output_shape=%2) """ + N, C_in, C_out, D1 = 1, 3, 5, 20 - @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + @mb.program(input_specs=[mb.TensorSpec(shape=(N, C_in, D1))]) def prog(x): - x = mb.cast(x=x, dtype="fp16") - x1 = mb.square(x=x) - x3 = mb.log(x=x) - x5 = mb.relu(x=x) - x6 = mb.cast(x=x5, dtype="fp32") - x7 = mb.relu(x=x6) - x8 = mb.relu(x=x) - x1_t = mb.transpose(x=x1, perm=[1, 0]) - x2 = mb.cast(x=x1_t, dtype="fp32") - x3_t = mb.transpose(x=x3, perm=[1, 0]) - x4 = mb.cast(x=x3_t, dtype="fp32") - return x2, x4, x7, x8 - - assert get_op_types_in_program(prog) == [ - "cast", - "square", - "log", - "relu", - "cast", - "relu", - "relu", - "transpose", - "cast", - "transpose", - "cast", - ] - - apply_pass_and_basic_check(prog, "common::topological_reorder") - _, _, block = apply_pass_and_basic_check(prog, "common::dead_code_elimination") - - assert get_op_types_in_program(prog) == [ - "cast", - "square", - "log", - "relu", - "cast", - "relu", - "relu", - "transpose", - "cast", - "transpose", - "cast", - ] + weight = np.random.rand(C_in, C_out, D1).astype(np.float32) + return mb.conv_transpose(x=x, weight=weight) - assert_model_is_valid( - prog, - {"x": (10, 20)}, - expected_output_shapes={ - block.outputs[0].name: (20, 10), - block.outputs[1].name: (20, 10), - block.outputs[2].name: (10, 20), - block.outputs[3].name: (10, 20), - }, + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::add_conv_transpose_output_shape" ) + assert get_op_types_in_program(prev_prog) == ["conv_transpose"] + assert get_op_types_in_program(prog) == ["conv_transpose"] + prev_conv_transpose_op = prev_prog.find_ops(op_type="conv_transpose", exactly_one=True)[0] + conv_transpose_op = prog.find_ops(op_type="conv_transpose", exactly_one=True)[0] + assert np.all(conv_transpose_op.output_shape.val == prev_conv_transpose_op.outputs[0].shape) class TestChildOrdering: @@ -3707,6 +1674,41 @@ class TestCastOptimizationReduendantCastRemoval: """ Test single cast op removal. """ + + def test_time_complexity(self): + """ + This test makes sure that cast_optimization's time complexity is O(N) in most cases. + + In this test case, the program consists of 1000 relu ops followed by 100 cast ops. + + input -> relu -> relu -> ... -> relu -> cast -> cast -> ... -> cast + + The algorithm goes through the first pass to eliminate all cast ops: + + input -> relu -> ... -> relu + + Note that the total number of visited ops is 1000 (relu) + 100 (cast) + 100 (const for the dtype) = 1200. + + Because a fusion happened, the algorithm goes through the program again. + This time, the number of visited ops is 1000 (relu) + 100 (const) = 1100. + + Overall, the number of visited ops is 2300. + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(1,), dtype=types.fp32)]) + def prog(x): + for _ in range(1000): + x = mb.relu(x=x) + for _ in range(100): + x = mb.cast(x=x, dtype="fp32") + return x + + graph_pass = cast_optimization() + graph_pass.apply(prog) + assert ( + graph_pass._num_of_visited_ops == 2_300 + ) # Please refer to the docstring for how the 2300 is derived.
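A quick back-of-the-envelope check of the 2300 visited-op count described in the docstring above; this is a standalone sketch with illustrative variable names, not part of the test itself:

.. sourcecode:: python

    # First sweep: every relu, every cast, and the const op feeding each cast's
    # dtype is visited once, and all of the casts get eliminated.
    n_relu, n_cast = 1000, 100
    first_sweep = n_relu + 2 * n_cast   # 1200
    # Because a fusion happened, the pass sweeps the program again; the casts are
    # gone, but their const ops remain until dead_code_elimination runs.
    second_sweep = n_relu + n_cast      # 1100
    assert first_sweep + second_sweep == 2_300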
+ def test_remove_redundant_cast_smoke(self): """ Input graph: @@ -4660,6 +2662,7 @@ def _false_fn(): v.dtype.val for v in false_block.find_ops(op_type="cast") ] + class TestConv1dCompositionPasses: @pytest.mark.parametrize( "backend, has_strides, pad_type, has_pad, has_dilations, has_bias", @@ -4690,7 +2693,8 @@ def test_conv1d_composition( if has_strides: conv_kwargs["strides"] = (2, 2) if has_pad: - conv_kwargs["pad"] = (1, 1, 1, 1) + # The pad is specially designed to make sure the output of conv has dim_size=1 at axis 1. + conv_kwargs["pad"] = (0, 0, 1, 1) if pad_type == "custom" else (1, 1, 1, 1) if has_dilations: conv_kwargs["dilations"] = (2, 2) if has_bias: @@ -4896,7 +2900,8 @@ def test_conv1d_channellast_composition( if has_strides: conv_kwargs["strides"] = (2, 2) if has_pad: - conv_kwargs["pad"] = (1, 1, 1, 1) + # The pad is specially designed to make sure the output of conv has dim_size=1 at axis 1. + conv_kwargs["pad"] = (0, 0, 1, 1) if pad_type == "custom" else (1, 1, 1, 1) if has_dilations: conv_kwargs["dilations"] = (2, 2) if has_bias: @@ -4961,7 +2966,8 @@ def test_conv1d_channellast_composotion_dynamic_weight(self, backend): K = 4 strides = (1, 2) - pad = (1, 0, 0, 1) + # The pad is specially designed to make sure the output of conv has dim_size=1 at axis 1. + pad = (0, 0, 0, 1) # MIL convolution with dynamic weights does not support dilations != 1 # see coremltools/coremltools/converters/mil/mil/ops/defs/iOS15/conv.py dilations = (1, 1) @@ -5035,7 +3041,7 @@ def test_conv1d_channellast_bias_fusion(self, backend, has_bias, bias_op_type): K = 4 strides = (1, 2) - pad = (0, 1, 1, 0) + pad = (0, 0, 1, 0) dilations = (1, 2) # infer L_out with pad_type fixed to custom @@ -5414,6 +3420,36 @@ def prog(x): apply_pass_and_basic_check(prog, "common::dead_code_elimination") assert get_op_types_in_program(prog) == ["conv"] + def test_scope_back_propagation(self): + Cin, Cout = 3, 3 + input_shape = (2, Cin, 100, 100) + + @mb.program(input_specs=[mb.TensorSpec(shape=input_shape)]) + def prog(x): + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"])): + x = self.get_conv(x, "conv1") + + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"])): + x = self.get_linear(x, "linear1", "add") + + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_3"])): + x = self.get_conv(x, "conv2") + + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_4"])): + x = self.get_linear(x, "linear2", "add") + return x + + apply_pass_and_basic_check(prog, "common::fuse_conv_bias") + assert get_op_types_in_program(prog) == ["conv", "conv"] + + conv_ops = prog.functions["main"].find_ops(op_type="conv") + assert conv_ops[0].scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2", "fuse_conv_bias"] + } + assert conv_ops[1].scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_4", "fuse_conv_bias"] + } + """ Input graph: Const @@ -7551,7 +5587,9 @@ def prog(x): x4 = mb.add(x=x1, y=x3) return mb.relu(x=x4) - prog.main_input_types = [ct.ImageType(name="x", shape=(10, 20, 30, 3), channel_first=False)] + prog.functions["main"].input_types = [ + ct.ImageType(name="x", shape=(10, 20, 30, 3), channel_first=False) + ] prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "common::image_input_preprocess" ) @@ -7594,7 +5632,7 @@ def test_nn_backend_style_sanitization(self): for the NN backend. 
""" - prog = Program() + prog = mil.Program() func_inputs = {"x/0": mb.placeholder(shape=[2, 3]), "y": mb.placeholder(shape=[2, 3])} with Function(func_inputs) as ssa_fun: x, y = ssa_fun.inputs["x/0"], ssa_fun.inputs["y"] @@ -7654,7 +5692,7 @@ def prog(input): x = mb.square(x=input, name="output_square") return x - prog.set_main_output_types([ct.TensorType(dtype=np.float16)]) + prog.functions["main"].set_output_types([ct.TensorType(dtype=np.float16)]) prev_prog, prev_block, block = apply_pass_and_basic_check( prog, "common::update_output_dtypes", skip_output_type_check=True ) @@ -7691,7 +5729,7 @@ def prog(input): x1, x2 = mb.split(x=input, num_splits=2, axis=1, name="split") return x1, x2 - prog.set_main_output_types([ct.TensorType(), ct.TensorType(dtype=np.float16)]) + prog.functions["main"].set_output_types([ct.TensorType(), ct.TensorType(dtype=np.float16)]) _, _, block = apply_pass_and_basic_check( prog, "common::update_output_dtypes", skip_output_type_check=True ) @@ -7723,7 +5761,7 @@ def test_output_as_input(self, caplog): def prog(input): return input - prog.set_main_output_types([ct.TensorType(dtype=np.float16)]) + prog.functions["main"].set_output_types([ct.TensorType(dtype=np.float16)]) _, _, block = apply_pass_and_basic_check( prog, "common::update_output_dtypes", @@ -8378,7 +6416,6 @@ def prog(x): prog, {"x": shape}, expected_output_shapes={block.outputs[0].name: shape} ) - class TestFuseLinearBias: @staticmethod def _apply_transform(inputs, func, is_first_input, has_bias): @@ -8573,3 +6610,387 @@ def prog(x): if _VALIDATE_MODEL: assert_model_is_valid(prog, {"x": (2, 4)}) + + +class TestGraphPassScopePreservation: + def test_single_pass(self): + """ + Input: + + x + -> relu(torch_scope="module_1") + -> transpose_1(torch_scope="module_1") + -> transpose_2(torch_scope="module_2") + -> output + + Output: + + x + -> relu(torch_scope="module_1") + -> transpose_3( + torch_scope="module_2", + pass_scope="merge_consecutive_transposes" + ) + -> output + + In the above case, the relu op preserves its original scope information. + Since transpose_3 is created by the "merge_consecutive_transposes" pass, the COREMLTOOLS_GRAPH_PASS scope + information will be saved in the op. + Also, the TORCHSCRIPT_MODULE_TYPE scope info of transpose_2 is back propagated to transpose_3, + when the use of output of transpose_2 is replaced by the output of transpose_3. 
+ """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_1"), + ): + x = mb.relu(x=x) + x = mb.transpose(x=x, perm=[0, 2, 1, 3]) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_2"), + ): + return mb.transpose(x=x, perm=[3, 2, 0, 1]) + + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE) + + apply_pass_and_basic_check(prog, "common::merge_consecutive_transposes") + assert get_op_types_in_program(prog) == ["relu", "transpose"] + + # the scope info in the relu op is not affected + relu_op = prog.find_ops(op_type="relu")[0] + assert len(relu_op.scopes) == 1 + assert relu_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + + # the new transpose op has the scope information from the graph pass + transpose_op = prog.find_ops(op_type="transpose")[0] + assert len(transpose_op.scopes) == 2 + assert transpose_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "merge_consecutive_transposes" + ] + assert transpose_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_2"] + + def test_single_pass_without_creating_new_var(self): + """ + Input: + + x + -> relu(torch_scope="module_1") + -> relu(torch_scope="module_2") + -> relu(torch_scope="module_3") + -> output + + Output: + + x + -> relu(torch_scope="module_1") + -> output + + In the above case, the relu op preserves its original scope information, since the graph pass only reconnects the graph. + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_1"), + ): + x = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_2"), + ): + x = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_3"), + ): + return mb.relu(x=x) + + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE) + + apply_pass_and_basic_check(prog, "common::merge_consecutive_relus") + assert get_op_types_in_program(prog) == ["relu"] + + # the scope info in the relu op is not affected + relu_op = prog.find_ops(op_type="relu")[0] + assert len(relu_op.scopes) == 1 + assert relu_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + + def test_multiple_passes(self): + """ + In this case, a program goes through two graph passes. + And the resulting program should have scope information from both passes. 
+ """ + shape = (3, 5, 6, 7) + + @mb.program(input_specs=[mb.TensorSpec(shape=shape)]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_1"), + ): + # dummy op + x = mb.relu(x=x) + + # pattern for "merge_consecutive_transposes" + x = mb.transpose(x=x, perm=[0, 2, 1, 3]) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_2"), + ): + x = mb.transpose(x=x, perm=[3, 2, 0, 1]) + + # pattern for "fuse_layernorm_or_instancenorm" + mean0 = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=True) + sub0 = mb.sub(x=x, y=mean0) + sub1 = mb.sub(x=x, y=mean0) + square = mb.square(x=sub0) + mean1 = mb.reduce_mean(x=square, axes=[2, 3], keep_dims=True) + add_eps = mb.add(x=mean1, y=1e-5) # epsilon + pow = mb.pow(x=add_eps, y=0.5) + div = mb.real_div(x=sub1, y=pow) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_3"), + ): + mul_gamma = mb.mul(x=np.random.rand(1, shape[1], 1, 1), y=div) + return mb.add(x=np.random.rand(1, shape[1], 1, 1), y=mul_gamma) + + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE) + + apply_pass_and_basic_check(prog, "common::fuse_layernorm_or_instancenorm") + apply_pass_and_basic_check(prog, "common::merge_consecutive_transposes") + assert get_op_types_in_program(prog) == ["relu", "transpose", "instance_norm"] + + # the scope info in the relu op is not affected + relu_op = prog.find_ops(op_type="relu")[0] + assert len(relu_op.scopes) == 1 + assert relu_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + + # the new transpose op has the scope information from the graph pass + transpose_op = prog.find_ops(op_type="transpose")[0] + assert len(transpose_op.scopes) == 2 + assert transpose_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "merge_consecutive_transposes" + ] + assert transpose_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_2"] + + # the new instance_norm op has the scope information from the graph pass + instance_norm_op = prog.find_ops(op_type="instance_norm")[0] + assert len(instance_norm_op.scopes) == 2 + assert instance_norm_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "fuse_layernorm_or_instancenorm" + ] + assert instance_norm_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_3"] + + def test_fp16_scope_preservation(self): + """ + This test explains step-by-step how the scope information preservation works in the fp32 -> fp16 pass. + + Input graph: + + x(fp32) + -> relu(torch_scope="module_1") + -> sin(torch_scope="module_2") + -> output(fp32) + + (1) "common::add_fp16_cast" + + First, in the add_fp16_cast graph pass, multiple cast ops are injected in the graph: + + x(fp32) + -> cast(dtype="fp16", torch_scope="module_1", pass_scope="add_fp16_cast") + -> relu(torch_scope="module_1", pass_scope="add_fp16_cast") + -> cast(dtype="fp32", torch_scope="module_1", pass_scope="add_fp16_cast") + -> cast(dtype="fp16", torch_scope="module_2", pass_scope="add_fp16_cast") + -> sin(torch_scope="module_2", pass_scope="add_fp16_cast") + -> cast(dtype="fp32, torch_scope="module_2", pass_scope="add_fp16_cast") + -> output + + There are 4 cast ops in the graph who has pass_scope = "add_fp16_cast", which indicates they are added by the "add_fp16_cast" pass. + + Note that, the first cast -> relu -> cast pattern has the same torch scope information as + the original relu(torch_scope="module_1"). 
This is due to the fact that when we replace + the use of the original relu output with the output of the second cast op, the scope information is back propagated. + + The same reasoning applies to why the cast -> sin -> cast pattern has the same torch scope as + the original sin op. + + (2) "common::cast_optimization" + "common::dead_code_elimination" + + After the cleanup, the graph becomes: + + x(fp32) + -> cast( + dtype="fp16", + torch_scope="module_1", + pass_scope="add_fp16_cast" + ) + -> relu( + torch_scope="module_1", + pass_scope="add_fp16_cast" + ) + -> sin( + torch_scope="module_2", + pass_scope="add_fp16_cast" + ) + -> cast( + dtype="fp32", + torch_scope="module_2", + pass_scope="add_fp16_cast" + ) + -> output + + We can see that the fp16 version of relu / sin preserves the original torch scope information. + """ + shape = (3, 5, 6, 7) + + @mb.program(input_specs=[mb.TensorSpec(shape=shape)]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_1"), + ): + x = mb.relu(x=x) + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_2"), + ): + return mb.sin(x=x) + + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE) + + # fp16 cast pass + apply_pass_and_basic_check(prog, "common::add_fp16_cast") + assert get_op_types_in_program(prog) == ["cast", "relu", "cast", "cast", "sin", "cast"] + + cast_ops = prog.find_ops(op_type="cast") + assert len(cast_ops) == 4 + assert len(cast_ops[0].scopes) == 2 + assert cast_ops[0].scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["add_fp16_cast"] + assert cast_ops[0].scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + assert len(cast_ops[1].scopes) == 2 + assert cast_ops[1].scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["add_fp16_cast"] + assert cast_ops[1].scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + assert len(cast_ops[2].scopes) == 2 + assert cast_ops[2].scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "add_fp16_cast", + ] + assert cast_ops[2].scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_2"] + assert len(cast_ops[3].scopes) == 2 + assert cast_ops[3].scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["add_fp16_cast"] + assert cast_ops[3].scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_2"] + + relu_op = prog.find_ops(op_type="relu")[0] + assert len(relu_op.scopes) == 2 + assert relu_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["add_fp16_cast"] + assert relu_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + + sin_op = prog.find_ops(op_type="sin")[0] + assert len(sin_op.scopes) == 2 + assert sin_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["add_fp16_cast"] + assert sin_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_2"] + + # clean up with cast optimization and dead code elimination + apply_pass_and_basic_check(prog, "common::cast_optimization") + apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == ["cast", "relu", "sin", "cast"] + cast_ops = prog.find_ops(op_type="cast") + assert len(cast_ops) == 2 + assert len(cast_ops[0].scopes) == 2 + assert cast_ops[0].scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["add_fp16_cast"] + assert cast_ops[0].scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + assert len(cast_ops[1].scopes) == 2 + assert cast_ops[1].scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["add_fp16_cast"] + assert cast_ops[1].scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_2"] + + relu_op =
prog.find_ops(op_type="relu")[0] + assert len(relu_op.scopes) == 2 + assert relu_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "add_fp16_cast", + ] + assert relu_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + + sin_op = prog.find_ops(op_type="sin")[0] + assert len(sin_op.scopes) == 2 + assert sin_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["add_fp16_cast"] + assert sin_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_2"] + + def test_pass_followed_by_fp16(self): + """ + Input: + + x + -> transpose_1(torch_scope="module_1") + -> transpose_2(torch_scope="module_2") + -> output + + Output: + + x + -> cast( + dtype="fp16", + torch_scope="module_2", + pass_scope=["merge_consecutive_transposes", "add_fp16_cast"] + ) + -> transpose_3_fp16( + torch_scope="module_2", + pass_scope=["merge_consecutive_transposes", "add_fp16_cast"] + ) + -> cast(dtype="fp32", + torch_scope="module_2", + pass_scope=["merge_consecutive_transposes", "add_fp16_cast"] + ) + -> output + + In the above case, two transpose ops first merged into a single transpose op, + and the graph is transformed into fp16. + + Hence, the final transpose op should have scope information from both graph passes. + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_1"), + ): + x = mb.transpose(x=x, perm=[0, 2, 1, 3]) + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="module_2"), + ): + return mb.transpose(x=x, perm=[3, 2, 0, 1]) + + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE) + + apply_pass_and_basic_check(prog, "common::merge_consecutive_transposes") + apply_pass_and_basic_check(prog, "common::add_fp16_cast") + apply_pass_and_basic_check(prog, "common::cast_optimization") + apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == ["cast", "transpose", "cast"] + + cast_ops = prog.find_ops(op_type="cast") + assert len(cast_ops) == 2 + assert len(cast_ops[0].scopes) == 2 + assert cast_ops[0].scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "merge_consecutive_transposes", + "add_fp16_cast", + ] + assert cast_ops[0].scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_2"] + assert len(cast_ops[1].scopes) == 2 + assert cast_ops[1].scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "merge_consecutive_transposes", + "add_fp16_cast", + ] + assert cast_ops[1].scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_2", + ] + + transpose_op = prog.find_ops(op_type="transpose")[0] + assert len(transpose_op.scopes) == 2 + assert transpose_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "merge_consecutive_transposes", + "add_fp16_cast", + ] + assert transpose_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_2", + ] diff --git a/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py b/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py index b1cbcf52f..ebcd0d7e5 100644 --- a/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py +++ b/coremltools/converters/mil/mil/passes/tests/test_quantization_passes.py @@ -16,6 +16,7 @@ from coremltools._deps import _HAS_TORCH, _IS_MACOS, MSG_TORCH_NOT_FOUND from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil.passes.defs import quantization +from coremltools.converters.mil.mil.passes.defs.quantization import add_fp16_cast from 
coremltools.converters.mil.mil.types import numpy_type_to_builtin_type from coremltools.converters.mil.testing_utils import ( apply_pass_and_basic_check, @@ -31,7 +32,8 @@ class TestTensorwiseAffineDequantizeConstElimination: - def test_eliminate_transpose(self): + @pytest.mark.parametrize("axis", (None, 0, 1, -1)) + def test_eliminate_transpose(self, axis): """ Input graph: data -> constexpr_affine_dequantize -> transpose @@ -41,21 +43,28 @@ def test_eliminate_transpose(self): where new_data is the value after applying transpose to data """ - quantized_data = np.random.randint(0, 256, (1, 2, 3, 4)).astype(np.int8) + SHAPE = (1, 2, 3, 4) + quantized_data = np.random.randint(0, 256, SHAPE).astype(np.int8) + if axis is None: + axis = 0 # although tensor-wise, constexpr_affine_dequantize requires a (dummy) axis + scale = np.random.rand() + zero_point = np.random.randint(-127, 128, dtype=np.int8) + else: + size = SHAPE[axis] + scale = np.random.rand(size) + zero_point = np.random.randint(-127, 128, size, dtype=np.int8) @mb.program(input_specs=[], opset_version=ct.target.iOS16) def prog(): res = mb.constexpr_affine_dequantize( quantized_data=quantized_data, - axis=0, - scale=8.9, - zero_point=np.int8(34), + axis=axis, + scale=scale, + zero_point=zero_point, ) return mb.transpose(x=res, perm=(2, 0, 1, 3)) - apply_pass_and_basic_check( - prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" - ) + apply_pass_and_basic_check(prog, "common::merge_affine_dequantize_with_consecutive_ops") assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] @@ -84,9 +93,7 @@ def prog(): ) return mb.reshape(x=res, shape=(3, -1)) - apply_pass_and_basic_check( - prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" - ) + apply_pass_and_basic_check(prog, "common::merge_affine_dequantize_with_consecutive_ops") assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] @@ -115,9 +122,7 @@ def prog(): ) return mb.expand_dims(x=res, axes=(0, 2, 4)) - apply_pass_and_basic_check( - prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" - ) + apply_pass_and_basic_check(prog, "common::merge_affine_dequantize_with_consecutive_ops") assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] @@ -147,9 +152,7 @@ def prog(): ) return mb.squeeze(x=res, axes=axis) - apply_pass_and_basic_check( - prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" - ) + apply_pass_and_basic_check(prog, "common::merge_affine_dequantize_with_consecutive_ops") assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] @@ -182,9 +185,7 @@ def prog(): res = mb.expand_dims(x=res, axes=(0, 2, 4)) return mb.squeeze(x=res, axes=(2,)) - apply_pass_and_basic_check( - prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" - ) + apply_pass_and_basic_check(prog, "common::merge_affine_dequantize_with_consecutive_ops") assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] new_op = prog.find_ops(op_type="constexpr_affine_dequantize", exactly_one=True)[0] @@ -196,39 +197,6 @@ def prog(): np.testing.assert_array_equal(new_op.quantized_data.val, expected_quantized_data) - def 
test_negative_channel_wise_pattern(self): - """ - If ``constexpr_affine_dequantize`` is not tensor-wise, - the graph is not changed. - """ - quantized_data = np.random.randint(0, 256, (2, 3, 4)).astype(np.int8) - - @mb.program(input_specs=[], opset_version=ct.target.iOS16) - def prog(): - x = mb.constexpr_affine_dequantize( - quantized_data=quantized_data, - axis=0, - scale=[8.9, 6.5], - zero_point=np.int8(34), - ) - y = mb.constexpr_affine_dequantize( - quantized_data=quantized_data, - axis=0, - scale=8.9, - zero_point=np.int8([34, 56]), - ) - return mb.transpose(x=x, perm=(1, 0, 2)), mb.transpose(x=y, perm=(1, 0, 2)) - - apply_pass_and_basic_check( - prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" - ) - assert get_op_types_in_program(prog) == [ - "constexpr_affine_dequantize", - "constexpr_affine_dequantize", - "transpose", - "transpose", - ] - def test_negative_non_linked_list_pattern(self): """ If ``quantized_data`` feeds into multiple ``constexpr_affine_dequantize`` ops, @@ -253,9 +221,7 @@ def prog(): ) return mb.transpose(x=x, perm=(1, 0, 2)), mb.reshape(x=y, shape=(24,)) - apply_pass_and_basic_check( - prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" - ) + apply_pass_and_basic_check(prog, "common::merge_affine_dequantize_with_consecutive_ops") assert get_op_types_in_program(prog) == [ "constexpr_affine_dequantize", "constexpr_affine_dequantize", @@ -282,9 +248,7 @@ def prog(): y = mb.transpose(x=x, perm=(0, 3, 2, 1)) return x, y - apply_pass_and_basic_check( - prog, "common::merge_tensorwise_affine_dequantize_with_consecutive_ops" - ) + apply_pass_and_basic_check(prog, "common::merge_affine_dequantize_with_consecutive_ops") assert get_op_types_in_program(prog) == [ "constexpr_affine_dequantize", "transpose", @@ -1741,10 +1705,11 @@ def prog(): return y assert get_op_types_in_program(prog) == ["dequantize"] + dequantize_op = prog.find_ops(op_type="dequantize")[0] + assert dequantize_op.outputs[0].val is None + assert dequantize_op.can_materialize_val() - prev_prog, prev_block, block = apply_pass_and_basic_check( - prog, "common::dequantize_to_constexpr" - ) + apply_pass_and_basic_check(prog, "common::dequantize_to_constexpr") assert get_op_types_in_program(prog) == ["constexpr_affine_dequantize"] @pytest.mark.parametrize( @@ -2196,20 +2161,150 @@ def prog(x): ) +class TestTransformFunctionSignatures: + @staticmethod + def test_empty(): + """ + Case where the input var is also a block output. 
+ """ + # case 1 + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + return x + + graph_pass = add_fp16_cast() + block = prog.functions["main"] + graph_pass.transform_function_signatures(block) + apply_pass_and_basic_check(prog, "common::dead_code_elimination") + + assert get_op_types_in_program(prog) == [] + assert block.inputs["x"].dtype == types.fp16 + assert len(block.outputs) == 1 + assert block.outputs[0].dtype == types.fp16 + assert block.outputs[0] is block.inputs["x"] + + # case 2 + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + return x, mb.relu(x=x), x, x + + graph_pass = add_fp16_cast() + block = prog.functions["main"] + graph_pass.transform_function_signatures(block) + + assert block.inputs["x"].dtype == types.fp16 + assert len(block.outputs) == 4 + + assert block.outputs[0].dtype == types.fp16 + assert block.outputs[2].dtype == types.fp16 + assert block.outputs[3].dtype == types.fp16 + + assert block.outputs[1].dtype == types.fp32 + + assert block.outputs[0] is block.inputs["x"] + assert block.outputs[2] is block.inputs["x"] + assert block.outputs[3] is block.inputs["x"] + + assert all([x.dtype == types.fp16 for x in block.output_types]) + + assert get_op_types_in_program(prog) == ["cast", "relu"] + cast_op = block.find_ops(op_type="cast")[0] + assert cast_op.dtype.val == "fp32" + + @staticmethod + def test_simple(): + """ + Input graph: + + input(fp32) -> relu -> output + + Output graph: + + input(fp16) -> cast(dtype="fp32") -> relu -> output, + + with function.output_types = [ct.TesorType(dtype=types.fp16)] + + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + return mb.relu(x=x) + + graph_pass = add_fp16_cast() + block = prog.functions["main"] + graph_pass.transform_function_signatures(block) + + assert block.inputs["x"].dtype == types.fp16 + + assert get_op_types_in_program(prog) == ["cast", "relu"] + cast_op = block.find_ops(op_type="cast")[0] + assert cast_op.dtype.val == "fp32" + + assert len(block.outputs) == 1 + assert block.outputs[0].dtype == types.fp32 + + assert len(block.output_types) == 1 + assert block.output_types[0].dtype == types.fp16 + + @staticmethod + def test_simple_2(): + """ + Input graph: + + input(fp32) -> identity -> cast(dtype="int32") -> output_1 + | + .-> output_2 + + Output graph: + + input(fp16) -> cast(dtype="fp32") -> identity -> cast(dtype="int32") -> output_1 + | + .-> output_2, + + with function.output_types = [ct.TesorType(dtype=types.int32), ct.TesorType(dtype=types.fp16)] + + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + x = mb.identity(x=x) + return mb.cast(x=x, dtype="int32"), x + + graph_pass = add_fp16_cast() + block = prog.functions["main"] + graph_pass.transform_function_signatures(block) + + assert block.inputs["x"].dtype == types.fp16 + + assert get_op_types_in_program(prog) == ["cast", "identity", "cast"] + cast_ops = block.find_ops(op_type="cast") + assert cast_ops[0].dtype.val == "fp32" + assert cast_ops[1].dtype.val == "int32" + + assert len(block.outputs) == 2 + assert block.outputs[0].dtype == types.int32 + assert block.outputs[1].dtype == types.fp32 + + assert len(block.output_types) == 2 + assert block.output_types[0].dtype == types.int32 + assert block.output_types[1].dtype == types.fp16 + + class TestInt32CastToInt16: @pytest.mark.parametrize( - "x_dtype, dynamic, opset_version", + "x_dtype, dynamic, has_neg, opset_version", itertools.product( [np.int32, np.float32], [True, False], + [True, False], 
[ct.target.iOS15, ct.target.iOS16, ct.target.iOS17], ), ) - def test_gather_int16_indices(self, x_dtype, dynamic, opset_version): + def test_gather_int16_indices(self, x_dtype, dynamic, has_neg, opset_version): @mb.program(opset_version=opset_version) def prog_static(): params = np.array([[1, 2, 3], [4, 5, 6]], dtype=x_dtype) - indices = np.array([1, 0], dtype=np.int32) + indices = np.array([-2, 0] if has_neg else [1, 0], dtype=np.int32) return mb.gather(x=params, indices=indices, axis=-1) @mb.program( @@ -2241,7 +2336,7 @@ def prog_dynamic(x, indices): assert get_op_types_in_program(prog) == expected_ops indices_cast_op_idx = 1 if x_dtype == np.int32 else 0 cast_op = block.find_ops(op_type="cast")[indices_cast_op_idx] - assert cast_op.dtype.val == "int16" + assert cast_op.dtype.val == ("int16" if has_neg else "uint16") assert len(cast_op.outputs) == 1 assert len(cast_op.outputs[0].child_ops) == 1 assert cast_op.outputs[0].child_ops[0].op_type == "gather" @@ -2255,19 +2350,42 @@ def prog_dynamic(x, indices): rtol=1e-05, ) + def test_gather_int16_scalar_indices(self): + @mb.program(input_specs=[], opset_version=ct.target.iOS17) + def prog_static(): + params = np.array([1, 2, 3, 4], dtype=np.int32) + res = mb.gather(x=params, indices=0, axis=0, batch_dims=0, validate_indices=False) + return res + + @mb.program( + input_specs=[mb.TensorSpec(shape=(4,), dtype=types.int32)], + opset_version=ct.target.iOS17, + ) + def prog_dynamic(x): + return mb.gather(x=x, indices=0, axis=0) + + for prog in (prog_static, prog_dynamic): + assert get_op_types_in_program(prog) == ["gather"] + prev_prog, _, block = apply_pass_and_basic_check(prog, "common::add_int16_cast") + expected_ops = ["cast", "cast", "gather", "cast"] + assert get_op_types_in_program(prog) == expected_ops + @pytest.mark.parametrize( - "x_dtype, dynamic, opset_version", + "x_dtype, dynamic, has_neg, opset_version", itertools.product( [np.int32, np.float32], [True, False], + [True, False], [ct.target.iOS15, ct.target.iOS16, ct.target.iOS17], ), ) - def test_gather_along_axis_int16_indices(self, x_dtype, dynamic, opset_version): + def test_gather_along_axis_int16_indices(self, x_dtype, dynamic, has_neg, opset_version): @mb.program(opset_version=opset_version) def prog_static(): params = np.array([[1, 2, 3], [4, 5, 6]], dtype=x_dtype) - indices = np.array([[1, 0, 1], [1, 1, 0]], dtype=np.int32) + indices = np.array( + [[-2, 0, -2], [-2, -2, 0]] if has_neg else [[1, 0, 1], [1, 1, 0]], dtype=np.int32 + ) return mb.gather_along_axis(x=params, indices=indices, axis=-1) @mb.program( @@ -2299,7 +2417,7 @@ def prog_dynamic(x, indices): assert get_op_types_in_program(prog) == expected_ops indices_cast_op_idx = 1 if x_dtype == np.int32 else 0 cast_op = block.find_ops(op_type="cast")[indices_cast_op_idx] - assert cast_op.dtype.val == "int16" + assert cast_op.dtype.val == ("int16" if has_neg else "uint16") assert len(cast_op.outputs) == 1 assert len(cast_op.outputs[0].child_ops) == 1 assert cast_op.outputs[0].child_ops[0].op_type == "gather_along_axis" @@ -2336,17 +2454,25 @@ def prog(x, indices): assert cast_op.dtype.val == "int16" assert cast_op.outputs[0] == block.find_ops(op_type="gather")[0].indices - def test_gather_static_overflow_int16(self): - """Indices cannot be represented by int16 range, don't cast to int16.""" + @pytest.mark.parametrize("overflow_uint16", [True, False]) + def test_gather_static_overflow_int16(self, overflow_uint16): + """Indices cannot be represented by int16 range, but might be represented by uint16.""" + max_index = 65536 if
overflow_uint16 else 32768 @mb.program(opset_version=ct.target.iOS17) def prog(): - params = np.array([[1, 2]] * 32769, dtype=np.float32) - indices = np.array([32768, 0], dtype=np.int32) + params = np.array([[1, 2]] * (max_index + 1), dtype=np.float32) + indices = np.array([max_index, 0], dtype=np.int32) return mb.gather(x=params, indices=indices, axis=0) prev_prog, _, block = apply_pass_and_basic_check(prog, "common::add_int16_cast") - assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) + if overflow_uint16: + assert get_op_types_in_program(prog) == get_op_types_in_program(prev_prog) + else: + assert get_op_types_in_program(prog) == ["cast", "gather"] + cast_op = block.find_ops(op_type="cast")[0] + assert cast_op.dtype.val == "uint16" + assert cast_op.outputs[0] == block.find_ops(op_type="gather")[0].indices @patch( "coremltools.converters.mil.mil.passes.defs.quantization.add_int16_cast._PREFER_INT16_OPS", diff --git a/coremltools/converters/mil/mil/program.py b/coremltools/converters/mil/mil/program.py index 462e88ffc..10deb389d 100644 --- a/coremltools/converters/mil/mil/program.py +++ b/coremltools/converters/mil/mil/program.py @@ -4,14 +4,13 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from collections import defaultdict -from typing import Dict, List +from typing import Dict, List, Optional, Union import numpy as _np import sympy as _sm from coremltools import _logger as logger from coremltools.converters.mil._deployment_compatibility import AvailableTarget as _target -from coremltools.converters.mil.input_types import InputType from coremltools.converters.mil.mil.input_type import InternalInputType from coremltools.converters.mil.mil.ops.helper import _get_version_of_op from coremltools.converters.mil.mil.var import ListVar @@ -19,6 +18,7 @@ from . import types from .block import Function from .operation import Operation +from .scope import ScopeSource from .types.symbolic import k_num_internal_syms, k_used_symbols from .var import Var @@ -28,20 +28,19 @@ class Program: def _get_opset_str_value(op): return f"coremltools.target.{op.name}" - @staticmethod - def _get_supported_dialect_opset() -> List[str]: - """ - Return a list of supported dialect opsets at runtime. - """ - return [] - def __init__(self): - self.main_input_types = [] - self.main_output_types = None self.functions = {} - self.parameters = {} self.skip_all_passes = False + def _add_essential_scope_source( + self, scope_source: Union[ScopeSource, List[ScopeSource]] + ) -> None: + """ + Add essential scope sources to functions. + """ + for func in self.functions.values(): + func._add_essential_scope_source(scope_source) + def _get_dialect_namespaces(self) -> Dict[str, List[Operation]]: """ Return a dict which maps the dialect namespace into a list of corresponding operations. 
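For orientation, here is a minimal usage sketch of the ``_add_essential_scope_source`` helper added in the hunk above, mirroring how the new scope tests call it; the toy program is illustrative, and only the helper and ``validate(check_essential_scope=...)`` come from this diff:

.. sourcecode:: python

    from coremltools.converters.mil.mil import Builder as mb
    from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource

    @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))])
    def prog(x):
        with mb.scope(ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="Module1")):
            return mb.relu(x=x)

    # Mark the torchscript-type scope as essential on every function of the program,
    # so that downstream graph passes are expected to preserve it and
    # prog.validate(check_essential_scope=True) can verify that they did.
    prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE)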
@@ -49,7 +48,7 @@ def _get_dialect_namespaces(self) -> Dict[str, List[Operation]]: res = defaultdict(list) def get_dialect_namespaces_block(block): - for op in list(block.operations): + for op in block.operations: for b in op.blocks: get_dialect_namespaces_block(b) if hasattr(op, "_dialect_namespace"): @@ -72,7 +71,7 @@ def _get_max_opset_version_and_op(self): def _check_ops_version_compatibility(self, max_opset_version): def check_version_compatibility_block(block): - for op in list(block.operations): + for op in block.operations: for b in op.blocks: check_version_compatibility_block(b) if not hasattr(op, "_op_variants") or not isinstance(op._op_variants, dict): @@ -146,10 +145,13 @@ def _check_invalid_tensor_rank_block(block): _check_invalid_tensor_rank_block(b) for o in op.outputs: if not isinstance(o, ListVar) and (o.rank < 0 or o.rank >= 6): - if op.op_type == "const" and len(o.child_ops) == 1 and \ - o.child_ops[0].op_type == "constexpr_lut_to_dense": - # For lut op, the lookup table is allowed to have rank > 5. - continue + if op.op_type == "const" or op.op_type.startswith("constexpr_"): + if all( + child_op.op_type.startswith("constexpr_") + for child_op in o.child_ops + ): + # For const/constexpr op's constexpr output, tensor with rank > 5 is ok. + continue raise ValueError( f'Core ML only supports tensors with rank <= 5. Layer "{op.name}", ' f'with type "{op.op_type}", outputs a rank {o.rank} tensor. ' @@ -207,20 +209,6 @@ def add_function(self, name, ssa_func): def add_parameters(self, name, ssa_val): raise NotImplementedError() - def set_main_input_types(self, inputs): - if not isinstance(inputs, tuple): - raise ValueError("main inputs should be tuple of TensorType or ImageType") - elif not all([isinstance(inp, InputType) for inp in inputs]): - raise ValueError("main inputs should be tuple of InputSpec") - self.main_input_types = inputs - - def set_main_output_types(self, outputs=None): - if outputs is not None: - if not (isinstance(outputs, list) and all([isinstance(out, InputType) for out in outputs])): - raise TypeError("main outputs should be a list of type ct.TensorType or ct.ImageType") - self.main_output_types = outputs - - def find_ops(self, prefix=None, op_type=None, exactly_one=False): """ Return list of ops with name matching `prefix` if specified, and @@ -242,9 +230,51 @@ def find_ops(self, prefix=None, op_type=None, exactly_one=False): raise ValueError(msg.format(found_ops)) return found_ops - def validate(self): + def validate(self, check_essential_scope: Optional[bool] = False) -> None: for f in self.functions.values(): - f.validate() + f.validate(force_validate=True, check_essential_scope=check_essential_scope) + + def construct_debug_handle_to_ops_mapping(self) -> Dict: + """ + For PyMIL program translated from ExecuTorch only: Based on scope info inherited from EXIR, + construct a debug handle to ops mapping. 
The mapping format is something like + { + 1: [ + {"Type": "Program"}, + {"Type": "Function", "Name": "main"}, + {"Type": "Block"}, + {"Type": "Operation", "Operation_Type": "add", "Output": "z"} + ] + } + where `1`, `"main"`, `"add"`, and `"z"` are example values of + the debug handle, function name, operation type, + and output var name (or the name of the first output var, if multiple outputs) + """ + debug_handle_to_ops_mapping = {} + for function_name, function in self.functions.items(): + for operation in function.operations: + # TODO (rdar://115846569): Handle multi-block case from EXIR + if len(operation.blocks) > 0: + raise NotImplementedError("Multi-block case has not been supported yet") + debug_handle = operation.scopes.get(ScopeSource.EXIR_DEBUG_HANDLE) + if debug_handle is None: + continue + debug_handle = debug_handle[0] + if debug_handle not in debug_handle_to_ops_mapping: + debug_handle_to_ops_mapping[debug_handle] = [] + debug_handle_to_ops_mapping[debug_handle].append( + [ + {"Type": "Program"}, + {"Type": "Function", "Name": function_name}, + {"Type": "Block"}, + { + "Type": "Operation", + "Operation_Type": operation.op_type, + "Output": operation.outputs[0].name, + }, + ] + ) + return debug_handle_to_ops_mapping def __getitem__(self, func_name): if func_name not in self.functions: @@ -255,10 +285,11 @@ def __getitem__(self, func_name): def __repr__(self): return self.__str__() - def __str__(self): + def __str__(self, print_attr: Optional[bool] = False) -> str: s = "" for f_name, f in self.functions.items(): - s += f.to_str(f_name) + s += "\n" + s += f.to_str(f_name, print_attr=print_attr) return s diff --git a/coremltools/converters/mil/mil/scope.py b/coremltools/converters/mil/mil/scope.py new file mode 100644 index 000000000..cc65b3f93 --- /dev/null +++ b/coremltools/converters/mil/mil/scope.py @@ -0,0 +1,337 @@ +# Copyright (c) 2024, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import copy +from collections import defaultdict +from enum import Enum +from typing import Dict, List, Union + +from attrs import define, field, validators + + +class ScopeSource(Enum): + """ + Pre-defined scope source enum: + + # Torch script related: + TORCHSCRIPT_MODULE_TYPE: + * Torchscript module type of a scope, which usually corresponds to the submodule object class type. + * If provided as str, it denotes a single scope, and cannot be an empty str. + * Nested scopes are represented by a list of str. + + TORCHSCRIPT_MODULE_NAME: + * Unique torchscript identifier for a scope, which usually corresponds to the submodule object name. + * If provided as str, it denotes a single scope. + * Nested scopes are represented by a list of str. + + # Core ML converter graph passes related: + COREMLTOOLS_GRAPH_PASS: + * This scope traces the graph transformations (graph passes) applied on the program. + * For instance, operations constructed under the "fuse_conv_batchnorm" pass, is going to have + the scopes attribute of ``{COREMLTOOLS_GRAPH_PASS: ["fuse_conv_batchnorm"]}``. + * If the op went through multiple graph pass transformations, it is represetned by a list of str. + For instance: ["fuse_conv_batchnorm", "add_fp16_cast"] means the op is created by "fuse_conv_batchnorm" + and then undergoes "add_fp16_cast". 
+ + # Torch export related: + EXIR_DEBUG_HANDLE: + * The ``debug_handle`` metadata inherited from torch.fx.Node.meta in EXIR + * This metadata enables post-run analysis in ExecuTorch integration + * ExecuTorch uses integer as debug handle. When a MIL op can be traced back to ExecuTorch + (e.g. translated from torch op), we inherit the integer value + * If a MIL op cannot be traced back to ExecuTorch (e.g. created by graph pass), + then we use None to denote "no debug handle" + + + Examples + -------- + Here is an example of torchscript related scope enum: + + .. sourcecode:: python + + class SubModule(torch.nn.Module): + pass + + + class MainModule(torch.nn.Module): + def __init__(self): + self.submodule_1 = SubModule() + + def forward(self, x): + node = self.submodule_1(x) + return node + + + my_model = MainModule() + + when the above model is translated into pymil, the Operation corresponding to ``node`` would have: + + * TORCHSCRIPT_MODULE_TYPE: ["SubModule", ...] + * TORCHSCRIPT_MODULE_NAME: ["submodule_1", ...] + + in their scope attributes. + """ + + TORCHSCRIPT_MODULE_TYPE = 0 + TORCHSCRIPT_MODULE_NAME = 1 + COREMLTOOLS_GRAPH_PASS = 2 + EXIR_DEBUG_HANDLE = 3 + + +class ScopeStack(defaultdict): + """ + A utility class to handle the scope context manager + """ + + def __init__(self): + super().__init__(list) + + def get_curr_scopes(self) -> Dict[ScopeSource, List[str]]: + """ + Returns the current scope information as a dictionary. + """ + res = defaultdict(list) + for key, val in self.items(): + if len(val) == 0: + continue + scope_for_one_source = [] + for v in val: + scope_for_one_source.extend(v.data) + res[key] = scope_for_one_source + return res + + +SCOPE_STACK = ScopeStack() +VALID_OPS_TO_COPY_SCOPE_INFO = [] + + +def add_graph_pass_scope( + src_scopes: Dict[ScopeSource, List[str]], graph_pass_scopes: Dict[ScopeSource, List[str]] +) -> Dict[ScopeSource, List[str]]: + res = {} + """ + Construct a scope by adding graph pass scopes from ``graph_pass_scopes`` to ``src_scopes``. + + The rules are the following: + + (1) We append the COREMLTOOLS_GRAPH_PASS ScopeSource in ``graph_pass_scopes`` to the ``src_scopes``. + This will allow us to keep tracking the history of transformation. + For instance: + + Input: + + src_scopes = { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + graph_pass_scopes = { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2", "pass_3"], + } + + Output: + + res = { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2", "pass_3"], + } + + (2) Only COREMLTOOLS_GRAPH_PASS ScopeSource is allowed in ``graph_pass_scopes``. + + (3) Other ScopeSource will be passed down from ``src_scopes``. + + Input: + + src_scopes = { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.TORCHSCRIPT_MODULE_NAME: ["a1"], + ScScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + graph_pass_scopes = { + ScScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2", "pass_3"], + } + + Output: + + res = { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.TORCHSCRIPT_MODULE_NAME: ["a1"], + ScScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2", "pass_3"], + } + """ + res = defaultdict(list) + for scope_source_key in ScopeSource: + if scope_source_key in graph_pass_scopes: + assert ( + scope_source_key == ScopeSource.COREMLTOOLS_GRAPH_PASS + ), "Only ScopeSource.COREMLTOOLS_GRAPH_PASS is allowed in the graph_pass_scopes." 
+ if ScopeSource.COREMLTOOLS_GRAPH_PASS in src_scopes: + old_graph_pass_data = copy.copy(src_scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS]) + else: + old_graph_pass_data = [] + new_graph_pass_data = copy.copy(graph_pass_scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS]) + res[ScopeSource.COREMLTOOLS_GRAPH_PASS] = old_graph_pass_data + new_graph_pass_data + elif scope_source_key in src_scopes: + res[scope_source_key] = copy.copy(src_scopes[scope_source_key]) + + return res + + +@define +class ScopeInfo: + """ + Parameters + ---------- + source: str + * Source of the scope. For instance, it could be a frontend framework like torchsccript, or a converter graph pass, etc. + * Must be type of ScopeSource Enum. + + data: Union[str, List[str]] + * Scope data. + * It could be type of str or List[str]. + + Examples + -------- + Here are examples of creating a ScopeInfo: + + .. sourcecode:: python + # A scope for a single torchscript module type + scope_info = ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, + data="Module_1", + ) + + # A scope for a two layers torchscript model hierarchy type + scope_info = ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, + data=["Module_1", "Module_2"], + ) + """ + + source: str = field(validator=validators.instance_of(ScopeSource)) + data: Union[str, List[str]] = field(validator=validators.instance_of((str, list))) + + def __attrs_post_init__(self): + # cleanup scope info + if self.source in ( + ScopeSource.TORCHSCRIPT_MODULE_NAME, + ScopeSource.TORCHSCRIPT_MODULE_TYPE, + ScopeSource.COREMLTOOLS_GRAPH_PASS, + ): + if not isinstance(self.data, list): + self.data = [self.data] + for i, val in enumerate(self.data): + if not isinstance(val, str): + raise ValueError( + f"Scope must be type of List[str]. Got element {val} with type {type(val)}." + ) + self.data[i] = val.replace(" ", "") + elif self.source == ScopeSource.EXIR_DEBUG_HANDLE: + if not isinstance(self.data, list): + self.data = [self.data] + for val in self.data: + if val is not None and not isinstance(val, int): + raise ValueError( + f"Scope must be None or type of List[int]. Got element {val} with type {type(val)}." + ) + + if self.source == ScopeSource.COREMLTOOLS_GRAPH_PASS: + if len(self.data) > 1: + raise ValueError( + f"COREMLTOOLS_GRAPH_PASS scope cannot have len > 1. Got {self.data}." + ) + + if self.source == ScopeSource.TORCHSCRIPT_MODULE_TYPE: + if "" in self.data: + raise ValueError( + "TORCHSCRIPT_MODULE_TYPE scope info cannot contains empty string." + ) + + if self.source == ScopeSource.EXIR_DEBUG_HANDLE: + if len(self.data) > 1: + raise ValueError(f"EXIR_DEBUG_HANDLE scope cannot have len > 1. Got {self.data}.") + + +class ScopeContextManger: + def __init__( + self, + *scopes: List[ScopeInfo], + ): + """ + A context manager pushes/pops the scope information, which makes the + operations created within it have the corresponding scope information. + + Parameters + ---------- + scopes: Optional[List[ScopeInfo]] (Optional) + * A list of ScopeInfo under the context manager. + * The source in each ScopeInfo cannot be duplicated. + * If not provided, this context manager does no affects. + + Examples + -------- + Here is an example of creating a scope for torchscript module heirarchy with type and name information. + + .. 
sourcecode:: python + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1"]), + ): + return mb.add(x=x, y=4.3, name="add_1") + + + In the above example, the "add_1" op will have two scope attributes, for torchscipt module type and name: + * TORCHSCRIPT_MODULE_TYPE: ["Module1"] + * TORCHSCRIPT_MODULE_NAME: ["module_1"] + + Here is an example of creating nested scopes: + + .. sourcecode:: python + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1"]), + ): + x = mb.add(x=x, y=4.3, name="add_1") + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module2"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_2"]), + ): + return mb.add(x=x, y=3.2, name="add_2") + + In the above example, the "add_1" op would have a scope attribute: + * TORCHSCRIPT_MODULE_TYPE: ["Module1"] + + while the "add_2" op would have scope attributes: + * TORCHSCRIPT_MODULE_TYPE: ["Module1", "Module2"] + * TORCHSCRIPT_MODULE_NAME: ["module_2"] + """ + self.scopes = scopes + # Validate scopes are type of ScopeInfo + for scope in self.scopes: + if not isinstance(scope, ScopeInfo): + raise ValueError( + f"mb.scope only accepts inputs of type ScopeInfo. Got {type(scope)}." + ) + + # validate there is no duplicated scope source + visited_scope_sources = set() + for scope in self.scopes: + if scope.source in visited_scope_sources: + raise ValueError(f"Scope source {scope.source} duplicated.") + visited_scope_sources.add(scope.source) + + def __enter__(self): + for scope in self.scopes: + SCOPE_STACK[scope.source].append(scope) + if scope.source == ScopeSource.COREMLTOOLS_GRAPH_PASS: + VALID_OPS_TO_COPY_SCOPE_INFO.append(set()) + + def __exit__(self, type, value, traceback): + for scope in self.scopes: + SCOPE_STACK[scope.source].pop() + if scope.source == ScopeSource.COREMLTOOLS_GRAPH_PASS: + VALID_OPS_TO_COPY_SCOPE_INFO.pop() diff --git a/coremltools/converters/mil/mil/tests/test_block.py b/coremltools/converters/mil/mil/tests/test_block.py index 0f943c710..70da4f853 100644 --- a/coremltools/converters/mil/mil/tests/test_block.py +++ b/coremltools/converters/mil/mil/tests/test_block.py @@ -10,6 +10,7 @@ from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil.passes.tests.test_passes import CONSTEXPR_FUNCS +from coremltools.converters.mil.mil.utils import CacheDoublyLinkedList from coremltools.converters.mil.testing_utils import ( assert_same_output_names, assert_same_output_shapes, @@ -61,7 +62,7 @@ def prog(x0): print("after:\n{}".format(prog)) assert block.inputs["x0"] == block.find_ops(op_type="log")[0].inputs["x"] assert len(block.operations) == 2 # const op for epsilon + log - assert block.operations[1].op_type == "log" + assert list(block.operations)[1].op_type == "log" assert block.outputs[0] == x1 @@ -492,3 +493,58 @@ def prog(x0, y0): assert block.outputs[0].op.name == "new_output" assert block.outputs[1].op.name == "new_output" assert len(block.outputs[0].consuming_blocks) == 1 + + +class TestCacheDoublyLinkedList: + def test_basic(self): + operations = CacheDoublyLinkedList() + + operations.insert_op_before(1) + assert list(operations) == [1] + + operations.insert_op_before(2, before_op=1) + assert list(operations) == [2, 1] + + 
operations.insert_op_before(3) + assert list(operations) == [2, 1, 3] + + operations.insert_op_before(4, before_op=1) + assert list(operations) == [2, 4, 1, 3] + + operations.remove(2) + assert list(operations) == [4, 1, 3] + + operations.remove(3) + assert list(operations) == [4, 1] + + operations.remove(4) + assert list(operations) == [1] + + node = operations._get_node_from_op(1) + operations.remove(1) + assert list(operations) == [] + assert node.prev is CacheDoublyLinkedList.INVALID_NODE + assert node.next is CacheDoublyLinkedList.INVALID_NODE + + operations.insert_op_before(0) + assert list(operations) == [0] + + operations = CacheDoublyLinkedList([1, 2, 3]) + assert list(operations) == [1, 2, 3] + + operations = CacheDoublyLinkedList([]) + assert list(operations) == [] + + def test_reversed(self): + operations = CacheDoublyLinkedList([1, 2, 3]) + assert list(reversed(operations)) == [3, 2, 1] + + def test_error(self): + operations = CacheDoublyLinkedList([1, 2, 3]) + assert operations[0] == 1 + assert operations[-1] == 3 + # Indexing doubly linked list is super expensive, we need to error out. + with pytest.raises( + ValueError, match="Doubly linked list does not support indexing other than 0, -1." + ): + operations[1] diff --git a/coremltools/converters/mil/mil/tests/test_debug.py b/coremltools/converters/mil/mil/tests/test_debug.py index b6601ccfc..0fa69dbf0 100644 --- a/coremltools/converters/mil/mil/tests/test_debug.py +++ b/coremltools/converters/mil/mil/tests/test_debug.py @@ -35,7 +35,7 @@ def compute_ground_truth_answer(input): square = x * x tanh = np.tanh(square) return {"output_0": square, "output_1":tanh} - + class TestExtractSubModel: def test_extract_submodel_error_handling(self): @@ -48,16 +48,16 @@ def test_extract_submodel_error_handling(self): invalid_outputs = ["output_1", 1] with pytest.raises(ValueError, match="outputs must be a list of str. Got element 1 with type ."): - extract_submodel(mlmodel, outputs=invalid_outputs) + extract_submodel(mlmodel, outputs=invalid_outputs) invalid_outputs = ["output_1", "output_1"] with pytest.raises(ValueError, match="outputs must be a list of unique elements. 
'output_1' occurs 2 times"): extract_submodel(mlmodel, outputs=invalid_outputs) - + invalid_outputs = ["error"] with pytest.raises(ValueError, match="outputs \['error'\] not found in the function."): extract_submodel(mlmodel, outputs=invalid_outputs) - + model_dir = tempfile.TemporaryDirectory() mlmodel_path = os.path.join(model_dir.name, "model.mlmodel") mlmodel.save(mlmodel_path) @@ -72,7 +72,7 @@ def test_extract_submodel_symbolic_input(self): | v mul -> tan -> output_2 - + If x has symbolic shape, then the subgraph mil -> tan should also have symbolic shape """ @mb.program(input_specs=[mb.TensorSpec(shape=(1, get_new_symbol()))]) @@ -85,15 +85,15 @@ def prog(x): model = ct.convert(prog, convert_to="neuralnetwork") submodel = extract_submodel(model, outputs=["tan"], inputs=["mul"]) func = submodel._mil_program.functions["main"] - + input = list(func.inputs.values())[0] assert input.shape[0] == 1 assert is_symbolic(input.shape[1]) - + output = func.outputs[0] assert output.shape[0] == 1 assert is_symbolic(output.shape[1]) - + def test_extract_submodel_complex(self): """ Input graph: @@ -117,7 +117,7 @@ def prog(x, y): Case 1: inputs = None outputs = [sin, mul] - + Output graph: x -> sin ------> output_1 | | @@ -126,12 +126,12 @@ def prog(x, y): """ submodel = extract_submodel(model, outputs=["sin", "mul"]) assert get_op_types_in_program(submodel._mil_program) == ["sin", "add", "mul"] - + """ Case 2: inputs = None outputs = [sin, add] - + Output graph: x -> sin -> output_1 | @@ -140,12 +140,12 @@ def prog(x, y): """ submodel = extract_submodel(model, outputs=["sin", "add"]) assert get_op_types_in_program(submodel._mil_program) == ["sin", "add"] - + """ Case 3: inputs = None outputs = [mul] - + Output graph: x -> sin ----- | | @@ -154,12 +154,12 @@ def prog(x, y): """ submodel = extract_submodel(model, outputs=["mul"]) assert get_op_types_in_program(submodel._mil_program) == ["sin", "add", "mul"] - + """ Case 4: inputs = None outputs = [sin, sub] - + Output graph: x -> sin -> sub -> output_2 | @@ -168,14 +168,13 @@ def prog(x, y): y """ submodel = extract_submodel(model, outputs=["sin", "sub"]) - print(submodel._mil_program) assert get_op_types_in_program(submodel._mil_program) == ["sin", "sub"] - + """ Case 5: inputs = [x, y] outputs = [mul] - + Output graph: x -> sin ----- | | @@ -184,12 +183,12 @@ def prog(x, y): """ submodel = extract_submodel(model, outputs=["mul"], inputs=["x", "y"]) assert get_op_types_in_program(submodel._mil_program) == ["sin", "add", "mul"] - + """ Case 6: inputs = [mul] outputs = [tan] - + mul -> tan -> output_1 """ submodel = extract_submodel(model, outputs=["tan"], inputs=["mul"]) @@ -207,22 +206,22 @@ def prog(x, y): """ submodel = extract_submodel(model, outputs=["sub", "mul"], inputs=["sin", "add"]) assert get_op_types_in_program(submodel._mil_program) == ["sub", "mul"] - + """ Case 8 (Negative): inputs = [sin] outputs = [mul] - + mul not reachable merely through sin """ with pytest.raises(ValueError, match="output mul not reachable from inputs"): submodel = extract_submodel(model, outputs=["mul"], inputs=["sin"]) - + """ Case 9 (Negative): inputs = [mul] outputs = [sin] - + sin not reachable merely through sin """ with pytest.raises(ValueError, match="output sin not reachable from inputs"): @@ -242,7 +241,7 @@ def test_extract_submodel_neuralnetwork(self, compute_unit): # check that the submodel retains the same backend assert submodel.get_spec().WhichOneof("Type") == "neuralNetwork" - + # check that the submodel retains the same compute unit assert 
submodel.compute_unit == compute_unit @@ -286,7 +285,7 @@ def test_extract_submodel_mlprogram(self, compute_unit, store_to_disk): # check that the submodel retains the same backend assert submodel.get_spec().WhichOneof("Type") == "mlProgram" - + # check that the submodel retains the same compute unit assert submodel.compute_unit == compute_unit diff --git a/coremltools/converters/mil/mil/tests/test_programs.py b/coremltools/converters/mil/mil/tests/test_programs.py index 139bf4d62..5230600b6 100644 --- a/coremltools/converters/mil/mil/tests/test_programs.py +++ b/coremltools/converters/mil/mil/tests/test_programs.py @@ -8,9 +8,11 @@ import coremltools as ct from coremltools import _logger as logger +from coremltools.converters.mil import mil from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import Function, Program, types from coremltools.converters.mil.mil.passes.tests.test_passes import CONSTEXPR_FUNCS +from coremltools.converters.mil.mil.scope import ScopeInfo, ScopeSource, add_graph_pass_scope np.random.seed(0) @@ -375,7 +377,7 @@ def func_2(x): def func_3(x): return x - prog = Program() + prog = mil.Program() prog.add_function("func_1", func_1) prog.add_function("func_2", func_2) prog.add_function("func_3", func_3) @@ -397,12 +399,12 @@ def func_2(x): err_msg = "all functions must have the same opset_version." - prog = Program() + prog = mil.Program() prog.add_function("func_1", func_1) with pytest.raises(ValueError, match=err_msg): prog.add_function("func_2", func_2) - prog = Program() + prog = mil.Program() prog.add_function("func_2", func_2) with pytest.raises(ValueError, match=err_msg): prog.add_function("func_1", func_1) @@ -553,21 +555,1422 @@ def prog(x): prog, convert_to="mlprogram", minimum_deployment_target=ct.target.iOS16, + pass_pipeline=ct.PassPipeline.EMPTY, compute_units=ct.ComputeUnit.CPU_ONLY, compute_precision=compute_precision, ) - # If the transpose is removed by optimization passes, the conversion goes through + # If the transpose is removed by graph pass merge_affine_dequantize_with_consecutive_ops, + # the conversion goes through @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) def prog(x): constexpr = CONSTEXPR_FUNCS["constexpr_affine_dequantize"]((4, 3)) constexpr = mb.transpose(x=constexpr, perm=[0, 1]) return mb.linear(x=x, weight=constexpr) - mlmodel = ct.convert( - prog, - convert_to="mlprogram", - minimum_deployment_target=ct.target.iOS16, - compute_units=ct.ComputeUnit.CPU_ONLY, - compute_precision=compute_precision, - ) + mlmodel = ct.convert( + prog, + convert_to="mlprogram", + minimum_deployment_target=ct.target.iOS16, + compute_units=ct.ComputeUnit.CPU_ONLY, + compute_precision=compute_precision, + ) + +class TestScope: + @staticmethod + def test_basic_single_TorchScript_scope(): + # single scope with scope_name and scope_type + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data="module_1"), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="Module1"), + ): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == ["module_1"] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["Module1"] + + # single scope with scope_name and scope_type with list type + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, 
data=["module_1"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1"]), + ): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == ["module_1"] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["Module1"] + + # single scope with scope_type and no scope_name + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op.scopes + + # nested scope in a single mb.scope call. Both scope_name and scope_type provided + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1", "module_2"] + ), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1", "Module2"]), + ): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == [ + "module_1", + "module_2", + ] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "Module1", + "Module2", + ] + + # nested scope in a single mb.scope call. Only scope_type provided + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1", "module_2"] + ), + ): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_1", + "module_2", + ] + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op.scopes + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["", ""]), + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1", "module_2"] + ), + ): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == ["", ""] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_1", + "module_2", + ] + + @staticmethod + def test_basic_nested_TorchScript_scope(): + # nested scope with scope_name and scope_type + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data="module_1"), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="Module1"), + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data="module_2"), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="Module2"), + ): + x = mb.add(x=x, y=5.4) + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == [ + "module_1", + "module_2", + ] + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "Module1", + "Module2", + ] + + add_op_2 = prog.find_ops(op_type="add")[1] + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == ["module_1"] + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["Module1"] + + # nested scope with scope_name and scope_type with list type + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with 
mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1"]), + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_2"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module2"]), + ): + x = mb.add(x=x, y=5.4) + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == [ + "module_1", + "module_2", + ] + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "Module1", + "Module2", + ] + + add_op_2 = prog.find_ops(op_type="add")[1] + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == ["module_1"] + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["Module1"] + + # nested scope with scope_name and no scope_type + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_2"]), + ): + x = mb.add(x=x, y=5.4) + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op_1.scopes + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_1", + "module_2", + ] + + add_op_2 = prog.find_ops(op_type="add")[1] + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op_2.scopes + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["module_1"] + + # nested scope in a nested mb.scope call. Both scope_name and scope_type provided + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1", "module_2"] + ), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1", "Module2"]), + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data="module_3"), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="Module3"), + ): + x = mb.add(x=x, y=5.4) + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == [ + "module_1", + "module_2", + "module_3", + ] + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "Module1", + "Module2", + "Module3", + ] + + add_op_2 = prog.find_ops(op_type="add")[1] + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == [ + "module_1", + "module_2", + ] + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "Module1", + "Module2", + ] + + # nested scope in a single mb.scope call. 
Only scope_type provided + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1", "module_2"] + ), + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + x = mb.add(x=x, y=5.4) + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op_1.scopes + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_1", + "module_2", + "module_3", + ] + + add_op_2 = prog.find_ops(op_type="add")[1] + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op_2.scopes + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_1", + "module_2", + ] + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1", "module_2"] + ), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["", ""]), + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + x = mb.add(x=x, y=5.4) + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_1", + "module_2", + "module_3", + ] + assert add_op_1.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == ["", ""] + + add_op_2 = prog.find_ops(op_type="add")[1] + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_1", + "module_2", + ] + assert add_op_2.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == ["", ""] + + @staticmethod + def test_graph_pass_scope_handling(): + # default list type + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo( + source=ScopeSource.COREMLTOOLS_GRAPH_PASS, + data="pass_1", + ), + ): + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert add_op_1.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "pass_1", + ] + + # data cannot have len > 1 + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with pytest.raises( + ValueError, match="COREMLTOOLS_GRAPH_PASS scope cannot have len > 1." 
+ ): + with mb.scope( + ScopeInfo( + source=ScopeSource.COREMLTOOLS_GRAPH_PASS, + data=["pass_1", "pass_2"], + ), + ): + return mb.add(x=x, y=0.0) + return x + + # nested graph pass scope is allowed + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo( + source=ScopeSource.COREMLTOOLS_GRAPH_PASS, + data="pass_1", + ), + ): + with mb.scope( + ScopeInfo( + source=ScopeSource.COREMLTOOLS_GRAPH_PASS, + data="pass_2", + ), + ): + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert add_op_1.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "pass_1", + "pass_2", + ] + + @staticmethod + def test_EXIR_scope_handling(): + # default list type + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope(ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[1])): + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert add_op_1.scopes[ScopeSource.EXIR_DEBUG_HANDLE] == [1] + + # data cannot have len > 1 + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with pytest.raises(ValueError, match="EXIR_DEBUG_HANDLE scope cannot have len > 1."): + with mb.scope(ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[2, 3])): + return mb.add(x=x, y=0.0) + return x + + # nested graph pass scope is allowed + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope(ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[None])): + with mb.scope(ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[0])): + return mb.add(x=x, y=0.0) + + add_op_1 = prog.find_ops(op_type="add")[0] + assert add_op_1.scopes[ScopeSource.EXIR_DEBUG_HANDLE] == [None, 0] + + @staticmethod + def test_invalid_dtype_error_out(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with pytest.raises( + ValueError, + match="Scope must be type of List\[str\]. Got element 9 with type \.", + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["m1", 9]), + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1", "Module2"] + ), + ): + return mb.add(x=x, y=5.4) + + with pytest.raises( + ValueError, + match="Scope must be type of List\[str\]. 
Got element 0 with type \.", + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["m1", "m2"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1", 0]), + ): + return mb.add(x=x, y=5.4) + return x + + @staticmethod + def test_empty_scope(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope(): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert ScopeSource.TORCHSCRIPT_MODULE_TYPE not in add_op.scopes + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op.scopes + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope(): + with mb.scope(): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert ScopeSource.TORCHSCRIPT_MODULE_TYPE not in add_op.scopes + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op.scopes + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope(): + with mb.scope(ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="m1")): + with mb.scope(): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == ["m1"] + assert ScopeSource.TORCHSCRIPT_MODULE_NAME not in add_op.scopes + + + @staticmethod + def test_empty_scope_type_error_out(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with pytest.raises( + ValueError, match="TORCHSCRIPT_MODULE_TYPE scope info cannot contains empty string." + ): + with mb.scope(ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="")): + with mb.scope(): + return mb.add(x=x, y=5.4) + + with pytest.raises( + ValueError, match="TORCHSCRIPT_MODULE_TYPE scope info cannot contains empty string." 
+ ): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, + data=["a", ""], + ) + ): + with mb.scope(): + return mb.add(x=x, y=5.4) + return x + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, + data=["module_1"], + ) + ): + with pytest.raises( + ValueError, + match="TORCHSCRIPT_MODULE_TYPE scope info cannot contains empty string.", + ): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, + data=[""], + ) + ): + return mb.add(x=x, y=5.4) + with pytest.raises( + ValueError, + match="TORCHSCRIPT_MODULE_TYPE scope info cannot contains empty string.", + ): + with mb.scope( + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, + data=["a", "", ""], + ) + ): + return mb.add(x=x, y=5.4) + return x + + @staticmethod + def test_white_space_handling(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=[" module_1 "]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=[" Module1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=[" pass_1"]), + ): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == [ + "module_1", + ] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "Module1", + ] + assert add_op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == [ + "pass_1", + ] + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=[" Module1 ", " "]), + ScopeInfo( + source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=[" module_1 ", " module_2 "] + ), + ): + return mb.add(x=x, y=5.4) + + add_op = prog.find_ops(op_type="add")[0] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] == [ + "module_1", + "module_2", + ] + assert add_op.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME] == ["Module1", ""] + + @staticmethod + def test_duplicated_scope_source_error_out(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with pytest.raises( + ValueError, match="Scope source ScopeSource.TORCHSCRIPT_MODULE_TYPE duplicated." 
+ ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="a1"), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data="a2"), + ): + return mb.add(x=x, y=5.4) + return x + + @staticmethod + def test_check_prog_has_scope_error_out(): + def get_prog(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["Module1"]), + ): + x = mb.add(x=x, y=5.4) + x = mb.relu(x=x, name="invalid_op") + return x + + return prog + + prog = get_prog() + prog._add_essential_scope_source( + [ScopeSource.TORCHSCRIPT_MODULE_TYPE, ScopeSource.TORCHSCRIPT_MODULE_NAME] + ) + with pytest.raises( + ValueError, match="is missing essential scopes ScopeSource.TORCHSCRIPT_MODULE_TYPE" + ): + prog.validate(check_essential_scope=True) + + # If check_essential_scope is not passes, it will not error out + prog.validate() + + # No error if no essential scope source are set + prog = get_prog() + prog.validate(check_essential_scope=True) + + @staticmethod + def test_invalid_scope_source_type(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with pytest.raises(TypeError, match="'source' must be \"): + with mb.scope( + ScopeInfo(source="invalid_source", data="a1"), + ): + return mb.add(x=x, y=5.4) + return x + + @staticmethod + def test_invalid_scope_info_type(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with pytest.raises( + ValueError, + match="mb.scope only accepts inputs of type ScopeInfo. Got \.", + ): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1"]), + "invalid", + ): + return mb.add(x=x, y=5.4) + return x + + @staticmethod + def test_scope_setter_immutable(): + """ + When setting the `scopes` property for an op, the value should be deep copied. + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1"]), + ): + x = mb.add(x=x, y=5.4) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_2"]), + ): + y = mb.add(x=x, y=5.4) + + x.scopes = y.scopes + y.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME][0] = "invalid" + assert x.scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME][0] == "module_2" + + return x + + @staticmethod + def test_scopes_for_function_inputs(): + """ + If a var's parent op is a placeholder, we cannot set its scopes. + And its scopes is an empty dictionary. + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 3))]) + def prog(x): + assert len(x.scopes) == 0 + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1"]), + ): + y = mb.add(x=x, y=5.4) + + with pytest.raises( + ValueError, + match="Cannot set scopes to a function input var", + ): + x.scopes = y.scopes + + return y + + @staticmethod + def test_add_graph_pass_scope(): + """ + Test the rules of merging two scopes. 
+ """ + # Rule of merging COREMLTOOLS_GRAPH_PASS + old_scopes = { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + new_scopes = { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2", "pass_3"], + } + res = dict(add_graph_pass_scope(old_scopes, new_scopes)) + + assert res == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2", "pass_3"], + } + + # Ensure we make a copy of the list + old_scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS][0] = "invalid" + assert res == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2", "pass_3"], + } + new_scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS][0] = "invalid" + assert res == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2", "pass_3"], + } + + # Another test + old_scopes = { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.TORCHSCRIPT_MODULE_NAME: ["a1"], + } + new_scopes = { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + res = add_graph_pass_scope(old_scopes, new_scopes) + + assert res == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.TORCHSCRIPT_MODULE_NAME: ["a1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + + # Ensure we make a copy of the list + old_scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE][0] = "invalid" + assert res == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.TORCHSCRIPT_MODULE_NAME: ["a1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + old_scopes[ScopeSource.TORCHSCRIPT_MODULE_NAME][0] = "invalid" + assert res == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.TORCHSCRIPT_MODULE_NAME: ["a1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + + # Test for other scope source + old_scopes = { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.TORCHSCRIPT_MODULE_NAME: ["a1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + new_scopes = { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + } + + with pytest.raises( + AssertionError, + match="Only ScopeSource.COREMLTOOLS_GRAPH_PASS is allowed in the graph_pass_scopes.", + ): + add_graph_pass_scope(old_scopes, new_scopes) + + @staticmethod + def test_scope_preservation_when_reconnect_graph(): + """ + If the _replace_var is doing reconnection of the graph, without any new op introduced, + no scope information is going to change. + """ + + def get_prog(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ): + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_2"]), + ): + sin = mb.sin(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + return prog + + # Case 1: No graph pass is involved, and only reconnect graph is done. + # Scope information will not change. + prog = get_prog() + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + } + + block._replace_var(var_1, var_2) + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + } + + # Case 2: Even the reconnection happens under graph pass, nothing will change. 
+ prog = get_prog() + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["dummy_pass"])): + block._replace_var(var_1, var_2) + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + } + + # Case 3: old_var and new_var are created under a graph pass, and the reconnection happens under the + # same graph pass. Nothing will change still. + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["dummy_pass"])): + prog = get_prog() + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + + block._replace_var(var_1, var_2) + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + + # Case 4: Ops are created under a graph pass, and the reconnection happens outside the graph pass. + # Nothing happens. + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["dummy_pass"])): + prog = get_prog() + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + + block._replace_var(var_1, var_2) + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + + # Case 5: Ops are created under a graph pass 1, and the reconnection happens under graph pass2. + # Nothing happens. + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["dummy_pass"])): + prog = get_prog() + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["dummy_pass_2"])): + block._replace_var(var_1, var_2) + + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["dummy_pass"], + } + + # Case 6. 
old_var and new_var are created under the same graph pass + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + sin = mb.sin(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"])): + block._replace_var(var_1, var_2) + + assert var_1.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + + @staticmethod + def test_scope_passdown_when_new_var_created_under_graph_pass(): + """ + If a new_var is created by a graph pass, and the _replace_var happens under the same graph pass, + the scope information from the old_var is passed to new_var. + """ + + def get_prog(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ): + # This op is newly created by a pass_2 + sin = mb.sin(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + return prog + + # Case 1: _replace_var happens outside the graph pass. Nothing happens + prog = get_prog() + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2"], + } + + block._replace_var(var_1, var_2) + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2"], + } + + # Case 2: new_var created under a pass_2, and _replace_var happens under pass_2. 
Scope info is passed from the old_var + # to the new_var + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + with prog.functions["main"] as block: + op_1, op_2 = list(block.operations) + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ): + # This op is newly created by a pass_2 + sin = mb.sin(x=block.inputs["x"], before_op=op_2) + block._replace_var(op_1.outputs[0], sin) + + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2"], + } + + # Case 3: new_var created under a pass_2, but _replace_var happens under pass_3. + # Nothing happens. + prog = get_prog() + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_3"])): + block._replace_var(var_1, var_2) + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2"], + } + + # Case 4: new_var created under pass_2, and be passed down some scope info, + # so even though _replace_var happens under pass_2 again, nothing happens. 
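A condensed sketch of the pass-down rule the remaining cases exercise (block, old_var, and anchor_op are placeholder names for objects from an existing program, assuming the mb.scope API shown above): a var newly created under a pass scope inherits the replaced var's scopes when _replace_var runs under that same pass; if the new var already carries other scope info, or the replacement runs under a different pass, nothing is copied.

    with block:  # block = prog.functions["main"] of an already-built program
        with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"])):
            new_var = mb.sin(x=block.inputs["x"], before_op=anchor_op)  # created by pass_2
            block._replace_var(old_var, new_var)
    # new_var.scopes now holds old_var's TORCHSCRIPT_MODULE_TYPE entries, and its
    # COREMLTOOLS_GRAPH_PASS list ends with "pass_2".
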
+ @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_2"]), + ): + # This op is newly created by a pass_2, and other scope info already passed down + sin = mb.sin(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2"], + } + + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"])): + block._replace_var(var_1, var_2) + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_2"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2"], + } + + # Case 5: new_var created under pass_2, but the graph pass already finished, + # so even though _replace_var happens under pass_2 again, nothing happens. + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ): + # This op is newly created by a pass_2, and other scope info already passed down + sin = mb.sin(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + block = prog.functions["main"] + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2"], + } + + with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"])): + block._replace_var(var_1, var_2) + assert var_1.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2"], + } + + # Case 6: new_var created under nested graph passes scope. And graph pass happens under pass_3. 
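For the nested cases below, a small sketch of how nested COREMLTOOLS_GRAPH_PASS scopes stack (pass names and some_var are illustrative placeholders):

    with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"])):
        with mb.scope(ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_3"])):
            out = mb.relu(x=some_var)
    # out.op.scopes[ScopeSource.COREMLTOOLS_GRAPH_PASS] == ["pass_2", "pass_3"]
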
+ @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ): + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_3"]), + ): + sin = mb.sin(x=block.inputs["x"], before_op=ops[1]) + block._replace_var(ops[0].outputs[0], sin) + + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2", "pass_3"], + } + + # Case 7: new_var created under nested graph passes scope. And graph pass happens under pass_2. Nothing will happen in this case, since new_var is created under pass_3. + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ): + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_3"]), + ): + sin = mb.sin(x=block.inputs["x"], before_op=ops[1]) + block._replace_var(ops[0].outputs[0], sin) + + ops = list(block.operations) + var_1, var_2 = ops[0].outputs[0], ops[1].outputs[0] + assert var_1.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + assert var_2.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_2", "pass_3"], + } + + @staticmethod + def test_scope_passdown_resursive(): + """ + Test the resursive back propagation when passing down scope info. 
+ """ + # Case 1 + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_3"]), + ): + return mb.relu(x=relu) + + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ): + # The subgraph is constructed under pass_2 + y = mb.leaky_relu(x=block.inputs["x"], alpha=0.8, before_op=ops[1]) + y = mb.add(x=y, y=y, before_op=ops[1]) + y = mb.leaky_relu(x=y, alpha=0.4, before_op=ops[1]) + + block._replace_var(ops[0].outputs[0], y) + + ops = list(block.operations) + assert ops[0].outputs[0].scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + + add_ops = block.find_ops(op_type="add") + const_ops = block.find_ops(op_type="const") + leaky_relu_ops = block.find_ops(op_type="leaky_relu") + + assert len(add_ops) == 1 + assert len(const_ops) == 2 + assert len(leaky_relu_ops) == 2 + + for op in leaky_relu_ops + add_ops + const_ops: + assert op.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2"], + } + + # Case 2: Test for VALID_OPS_TO_COPY_SCOPE_INFO in the scope back propagation + # The same var cannot be visited twice + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + return mb.relu(x=relu) + + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ): + # The subgraph is constructed under pass_2 + relu = ops[0].outputs[0] + + y = mb.leaky_relu(x=relu, alpha=0.8, before_op=ops[1]) + y = mb.concat(values=[y, y, relu, y], axis=0, before_op=ops[1]) + y1, y2, y3, y4 = mb.split(x=y, axis=0, num_splits=4, before_op=ops[1]) + + block._replace_var(relu, y1, anchor_op=y1.op) + + ops = list(block.operations) + relu_ops = block.find_ops(op_type="relu") + leaky_relu_op = block.find_ops(op_type="leaky_relu")[0] + concat_op = block.find_ops(op_type="concat")[0] + split_op = block.find_ops(op_type="split")[0] + + for op in [leaky_relu_op, concat_op, split_op]: + assert op.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2"], + } + + for op in relu_ops: + assert op.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + + # Case 3: Similar to case 2, but the relu op has torch scope. 
+ @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=x) + + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + return mb.relu(x=relu) + + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_2"]), + ): + # The subgraph is constructed under pass_2 + relu = ops[0].outputs[0] + + y = mb.leaky_relu(x=relu, alpha=0.8, before_op=ops[1]) + y = mb.concat(values=[y, y, relu, y], axis=0, before_op=ops[1]) + y1, y2, y3, y4 = mb.split(x=y, axis=0, num_splits=4, before_op=ops[1]) + + block._replace_var(relu, y1, anchor_op=y1.op) + + ops = list(block.operations) + relu_ops = block.find_ops(op_type="relu") + leaky_relu_op = block.find_ops(op_type="leaky_relu")[0] + concat_op = block.find_ops(op_type="concat")[0] + split_op = block.find_ops(op_type="split")[0] + + for op in [leaky_relu_op, concat_op, split_op]: + assert op.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1", "pass_2"], + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + } + + for op in relu_ops: + assert op.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["module_1"], + } + + @staticmethod + def test_scope_passdown_function_input_var(): + """ + If the old_var is function input var, and then the converter sets some default value for each scope source. + """ + # Case 1: with no essential scope set, no scope information is passed down + def get_prog(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_TYPE, data=["module_1"]), + ): + return mb.sin(x=x) + return prog + + prog = get_prog() + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=block.inputs["x"], before_op=ops[0]) + block._replace_var(block.inputs["x"], relu) + + assert relu.scopes == { + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + + # Case 2: essential scope set to TORCHSCRIPT_MODULE_TYPE + prog = get_prog() + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE) + + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=block.inputs["x"], before_op=ops[0]) + block._replace_var(block.inputs["x"], relu) + + assert relu.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_TYPE: ["__COREML__::TORCHSCRIPT_PLACEHOLDER"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + + # Case 3: essential scope set to TORCHSCRIPT_MODULE_NAME + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.TORCHSCRIPT_MODULE_NAME, data=["module_1"]), + ): + return mb.sin(x=x) + + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_NAME) + + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + 
ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=block.inputs["x"], before_op=ops[0]) + block._replace_var(block.inputs["x"], relu) + + assert relu.scopes == { + ScopeSource.TORCHSCRIPT_MODULE_NAME: ["__COREML__::TORCHSCRIPT_PLACEHOLDER_x"], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } + + # Case 4: essential scope set to EXIR_DEBUG_HANDLE + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + with mb.scope( + ScopeInfo(source=ScopeSource.EXIR_DEBUG_HANDLE, data=[1]), + ): + return mb.sin(x=x) + + prog._add_essential_scope_source(ScopeSource.EXIR_DEBUG_HANDLE) + + block = prog.functions["main"] + ops = list(block.operations) + + with block: + with mb.scope( + ScopeInfo(source=ScopeSource.COREMLTOOLS_GRAPH_PASS, data=["pass_1"]), + ): + # This op is created by pass_1 + relu = mb.relu(x=block.inputs["x"], before_op=ops[0]) + block._replace_var(block.inputs["x"], relu) + + assert relu.scopes == { + ScopeSource.EXIR_DEBUG_HANDLE: [None], + ScopeSource.COREMLTOOLS_GRAPH_PASS: ["pass_1"], + } diff --git a/coremltools/converters/mil/mil/types/type_int.py b/coremltools/converters/mil/mil/types/type_int.py index 2080d5b45..bcecd57a9 100644 --- a/coremltools/converters/mil/mil/types/type_int.py +++ b/coremltools/converters/mil/mil/types/type_int.py @@ -37,9 +37,12 @@ def val(self, v): numpy_type_to_builtin_type) if not isinstance(v, (np.generic, np.ndarray, sm.Basic)): - raise ValueError( - f"types should have value of numpy type or Symbols, got {type(v)} instead" - ) + try: + v = np.array(v) + except Exception: + raise ValueError( + f"types should have value of numpy type or Symbols, got {type(v)} instead" + ) if isinstance(v, sm.Basic): self._val = v diff --git a/coremltools/converters/mil/mil/types/type_mapping.py b/coremltools/converters/mil/mil/types/type_mapping.py index 8cc1d6e65..78e82a18a 100644 --- a/coremltools/converters/mil/mil/types/type_mapping.py +++ b/coremltools/converters/mil/mil/types/type_mapping.py @@ -4,11 +4,13 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from collections import namedtuple +from typing import Optional, Union import numpy as _np import numpy as np import sympy as sm +import coremltools.converters.mil.backend.mil.helper as mil_helper import coremltools.proto.MIL_pb2 as _mil_pm from .get_type_info import get_type_info @@ -196,7 +198,7 @@ def builtin_to_resolution(builtin_type: type): return _TYPES_TO_RESOLUTION[builtin_type] -def builtin_to_range(builtin_type: type): +def builtin_to_range(builtin_type: type) -> RangeTuple: """ Given a builtin type, return its corresponding range. """ @@ -341,9 +343,22 @@ def is_builtin(t): return is_scalar(t) or is_tensor(t) or is_str(t) or is_tuple(t) -# Converts a numpy type to its types equivalent. -# Supports both dtypes and numpy primitive types. -def numpy_type_to_builtin_type(nptype): +def _numpy_dtype_instance_to_builtin_type(np_dtype: np.dtype) -> Optional[type]: + if np_dtype in _NPTYPES_TO_STRINGS: + return string_to_builtin(_NPTYPES_TO_STRINGS[np_dtype]) + return None + + +def numpy_type_to_builtin_type(nptype) -> type: + """ + Converts a numpy type to its builtin `types` equivalent. + Supports Python native types and numpy types. + """ + if isinstance(nptype, np.dtype): + builtin_type = _numpy_dtype_instance_to_builtin_type(nptype) + if builtin_type is not None: + return builtin_type + # If this is a data type object, use the corresponding scalar data type. 
if np.issubclass_(type(nptype), np.dtype): nptype = nptype.type @@ -473,11 +488,14 @@ def is_subtype(type1, type2): return type1 == type2 +def _numpy_val_to_bytes(val: Union[np.ndarray, np.generic]) -> bytes: + return val.tobytes() + def np_val_to_py_type(val): """Convert numpy val to python primitive equivalent. Ex: Given: val = np.array([True, False]) - Returns: [True, False] + Returns: (True, False) Given: val = np.array(32, dtype=np.int32) Returns 32 @@ -485,9 +503,9 @@ def np_val_to_py_type(val): if not isinstance(val, (_np.ndarray, _np.generic)): return val - if val.dtype in (_np.float16, _np.uint8, _np.int8, _np.uint32): - # Serialize to bytes because MIL read them from bytes field (see TensorValue in MIL.proto). - return val.tobytes() + builtin_type = numpy_type_to_builtin_type(val.dtype) + if builtin_type in mil_helper.IMMEDIATE_VALUE_TYPES_IN_BYTES: + return _numpy_val_to_bytes(val) else: if val.dtype in (_np.uint16, _np.int16): # TODO (rdar://111797203): Serialize to byte after MIL changes to read from byte field. diff --git a/coremltools/converters/mil/mil/types/type_tensor.py b/coremltools/converters/mil/mil/types/type_tensor.py index 71f400f18..bee987b1d 100644 --- a/coremltools/converters/mil/mil/types/type_tensor.py +++ b/coremltools/converters/mil/mil/types/type_tensor.py @@ -86,11 +86,13 @@ def val(self): @val.setter def val(self, v): if not isinstance(v, np.ndarray): - raise ValueError( - "tensor should have value of type ndarray, got {} instead".format( - type(v) + try: + v = np.array(v) + except: + raise ValueError( + f"tensor value type should be compatible with type np.ndarray, " + f"got {type(v)} instead" ) - ) v_type = numpy_type_to_builtin_type(v.dtype) promoted_type = promote_types(v_type, primitive) diff --git a/coremltools/converters/mil/mil/utils.py b/coremltools/converters/mil/mil/utils.py new file mode 100644 index 000000000..2377ed70a --- /dev/null +++ b/coremltools/converters/mil/mil/utils.py @@ -0,0 +1,134 @@ +# Copyright (c) 2024, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from typing import Dict, List, Optional + +from .operation import Operation + +class OpNode: + """ + A helper node class for the doubly linked list. + It contains an Operation data and pointers to the previous and the next node. + """ + + def __init__(self, op: Operation): + self.op = op + self.next: Optional[OpNode] = None + self.prev: Optional[OpNode] = None + +class CacheDoublyLinkedList: + """ + This array-like data structure is useful to implement pymil's + core program transformations, including: + + 1. Insert an op at a target location (before a target op) + 2. Remove an op from the program + + Given the fact that each op in the list must be unique, a hash table + is maintained in this data structure, and hence the insert / pop can both be performed in O(1). + """ + + INVALID_NODE = OpNode(None) + + def __init__(self, array: Optional[List[Operation]] = None): + self.start: OpNode = None + self.end: OpNode = None + self.op_to_node: Dict[Operation, OpNode] = {} + + if array is not None: + for op in array: + self.insert_op_before(op) + + def insert_op_before(self, new_op: Operation, before_op: Optional[Operation] = None): + """ + Insert an op right before before_op. If before_op is None, + then the new op is appended in the end. 
+ """ + if new_op in self.op_to_node: + raise ValueError(f"{new_op} already exisits.") + + new_node = OpNode(new_op) + + if before_op is None: + # If before op is None, the new node is appended in the end. + if self.start is None: + self.start = self.end = new_node + else: + self.end.next = new_node + new_node.prev = self.end + self.end = new_node + else: + anchor_node = self.op_to_node[before_op] + prev_node = anchor_node.prev + + if prev_node is None: + self.start = new_node + else: + prev_node.next = new_node + + new_node.prev = prev_node + new_node.next = anchor_node + anchor_node.prev = new_node + + self.op_to_node[new_op] = new_node + + def remove(self, op: Operation): + """ + Remove an op from the data structure. + """ + node = self.op_to_node[op] + prev_node, next_node = node.prev, node.next + + # reconnect the linked list + if prev_node is None: + self.start = next_node + else: + prev_node.next = next_node + + if next_node is None: + self.end = prev_node + else: + next_node.prev = prev_node + + node.prev = node.next = self.INVALID_NODE + + # remove op from the cache + del self.op_to_node[op] + + def __getitem__(self, idx: int) -> Operation: + """ + The indexing is expensive in doubly linked list, we should prevent direct access besides [0] and [-1]. + """ + if self.start is None: + raise ValueError("Cannot index an empty list.") + if idx >= len(self): + raise ValueError("Index out of range") + if idx == 0: + return self.start.op + elif idx == -1: + return self.end.op + raise ValueError("Doubly linked list does not support indexing other than 0, -1.") + + def _get_node_from_op(self, op: Operation) -> OpNode: + return self.op_to_node[op] + + def __iter__(self): + cursor = self.start + while cursor is not None: + if cursor is self.INVALID_NODE: + raise ValueError("Invalid iterator on CacheDoublyLinkedList.") + yield cursor.op + cursor = cursor.next + + def __reversed__(self): + cursor = self.end + while cursor is not None: + if cursor is self.INVALID_NODE: + raise ValueError("Invalid iterator on CacheDoublyLinkedList.") + yield cursor.op + cursor = cursor.prev + + def __len__(self) -> int: + return len(self.op_to_node) diff --git a/coremltools/converters/mil/mil/var.py b/coremltools/converters/mil/mil/var.py index d1fc96d6e..0d0756de8 100644 --- a/coremltools/converters/mil/mil/var.py +++ b/coremltools/converters/mil/mil/var.py @@ -3,12 +3,15 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from typing import Optional, Union +import copy +from collections import defaultdict +from typing import Dict, List, Optional, Union from coremltools.converters.mil.mil import types from coremltools.converters.mil.mil.types import builtin_to_string from coremltools.converters.mil.mil.types.symbolic import any_symbolic +from .scope import ScopeSource class Var: """ @@ -153,7 +156,11 @@ def _propagate_constness_upstream(var): op = var.op if op is None: return False - if op.op_type.startswith("constexpr_") or var.val is not None: + if ( + op.op_type.startswith("constexpr_") + or (op.op_type == "dequantize" and op.can_materialize_val()) + or var.val is not None + ): return True flattened_inputs = op.get_flattened_inputs() return all([x.is_descendant_of_const for x in flattened_inputs]) @@ -167,6 +174,10 @@ def _set_nonreplaceable_vars_upstream(self): op = self.op if op is None: return + if op.op_type == "shape": + # For the meta data ops, like shape, we stop propogate the 
nonreplaceable_vars. + self.nonreplaceable_vars_upstream = set() + return if Var._is_nonreplaceable_var(self): self.nonreplaceable_vars_upstream = set([self]) else: @@ -283,6 +294,19 @@ def is_tensor_or_scalar_of(self, dtype: Union[str, type]): def __str__(self): return "%" + self.name + ": " + self.shape_str() + self.type_str() + @property + def scopes(self) -> Dict[ScopeSource, List[str]]: + if self.op is None: + # An empty dictionary is returned for function input vars. + return defaultdict(list) + return self.op.scopes + + @scopes.setter + def scopes(self, scopes: Dict[ScopeSource, List[str]]): + if self.op is None: + raise ValueError(f"Cannot set scopes to a function input var {self}.") + self.op.scopes = copy.deepcopy(scopes) + class ListVar(Var): __slots__ = ["_elem_type", "init_length", "dynamic_length"] diff --git a/coremltools/converters/mil/testing_utils.py b/coremltools/converters/mil/testing_utils.py index 791e8d0d8..ee7a34908 100644 --- a/coremltools/converters/mil/testing_utils.py +++ b/coremltools/converters/mil/testing_utils.py @@ -2,13 +2,12 @@ # # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -import itertools - import copy +import itertools import os from functools import partial from pathlib import Path -from typing import Dict, List, Tuple +from typing import Dict, List, Optional, Tuple, Union import numpy as np import pytest @@ -17,10 +16,12 @@ import coremltools as ct import coremltools.models.utils as coremltoolsutils from coremltools._deps import _IS_MACOS +from coremltools.converters.mil import mil from coremltools.converters.mil.mil import Block, Function, Program from coremltools.converters.mil.mil.passes.defs.preprocess import NameSanitizer as _NameSanitizer -from coremltools.converters.mil.mil.passes.defs.quantization import AbstractQuantizationPass +from coremltools.converters.mil.mil.passes.graph_pass import AbstractGraphPass from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +from coremltools.converters.mil.mil.scope import ScopeSource from coremltools.proto import FeatureTypes_pb2 as ft np.random.seed(10) @@ -289,7 +290,7 @@ def ssa_fn(func): """ def wrapper(*args, **kwargs): - prog = Program() + prog = mil.Program() with Function({}) as ssa_func: func(*args, **kwargs) @@ -489,7 +490,7 @@ def get_core_ml_prediction( Return predictions of the given model. """ minimum_deployment_target = backend.opset_version - program = Program() + program = mil.Program() with Function(input_placeholders, opset_version=minimum_deployment_target) as ssa_func: output_vars = build(**ssa_func.inputs) if isinstance(output_vars, tuple): @@ -509,33 +510,88 @@ def get_core_ml_prediction( return mlmodel.predict(input_values) +def _decorate_prog_with_scope_if_not_present(prog: Program): + """ + For a program without any scope info, we manually add scopes to every op, + in ordere to test that all graph passes can preserve the source scope info. + """ + + def _is_scopes_present_in_program(prog: Program) -> bool: + """ + Return True is any op already has the scopes info. 
+ """ + + def _is_scopes_present_in_block(block: Block) -> bool: + for op in block.operations: + for b in op.blocks: + if _is_scopes_present_in_block(b): + return True + if len(op.scopes) > 0: + return True + + for func in prog.functions.values(): + if _is_scopes_present_in_block(func): + return True + + def _decorate_prog_with_default_torch_scope(prog: Program): + """ + Decorate every op in the program with a default TORCHSCRIPT_MODULE_TYPE scope info. + """ + + def _decorate_block_with_default_torch_scope(block: Block): + for op in block.operations: + for b in op.blocks: + _decorate_block_with_default_torch_scope(b) + assert ScopeSource.TORCHSCRIPT_MODULE_TYPE not in op.scopes + op.scopes[ScopeSource.TORCHSCRIPT_MODULE_TYPE] = ["dummy"] + + for func in prog.functions.values(): + _decorate_block_with_default_torch_scope(func) + + prog._add_essential_scope_source(ScopeSource.TORCHSCRIPT_MODULE_TYPE) + + if not _is_scopes_present_in_program(prog): + _decorate_prog_with_default_torch_scope(prog) + def apply_pass_and_basic_check( - prog, - pass_name, - skip_output_name_check=False, - skip_output_type_check=False, - skip_input_name_check=False, - skip_input_type_check=False, -): + prog: Program, + pass_name: Union[str, AbstractGraphPass], + skip_output_name_check: Optional[bool] = False, + skip_output_type_check: Optional[bool] = False, + skip_input_name_check: Optional[bool] = False, + skip_input_type_check: Optional[bool] = False, + skip_function_name_check: Optional[bool] = False, + func_name: Optional[str] = "main", + skip_essential_scope_check: Optional[bool] = False, +) -> Tuple[Program, Block, Block]: """ Apply pass to the program """ prev_prog = copy.deepcopy(prog) - graph_pass = pass_name if isinstance(pass_name, AbstractQuantizationPass) else PASS_REGISTRY[pass_name] + + graph_pass = pass_name if isinstance(pass_name, AbstractGraphPass) else PASS_REGISTRY[pass_name] + + _decorate_prog_with_scope_if_not_present(prog) graph_pass(prog) - block = prog.functions["main"] - prev_block = prev_prog.functions["main"] - if not skip_output_name_check: - assert_same_output_names(prev_prog, prog) - if not skip_output_type_check: - assert_same_output_types(prev_prog, prog) - assert_same_output_shapes(prev_prog, prog) + prog.validate(check_essential_scope=not skip_essential_scope_check) + + if not skip_function_name_check: + if prev_prog.functions.keys() != prog.functions.keys(): + raise ValueError("function names changed during {pass_name}.") + + for name in prev_prog.functions: + if not skip_output_name_check: + assert_same_output_names(prev_prog, prog, name) + if not skip_output_type_check: + assert_same_output_types(prev_prog, prog, name) + assert_same_output_shapes(prev_prog, prog, name) + + if not skip_input_name_check: + assert_same_input_names(prev_prog, prog, name) + if not skip_input_type_check: + assert_same_input_types(prev_prog, prog, name) - if not skip_input_name_check: - assert_same_input_names(prev_prog, prog) - if not skip_input_type_check: - assert_same_input_types(prev_prog, prog) - return prev_prog, prev_block, block + return prev_prog, prev_prog.functions[func_name], prog.functions[func_name] def assert_prog_input_type(prog, expected_dtype_str, expected_name=None, index=0): @@ -644,7 +700,9 @@ def verify_prediction(mlmodel, multiarray_type=None): input_dict[input_desc.name] = random_gen_input_feature_type(input_desc) if multiarray_type is not None: input_dict[input_desc.name] = input_dict[input].astype(multiarray_type) - mlmodel.predict(input_dict) + res = 
mlmodel.predict(input_dict) + assert isinstance(res, dict) + assert len(res) >= 1 def assert_spec_input_image_type(spec, expected_feature_type): assert spec.description.input[0].type.imageType.colorSpace == expected_feature_type diff --git a/coremltools/models/_compiled_model.py b/coremltools/models/_compiled_model.py index bba2baf4f..fe9f7a168 100644 --- a/coremltools/models/_compiled_model.py +++ b/coremltools/models/_compiled_model.py @@ -16,6 +16,21 @@ class CompiledMLModel: + + @staticmethod + def _init_check(path: str, compute_units: _ComputeUnit): + if _macos_version() < (10, 13): + raise Exception("Loading compiled Core ML models is only support on macOS 10.13 or higher.") + if _MLModelProxy is None: + raise Exception("Unable to load any compiled models. This is most likely because" + " coremltools was installed from an egg rather than a wheel.") + + if not isinstance(path, str): + raise TypeError('The "path" parameter must be of type "str".') + if not isinstance(compute_units, _ComputeUnit): + raise TypeError('The "compute_units" parameter must be of type: "coremltools.ComputeUnit".') + + def __init__(self, path: str, compute_units: _ComputeUnit =_ComputeUnit.ALL): """ Loads a compiled Core ML model. @@ -46,19 +61,9 @@ def __init__(self, path: str, compute_units: _ComputeUnit =_ComputeUnit.ALL): -------- predict """ - if _macos_version() < (10, 13): - raise Exception("Loading compiled Core ML models is only support on macOS 10.13 or higher.") - if _MLModelProxy is None: - raise Exception("Unable to load any compiled models. This is most likely because" - " coremltools was installed from an egg rather than a wheel.") - - if not isinstance(path, str): - raise TypeError('The "path" parameter must be of type "str".') - if not isinstance(compute_units, _ComputeUnit): - raise TypeError('The "compute_units" parameter must be of type: "coremltools.ComputeUnit".') + self._init_check(path, compute_units) path = _expanduser(path) - self._proxy = _MLModelProxy(path, compute_units.name) diff --git a/coremltools/models/model.py b/coremltools/models/model.py index bc427a438..478a1a191 100644 --- a/coremltools/models/model.py +++ b/coremltools/models/model.py @@ -4,22 +4,24 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import atexit as _atexit +from copy import deepcopy as _deepcopy +import json import os as _os import shutil as _shutil import tempfile as _tempfile +from typing import Optional as _Optional import warnings as _warnings -from copy import deepcopy as _deepcopy + import numpy as _np import numpy as _numpy from coremltools import ComputeUnit as _ComputeUnit +from coremltools import _logger as logger +from coremltools import proto as _proto from coremltools._deps import _HAS_TF_1, _HAS_TF_2, _HAS_TORCH from coremltools.converters.mil.mil.program import Program as _Program -from ..proto import FeatureTypes_pb2 as _ft -from ..proto import MIL_pb2 as _MIL_pb2 -from ..proto import Model_pb2 as _Model_pb2 from .utils import ( _MLMODEL_EXTENSION, _MLPACKAGE_AUTHOR_NAME, @@ -46,6 +48,12 @@ except: _ModelPackage = None +try: + from ..libcoremlpython import _MLModelProxy +except Exception as e: + logger.warning(f"Failed to load _MLModelProxy: {e}") + _MLModelProxy = None + _HAS_PIL = True try: from PIL import Image as _PIL_IMAGE @@ -130,38 +138,6 @@ def __iter__(self): yield f.name -def _get_proxy_and_spec(filename, compute_units, skip_model_load=False): - try: - from ..libcoremlpython import _MLModelProxy - except Exception: - _MLModelProxy = None - - 
filename = _os.path.expanduser(filename) - specification = _load_spec(filename) - - if _MLModelProxy and not skip_model_load: - - # check if the version is supported - engine_version = _MLModelProxy.maximum_supported_specification_version() - if specification.specificationVersion > engine_version: - # in this case the specification is a newer kind of .mlmodel than this - # version of the engine can support so we'll not try to have a proxy object - return None, specification, None - - try: - return _MLModelProxy(filename, compute_units.name), specification, None - except RuntimeError as e: - _warnings.warn( - "You will not be able to run predict() on this Core ML model." - + " Underlying exception message was: " - + str(e), - RuntimeWarning, - ) - return None, specification, e - - return None, specification, None - - def _try_get_weights_dir_path(mlpackage_path): """ Try to find the weights in mlpackage and return the path to the weights directory if found. @@ -182,7 +158,7 @@ def _try_get_weights_dir_path(mlpackage_path): class MLModel: """ - This class defines the minimal interface to a CoreML object in Python. + This class defines the minimal interface to a Core ML object in Python. At a high level, the protobuf specification consists of: @@ -379,10 +355,10 @@ def does_model_contain_mlprogram(model) -> bool: self.package_path = model self.is_temp_package = is_temp_package self._weights_dir = _try_get_weights_dir_path(model) - self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec( + self.__proxy__, self._spec, self._framework_error = self._get_proxy_and_spec( model, compute_units, skip_model_load=skip_model_load, ) - elif isinstance(model, _Model_pb2.Model): + elif isinstance(model, _proto.Model_pb2.Model): if does_model_contain_mlprogram(model): if model.WhichOneof("Type") == "mlProgram" and weights_dir is None: raise Exception( @@ -399,7 +375,7 @@ def does_model_contain_mlprogram(model) -> bool: filename = _tempfile.mktemp(suffix=_MLMODEL_EXTENSION) _save_spec(model, filename) - self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec( + self.__proxy__, self._spec, self._framework_error = self._get_proxy_and_spec( filename, compute_units, skip_model_load=skip_model_load, ) try: @@ -413,10 +389,43 @@ def does_model_contain_mlprogram(model) -> bool: self._input_description = _FeatureDescription(self._spec.description.input) self._output_description = _FeatureDescription(self._spec.description.output) + self._model_input_names_set = set([i.name for i in self._spec.description.input]) if self.is_package and self.is_temp_package: _atexit.register(cleanup, self.package_path) + + def _get_proxy_and_spec(self, + filename: str, + compute_units: _ComputeUnit, + skip_model_load: _Optional[bool] = False): + + filename = _os.path.expanduser(filename) + specification = _load_spec(filename) + + if _MLModelProxy and not skip_model_load: + + # check if the version is supported + engine_version = _MLModelProxy.maximum_supported_specification_version() + if specification.specificationVersion > engine_version: + # in this case the specification is a newer kind of .mlmodel than this + # version of the engine can support so we'll not try to have a proxy object + return None, specification, None + + try: + return _MLModelProxy(filename, compute_units.name), specification, None + except RuntimeError as e: + _warnings.warn( + "You will not be able to run predict() on this Core ML model." 
+ + " Underlying exception message was: " + + str(e), + RuntimeWarning, + ) + return None, specification, e + + return None, specification, None + + @property def short_description(self): return self._spec.description.metadata.shortDescription @@ -509,6 +518,23 @@ def save(self, save_path: str): ) _shutil.copytree(self.package_path, save_path) + if self._mil_program is not None: + debug_handle_to_ops_mapping = ( + self._mil_program.construct_debug_handle_to_ops_mapping() + ) + if len(debug_handle_to_ops_mapping) > 0: + debug_handle_to_ops_mapping_as_json = json.dumps( + [ + {_METADATA_VERSION: self.user_defined_metadata[_METADATA_VERSION]}, + debug_handle_to_ops_mapping, + ] + ) + saved_debug_handle_to_ops_mapping_path = _os.path.join( + save_path, "executorch_debug_handle_mapping.json" + ) + with open(saved_debug_handle_to_ops_mapping_path, "w") as f: + f.write(debug_handle_to_ops_mapping_as_json) + saved_spec_path = _os.path.join( save_path, "Data", _MLPACKAGE_AUTHOR_NAME, _MODEL_FILE_NAME ) @@ -600,15 +626,6 @@ def verify_and_convert_input_dict(d): "Model prediction is only supported on macOS version 10.13 or later." ) - try: - from ..libcoremlpython import _MLModelProxy - except Exception as e: - print("Exception loading model proxy: %s\n" % e) - _MLModelProxy = None - except: - print("Exception while loading model proxy.\n") - _MLModelProxy = None - if not _MLModelProxy: raise Exception("Unable to load CoreML.framework. Cannot make predictions.") elif ( @@ -670,9 +687,9 @@ def _set_build_info_mil_attributes(self, metadata): build_info_proto = ml_program_attributes["buildInfo"] # Set ValueType to dictionary of string to string - str_type = _MIL_pb2.ValueType() - str_type.tensorType.dataType = _MIL_pb2.DataType.STRING - dict_type_str_to_str = _MIL_pb2.ValueType() + str_type = _proto.MIL_pb2.ValueType() + str_type.tensorType.dataType = _proto.MIL_pb2.DataType.STRING + dict_type_str_to_str = _proto.MIL_pb2.ValueType() dict_type_str_to_str.dictionaryType.keyType.CopyFrom(str_type) dict_type_str_to_str.dictionaryType.valueType.CopyFrom(str_type) build_info_proto.type.CopyFrom(dict_type_str_to_str) @@ -680,7 +697,7 @@ def _set_build_info_mil_attributes(self, metadata): # Copy the metadata build_info_dict = build_info_proto.immediateValue.dictionary for k, v in metadata.items(): - key_pair = _MIL_pb2.DictionaryValue.KeyValuePair() + key_pair = _proto.MIL_pb2.DictionaryValue.KeyValuePair() key_pair.key.immediateValue.tensor.strings.values.append(k) key_pair.key.type.CopyFrom(str_type) key_pair.value.immediateValue.tensor.strings.values.append(v) @@ -728,27 +745,36 @@ def _verify_pil_image_modes(self, input_dict): if not isinstance(input_val, _PIL_IMAGE.Image): msg = "Image input, '{}' must be of type PIL.Image.Image in the input dict" raise TypeError(msg.format(input_desc.name)) - if input_desc.type.imageType.colorSpace in (_ft.ImageFeatureType.BGR, _ft.ImageFeatureType.RGB): - if input_val.mode != 'RGB': + if input_desc.type.imageType.colorSpace in ( + _proto.FeatureTypes_pb2.ImageFeatureType.BGR, + _proto.FeatureTypes_pb2.ImageFeatureType.RGB, + ): + if input_val.mode != "RGB": msg = "RGB/BGR image input, '{}', must be of type PIL.Image.Image with mode=='RGB'" raise TypeError(msg.format(input_desc.name)) - elif input_desc.type.imageType.colorSpace == _ft.ImageFeatureType.GRAYSCALE: - if input_val.mode != 'L': + elif ( + input_desc.type.imageType.colorSpace + == _proto.FeatureTypes_pb2.ImageFeatureType.GRAYSCALE + ): + if input_val.mode != "L": msg = "GRAYSCALE image input, '{}', must be of 
type PIL.Image.Image with mode=='L'" raise TypeError(msg.format(input_desc.name)) - elif input_desc.type.imageType.colorSpace == _ft.ImageFeatureType.GRAYSCALE_FLOAT16: - if input_val.mode != 'F': + elif ( + input_desc.type.imageType.colorSpace + == _proto.FeatureTypes_pb2.ImageFeatureType.GRAYSCALE_FLOAT16 + ): + if input_val.mode != "F": msg = "GRAYSCALE_FLOAT16 image input, '{}', must be of type PIL.Image.Image with mode=='F'" raise TypeError(msg.format(input_desc.name)) + def _verify_input_name_exists(self, input_dict): - model_input_names = [inp.name for inp in self._spec.description.input] - model_input_names_set = set(model_input_names) for given_input in input_dict.keys(): - if given_input not in model_input_names_set: + if given_input not in self._model_input_names_set: err_msg = "Provided key \"{}\", in the input dict, " \ "does not match any of the model input name(s), which are: {}" - raise KeyError(err_msg.format(given_input, ",".join(model_input_names))) + raise KeyError(err_msg.format(given_input, self._model_input_names_set)) + @staticmethod def _update_float16_multiarray_input_to_float32(input_data: dict): diff --git a/coremltools/models/neural_network/builder.py b/coremltools/models/neural_network/builder.py index 7f791c2c4..6b28a0008 100644 --- a/coremltools/models/neural_network/builder.py +++ b/coremltools/models/neural_network/builder.py @@ -10,18 +10,16 @@ import numpy as _np -from ... import (_MINIMUM_NDARRAY_SPEC_VERSION, - _MINIMUM_UPDATABLE_SPEC_VERSION, - _SPECIFICATION_VERSION_IOS_14) +from ... import ( + _MINIMUM_NDARRAY_SPEC_VERSION, + _MINIMUM_UPDATABLE_SPEC_VERSION, + _SPECIFICATION_VERSION_IOS_14, +) from ... import SPECIFICATION_VERSION as _SPECIFICATION_VERSION -from ...proto import FeatureTypes_pb2 as _FeatureTypes_pb2 -from ...proto import Model_pb2 as _Model_pb2 -from ...proto import NeuralNetwork_pb2 as _NeuralNetwork_pb2 +from ... import proto as _proto from .. import datatypes -from .._interface_management import (set_training_features, - set_transform_interface_params) -from .quantization_utils import (_convert_array_to_nbit_quantized_bytes, - _unpack_to_bytes) +from .._interface_management import set_training_features, set_transform_interface_params +from .quantization_utils import _convert_array_to_nbit_quantized_bytes, _unpack_to_bytes from .spec_inspection_utils import _summarize_network_layer_info from .update_optimizer_utils import AdamParams, SgdParams @@ -146,7 +144,7 @@ def _get_nn_spec(spec): def _get_lstm_weight_fields(lstm_wp): """ Get LSTM weight fields. - lstm_wp: _NeuralNetwork_pb2.LSTMWeightParams + lstm_wp: _proto.NeuralNetwork_pb2.LSTMWeightParams """ return [ lstm_wp.inputGateWeightMatrix, @@ -387,7 +385,7 @@ def __init__( # Set the interface params. 
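# A minimal illustrative sketch (assuming `from coremltools import proto`, as in the
# imports above): the consolidated namespace resolves to the same generated protobuf
# modules as the old per-module imports, so an empty spec is still created with
#     spec = proto.Model_pb2.Model()
#     spec.specificationVersion = 1
# The renamed accessors in the surrounding hunks therefore do not change behavior.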
if self.spec is None: - self.spec = _Model_pb2.Model() + self.spec = _proto.Model_pb2.Model() self.spec.specificationVersion = _SPECIFICATION_VERSION if disable_rank5_shape_mapping: self.spec.specificationVersion = _MINIMUM_NDARRAY_SPEC_VERSION @@ -408,9 +406,9 @@ def __init__( del self.spec.description.output[:] if use_float_arraytype: - array_datatype = _Model_pb2.ArrayFeatureType.FLOAT32 + array_datatype = _proto.Model_pb2.ArrayFeatureType.FLOAT32 else: - array_datatype = _Model_pb2.ArrayFeatureType.DOUBLE + array_datatype = _proto.Model_pb2.ArrayFeatureType.DOUBLE self.spec = set_transform_interface_params( self.spec, @@ -439,11 +437,13 @@ def __init__( self.nn_spec = nn_spec if disable_rank5_shape_mapping and self.nn_spec: - self.nn_spec.arrayInputShapeMapping = _NeuralNetwork_pb2.NeuralNetworkMultiArrayShapeMapping.Value( - "EXACT_ARRAY_MAPPING" + self.nn_spec.arrayInputShapeMapping = ( + _proto.NeuralNetwork_pb2.NeuralNetworkMultiArrayShapeMapping.Value( + "EXACT_ARRAY_MAPPING" + ) ) - self.nn_spec.imageInputShapeMapping = _NeuralNetwork_pb2.NeuralNetworkImageShapeMapping.Value( - "RANK4_IMAGE_MAPPING" + self.nn_spec.imageInputShapeMapping = ( + _proto.NeuralNetwork_pb2.NeuralNetworkImageShapeMapping.Value("RANK4_IMAGE_MAPPING") ) def set_input(self, input_names, input_dims): @@ -503,7 +503,7 @@ def set_input(self, input_names, input_dims): # TODO: if it's an embedding, this should be integer spec.description.input[ idx - ].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE + ].type.multiArrayType.dataType = _proto.Model_pb2.ArrayFeatureType.DOUBLE spec.description.input[idx].name = input_names[idx] @@ -542,7 +542,7 @@ def set_output(self, output_names, output_dims): spec.description.output[idx].type.multiArrayType.shape.extend(dim) spec.description.output[ idx - ].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE + ].type.multiArrayType.dataType = _proto.Model_pb2.ArrayFeatureType.DOUBLE spec.description.output[idx].name = output_names[idx] @@ -735,11 +735,11 @@ def add_optionals(self, optionals_in, optionals_out): for idx in range(len_before_in, len(spec.description.input)): spec.description.input[ idx - ].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE + ].type.multiArrayType.dataType = _proto.Model_pb2.ArrayFeatureType.DOUBLE for idx in range(len_before_out, len(spec.description.output)): spec.description.output[ idx - ].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE + ].type.multiArrayType.dataType = _proto.Model_pb2.ArrayFeatureType.DOUBLE def _check_fp16_weight_params_lstms(self, lstm_wp, has_peephole=True): @@ -939,11 +939,11 @@ def make_updatable(self, trainables): typed_layer = getattr(spec_layer, spec_layer.WhichOneof("layer")) for fd in typed_layer.DESCRIPTOR.fields: field = getattr(typed_layer, fd.name) - if type(field) == _NeuralNetwork_pb2.LSTMWeightParams: + if type(field) == _proto.NeuralNetwork_pb2.LSTMWeightParams: wfs = _get_lstm_weight_fields(field) for wf in wfs: wf.isUpdatable = True - elif type(field) == _NeuralNetwork_pb2.WeightParams: + elif type(field) == _proto.NeuralNetwork_pb2.WeightParams: field.isUpdatable = True else: pass @@ -1042,9 +1042,7 @@ def set_categorical_cross_entropy_loss(self, name, input): else: training_input.name = target datatypes._set_datatype(training_input.type, datatypes.Array(1)) - training_input.type.multiArrayType.dataType = ( - _Model_pb2.ArrayFeatureType.INT32 - ) + training_input.type.multiArrayType.dataType = _proto.Model_pb2.ArrayFeatureType.INT32 print( 
"Now adding input {} as target for categorical cross-entropy loss layer.".format( @@ -1112,7 +1110,7 @@ def set_mean_squared_error_loss(self, name, input_feature=None): training_input.name = target datatypes._set_datatype(training_input.type, input_feature[1]) - training_input.type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE + training_input.type.multiArrayType.dataType = _proto.Model_pb2.ArrayFeatureType.DOUBLE print( "Now adding input {} as target for mean squared error loss layer.".format( target @@ -2097,11 +2095,13 @@ def add_upsample( spec_layer_params.scalingFactor.append(int(scaling_factor_h)) spec_layer_params.scalingFactor.append(int(scaling_factor_w)) - spec_layer_params.mode = _NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value( - mode + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value(mode) ) - spec_layer_params.linearUpsampleMode = _NeuralNetwork_pb2.UpsampleLayerParams.LinearUpsampleMode.Value( - linear_upsample_mode + spec_layer_params.linearUpsampleMode = ( + _proto.NeuralNetwork_pb2.UpsampleLayerParams.LinearUpsampleMode.Value( + linear_upsample_mode + ) ) return spec_layer @@ -2469,8 +2469,10 @@ def add_convolution( "Invalid value %d of same_padding_asymmetry_mode parameter" % same_padding_asymmetry_mode ) - spec_layer_params.same.asymmetryMode = _NeuralNetwork_pb2.SamePadding.SamePaddingMode.Value( - same_padding_asymmetry_mode + spec_layer_params.same.asymmetryMode = ( + _proto.NeuralNetwork_pb2.SamePadding.SamePaddingMode.Value( + same_padding_asymmetry_mode + ) ) else: raise NotImplementedError( @@ -2722,8 +2724,10 @@ def add_convolution3d( spec_layer_params.customPaddingBottom = padding_bottom spec_layer_params.customPaddingLeft = padding_left spec_layer_params.customPaddingRight = padding_right - spec_layer_params.paddingType = _NeuralNetwork_pb2.Convolution3DLayerParams.PaddingType.Value( - padding_mode.upper() + spec_layer_params.paddingType = ( + _proto.NeuralNetwork_pb2.Convolution3DLayerParams.PaddingType.Value( + padding_mode.upper() + ) ) spec_layer_params.dilationDepth = dilation_depth @@ -2837,7 +2841,7 @@ def add_pooling( spec_layer_params = spec_layer.pooling # Set the parameters - spec_layer_params.type = _NeuralNetwork_pb2.PoolingLayerParams.PoolingType.Value( + spec_layer_params.type = _proto.NeuralNetwork_pb2.PoolingLayerParams.PoolingType.Value( layer_type.upper() ) @@ -2868,8 +2872,10 @@ def add_pooling( "Invalid value %d of same_padding_asymmetry_mode parameter" % same_padding_asymmetry_mode ) - spec_layer_params.same.asymmetryMode = _NeuralNetwork_pb2.SamePadding.SamePaddingMode.Value( - same_padding_asymmetry_mode + spec_layer_params.same.asymmetryMode = ( + _proto.NeuralNetwork_pb2.SamePadding.SamePaddingMode.Value( + same_padding_asymmetry_mode + ) ) elif padding_type == "INCLUDE_LAST_PIXEL": if padding_top != padding_bottom or padding_left != padding_right: @@ -2968,7 +2974,7 @@ def add_pooling3d( spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.pooling3d - spec_layer_params.type = _NeuralNetwork_pb2.Pooling3DLayerParams.PoolingType3D.Value( + spec_layer_params.type = _proto.NeuralNetwork_pb2.Pooling3DLayerParams.PoolingType3D.Value( pooling_type.upper() ) @@ -2993,8 +2999,10 @@ def add_pooling3d( spec_layer_params.customPaddingBottom = custom_padding_bottom spec_layer_params.customPaddingLeft = custom_padding_left spec_layer_params.customPaddingRight = custom_padding_right - spec_layer_params.paddingType = 
_NeuralNetwork_pb2.Pooling3DLayerParams.Pooling3DPaddingType.Value( - padding_mode.upper() + spec_layer_params.paddingType = ( + _proto.NeuralNetwork_pb2.Pooling3DLayerParams.Pooling3DPaddingType.Value( + padding_mode.upper() + ) ) spec_layer_params.countExcludePadding = average_pooling_count_excludes_padding @@ -3034,8 +3042,10 @@ def add_global_pooling3d(self, name, input_name, output_name, pooling_type): spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.globalPooling3d - spec_layer_params.type = _NeuralNetwork_pb2.GlobalPooling3DLayerParams.GlobalPoolingType3D.Value( - pooling_type.upper() + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.GlobalPooling3DLayerParams.GlobalPoolingType3D.Value( + pooling_type.upper() + ) ) return spec_layer @@ -3743,11 +3753,11 @@ def add_flatten(self, name, mode, input_name, output_name): # Set the parameters if mode == 0: - spec_layer_params.mode = _NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Value( + spec_layer_params.mode = _proto.NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Value( "CHANNEL_FIRST" ) elif mode == 1: - spec_layer_params.mode = _NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Value( + spec_layer_params.mode = _proto.NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Value( "CHANNEL_LAST" ) else: @@ -3803,15 +3813,15 @@ def add_slice( axis = axis.lower() if isinstance(axis, str) else axis if axis == "channel": - spec_layer_params.axis = _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( + spec_layer_params.axis = _proto.NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( "CHANNEL_AXIS" ) elif axis == "height": - spec_layer_params.axis = _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( + spec_layer_params.axis = _proto.NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( "HEIGHT_AXIS" ) elif axis == "width": - spec_layer_params.axis = _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( + spec_layer_params.axis = _proto.NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( "WIDTH_AXIS" ) else: @@ -3909,12 +3919,16 @@ def add_reorganize_data( mode = mode.upper() if isinstance(mode, str) else mode if mode == "SPACE_TO_DEPTH": - spec_layer_params.mode = _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( - "SPACE_TO_DEPTH" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( + "SPACE_TO_DEPTH" + ) ) elif mode == "DEPTH_TO_SPACE": - spec_layer_params.mode = _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( - "DEPTH_TO_SPACE" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( + "DEPTH_TO_SPACE" + ) ) elif mode == "PIXEL_SHUFFLE": if self.spec and ( @@ -3922,8 +3936,10 @@ def add_reorganize_data( or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 ): self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 - spec_layer_params.mode = _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( - "PIXEL_SHUFFLE" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( + "PIXEL_SHUFFLE" + ) ) else: raise NotImplementedError("Unknown reorganization mode %s." 
% mode) @@ -4085,11 +4101,11 @@ def add_reshape(self, name, input_name, output_name, target_shape, mode): spec_layer_params = spec_layer.reshape spec_layer_params.targetShape.extend(target_shape) if mode == 0: - spec_layer_params.mode = _NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Value( + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Value( "CHANNEL_FIRST" ) else: - spec_layer_params.mode = _NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Value( + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Value( "CHANNEL_LAST" ) @@ -4139,67 +4155,67 @@ def add_reduce(self, name, input_name, output_name, axis, mode, epsilon=1e-6): mode = mode.lower() if isinstance(mode, str) else mode if mode == "sum": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "SUM" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("SUM") ) elif mode == "avg": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "AVG" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("AVG") ) elif mode == "prod": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "PROD" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("PROD") ) elif mode == "logsum": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "LOGSUM" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("LOGSUM") ) elif mode == "sumsquare": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "SUMSQUARE" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("SUMSQUARE") ) elif mode == "l1": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "L1" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("L1") ) elif mode == "l2": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "L2" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("L2") ) elif mode == "max": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "MAX" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("MAX") ) elif mode == "min": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "MIN" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("MIN") ) elif mode == "argmax": - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( - "ARGMAX" + spec_layer_params.mode = ( + _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value("ARGMAX") ) else: raise NotImplementedError("Unknown reduction operation %s." 
% mode) axis = axis.upper() if isinstance(axis, str) else axis if axis == "CHW": - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + spec_layer_params.axis = _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( "CHW" ) elif axis == "HW": - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + spec_layer_params.axis = _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( "HW" ) elif axis == "C": - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + spec_layer_params.axis = _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( "C" ) elif axis == "H": - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + spec_layer_params.axis = _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( "H" ) elif axis == "W": - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + spec_layer_params.axis = _proto.NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( "W" ) else: @@ -4377,36 +4393,36 @@ def add_unary( mode = mode.lower() if isinstance(mode, str) else mode if mode == "sqrt": - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( - "SQRT" + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value("SQRT") ) elif mode == "rsqrt": - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( - "RSQRT" + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value("RSQRT") ) elif mode == "inverse": - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( - "INVERSE" + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value("INVERSE") ) elif mode == "power": - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( - "POWER" + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value("POWER") ) elif mode == "exp": - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( - "EXP" + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value("EXP") ) elif mode == "log": - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( - "LOG" + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value("LOG") ) elif mode == "abs": - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( - "ABS" + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value("ABS") ) elif mode == "threshold": - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( - "THRESHOLD" + spec_layer_params.type = ( + _proto.NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value("THRESHOLD") ) else: raise NotImplementedError("Unknown unary function %s " % mode) @@ -4552,20 +4568,20 @@ def add_resize_bilinear( mode = mode.upper() if isinstance(mode, str) else mode if mode == "ALIGN_ENDPOINTS_MODE": - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - "ALIGN_ENDPOINTS_MODE" + spec_layer_params.mode.samplingMethod = ( + _proto.NeuralNetwork_pb2.SamplingMode.Method.Value("ALIGN_ENDPOINTS_MODE") ) elif mode == "STRICT_ALIGN_ENDPOINTS_MODE": - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - 
"STRICT_ALIGN_ENDPOINTS_MODE" + spec_layer_params.mode.samplingMethod = ( + _proto.NeuralNetwork_pb2.SamplingMode.Method.Value("STRICT_ALIGN_ENDPOINTS_MODE") ) elif mode == "UPSAMPLE_MODE": - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - "UPSAMPLE_MODE" + spec_layer_params.mode.samplingMethod = ( + _proto.NeuralNetwork_pb2.SamplingMode.Method.Value("UPSAMPLE_MODE") ) elif mode == "ROI_ALIGN_MODE": - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - "ROI_ALIGN_MODE" + spec_layer_params.mode.samplingMethod = ( + _proto.NeuralNetwork_pb2.SamplingMode.Method.Value("ROI_ALIGN_MODE") ) else: raise ValueError("Unsupported resize bilinear mode %s" % mode) @@ -4669,39 +4685,45 @@ def add_crop_resize( ) if mode == "ALIGN_ENDPOINTS_MODE": - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - "ALIGN_ENDPOINTS_MODE" + spec_layer_params.mode.samplingMethod = ( + _proto.NeuralNetwork_pb2.SamplingMode.Method.Value("ALIGN_ENDPOINTS_MODE") ) elif mode == "STRICT_ALIGN_ENDPOINTS_MODE": - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - "STRICT_ALIGN_ENDPOINTS_MODE" + spec_layer_params.mode.samplingMethod = ( + _proto.NeuralNetwork_pb2.SamplingMode.Method.Value("STRICT_ALIGN_ENDPOINTS_MODE") ) elif mode == "UPSAMPLE_MODE": - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - "UPSAMPLE_MODE" + spec_layer_params.mode.samplingMethod = ( + _proto.NeuralNetwork_pb2.SamplingMode.Method.Value("UPSAMPLE_MODE") ) elif mode == "ROI_ALIGN_MODE": - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - "ROI_ALIGN_MODE" + spec_layer_params.mode.samplingMethod = ( + _proto.NeuralNetwork_pb2.SamplingMode.Method.Value("ROI_ALIGN_MODE") ) else: raise ValueError("Unsupported crop resize mode %s" % mode) if box_indices_mode == "CORNERS_HEIGHT_FIRST": - spec_layer_params.boxIndicesMode.boxMode = _NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( - "CORNERS_HEIGHT_FIRST" + spec_layer_params.boxIndicesMode.boxMode = ( + _proto.NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( + "CORNERS_HEIGHT_FIRST" + ) ) elif box_indices_mode == "CORNERS_WIDTH_FIRST": - spec_layer_params.boxIndicesMode.boxMode = _NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( - "CORNERS_WIDTH_FIRST" + spec_layer_params.boxIndicesMode.boxMode = ( + _proto.NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value("CORNERS_WIDTH_FIRST") ) elif box_indices_mode == "CENTER_SIZE_HEIGHT_FIRST": - spec_layer_params.boxIndicesMode.boxMode = _NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( - "CENTER_SIZE_HEIGHT_FIRST" + spec_layer_params.boxIndicesMode.boxMode = ( + _proto.NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( + "CENTER_SIZE_HEIGHT_FIRST" + ) ) elif box_indices_mode == "CENTER_SIZE_WIDTH_FIRST": - spec_layer_params.boxIndicesMode.boxMode = _NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( - "CENTER_SIZE_WIDTH_FIRST" + spec_layer_params.boxIndicesMode.boxMode = ( + _proto.NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( + "CENTER_SIZE_WIDTH_FIRST" + ) ) else: raise ValueError( @@ -4892,22 +4914,22 @@ def check_valid_preprocessing_keys(input, target, input_name): # TODO: If input is not rank 3 or 4, then accordingly handle # e.g. 
for rank-2 input, squeeze additional dimension in case of Gray scale image if channels == 1: - input_.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( - "GRAYSCALE" + input_.type.imageType.colorSpace = ( + _proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("GRAYSCALE") ) elif channels == 3: if input_.name in is_bgr: if is_bgr[input_.name]: - input_.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( - "BGR" + input_.type.imageType.colorSpace = ( + _proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("BGR") ) else: - input_.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( - "RGB" + input_.type.imageType.colorSpace = ( + _proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("RGB") ) else: - input_.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( - "RGB" + input_.type.imageType.colorSpace = ( + _proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("RGB") ) else: raise ValueError( @@ -5058,15 +5080,15 @@ def add_gelu(self, name, input_name, output_name, mode="EXACT"): spec_layer_params = spec_layer.gelu if mode == "EXACT": - spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( + spec_layer_params.mode = _proto.NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( "EXACT" ) elif mode == "TANH_APPROXIMATION": - spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( + spec_layer_params.mode = _proto.NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( "TANH_APPROXIMATION" ) elif mode == "SIGMOID_APPROXIMATION": - spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( + spec_layer_params.mode = _proto.NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( "SIGMOID_APPROXIMATION" ) else: @@ -6530,21 +6552,19 @@ def add_scatter(self, name, input_names, output_name, axis=0, mode="UPDATE"): mode = mode.upper() if isinstance(mode, str) else mode if mode == "UPDATE": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value( - "SCATTER_UPDATE" - ) + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_UPDATE") elif mode == "ADD": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") elif mode == "SUB": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") elif mode == "MUL": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") elif mode == "DIV": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") elif mode == "MAX": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") elif mode == "MIN": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") else: raise ValueError("Unsupported Scatter mode %s" % mode) @@ -6611,21 +6631,19 @@ def add_scatter_along_axis( mode = mode.upper() if isinstance(mode, str) else mode if mode == "UPDATE": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value( - "SCATTER_UPDATE" - ) + 
spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_UPDATE") elif mode == "ADD": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") elif mode == "SUB": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") elif mode == "MUL": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") elif mode == "DIV": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") elif mode == "MAX": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") elif mode == "MIN": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") else: raise ValueError("Unsupported scatter_along_axis mode %s" % mode) @@ -6690,21 +6708,19 @@ def add_scatter_nd(self, name, input_names, output_name, mode="UPDATE"): mode = mode.upper() if isinstance(mode, str) else mode if mode == "UPDATE": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value( - "SCATTER_UPDATE" - ) + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_UPDATE") elif mode == "ADD": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") elif mode == "SUB": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") elif mode == "MUL": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") elif mode == "DIV": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") elif mode == "MAX": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") elif mode == "MIN": - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") + spec_layer_params.mode = _proto.NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") else: raise ValueError("Unsupported scatter mode %s" % mode) diff --git a/coremltools/models/neural_network/flexible_shape_utils.py b/coremltools/models/neural_network/flexible_shape_utils.py index f2e42794e..fbb8d1f03 100644 --- a/coremltools/models/neural_network/flexible_shape_utils.py +++ b/coremltools/models/neural_network/flexible_shape_utils.py @@ -7,8 +7,12 @@ Utilities to annotate Neural Network Features with flexible shape information. """ -from ... import (_MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, - _MINIMUM_NDARRAY_SPEC_VERSION) +from typing import List as _List +from typing import Tuple as _Tuple + +from coremltools.proto import Model_pb2 as _ml + +from ... 
import _MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, _MINIMUM_NDARRAY_SPEC_VERSION from ..utils import _get_feature _SEQUENCE_KEY = "S" @@ -296,6 +300,150 @@ def get_height_range(self): return self._height_range +def _set_multiarray_ndshape_range_for_feature( + feature: _ml.FeatureDescription, + lower_bounds: _List[int], + upper_bounds: _List[int], +): + + if not isinstance(lower_bounds, list): + raise Exception("lower_bounds must be a list") + if not isinstance(upper_bounds, list): + raise Exception("upper_bounds must be a list") + + if feature.type.WhichOneof("Type") != "multiArrayType": + raise Exception("Trying to update shape range for " "a non-multiArray feature type") + + shape = feature.type.multiArrayType.shape + + if len(shape) != len(lower_bounds): + raise Exception( + "Length of lower_bounds is not equal to the number of dimensions in the default shape" + ) + if len(shape) != len(upper_bounds): + raise Exception( + "Length of upper_bounds is not equal to the number of dimensions in the default shape" + ) + + feature.type.multiArrayType.ClearField("ShapeFlexibility") + + for i in range(len(lower_bounds)): + if shape[i] < lower_bounds[i]: + raise Exception( + "Default shape in %d-th dimension, which is %d, is smaller" + " than the lower bound of %d" % (i, int(shape[i]), lower_bounds[i]) + ) + if upper_bounds[i] != -1: + if shape[i] > upper_bounds[i]: + raise Exception( + "Default shape in %d-th dimension, which is %d, is greater" + " than the upper bound of %d" % (i, int(shape[i]), upper_bounds[i]) + ) + + s = feature.type.multiArrayType.shapeRange.sizeRanges.add() + s.lowerBound = lower_bounds[i] + s.upperBound = upper_bounds[i] + + +def _update_image_size_range_for_feature( + feature: _ml.FeatureDescription, + size_range: NeuralNetworkImageSizeRange, +): + if not isinstance(size_range, NeuralNetworkImageSizeRange): + raise Exception("Shape ranges should be of type NeuralNetworkImageSizeRange") + + if feature.type.WhichOneof("Type") != "imageType": + raise Exception("Trying to add size ranges for " "a non-image feature type") + + feature.type.imageType.ClearField("SizeFlexibility") + feature.type.imageType.imageSizeRange.heightRange.lowerBound = ( + size_range.get_height_range().lowerBound + ) + feature.type.imageType.imageSizeRange.heightRange.upperBound = ( + size_range.get_height_range().upperBound + ) + + feature.type.imageType.imageSizeRange.widthRange.lowerBound = ( + size_range.get_width_range().lowerBound + ) + feature.type.imageType.imageSizeRange.widthRange.upperBound = ( + size_range.get_width_range().upperBound + ) + + +def _add_multiarray_ndshape_enumeration_for_feature( + feature: _ml.FeatureDescription, + enumerated_shapes: _List[_Tuple[int]], +): + if not isinstance(enumerated_shapes, list): + raise Exception("enumerated_shapes must be a list") + if len(enumerated_shapes) == 0: + raise Exception("enumerated_shapes is empty") + + if feature.type.WhichOneof("Type") != "multiArrayType": + raise Exception("Trying to update shape range for " "a non-multiArray feature type") + + shape = feature.type.multiArrayType.shape + + if feature.type.multiArrayType.WhichOneof("ShapeFlexibility") != "enumeratedShapes": + feature.type.multiArrayType.ClearField("ShapeFlexibility") + + eshape_len = len(feature.type.multiArrayType.enumeratedShapes.shapes) + + shapes_added_so_far = [] + + # Add default array shape to list of enumerated shapes if enumerated shapes + # field is currently empty + if eshape_len == 0: + fixed_shape = feature.type.multiArrayType.shape + s = 
feature.type.multiArrayType.enumeratedShapes.shapes.add() + s.shape.extend(fixed_shape) + shapes_added_so_far.append(list(fixed_shape)) + + for shape in enumerated_shapes: + if not isinstance(shape, tuple): + raise Exception("An element in 'enumerated_shapes' is not a tuple") + if list(shape) not in shapes_added_so_far: + s = feature.type.multiArrayType.enumeratedShapes.shapes.add() + s.shape.extend(list(shape)) + shapes_added_so_far.append(list(shape)) + + +def _add_enumerated_image_sizes_for_feature( + feature: _ml.FeatureDescription, + sizes: _List[NeuralNetworkImageSize], +): + if not isinstance(sizes, list): + sizes = [sizes] + + for size in sizes: + if not isinstance(size, NeuralNetworkImageSize): + raise Exception("Shape ranges should be of type NeuralNetworkImageSize") + + if feature.type.WhichOneof("Type") != "imageType": + raise Exception("Trying to add enumerated sizes to " "a non-image feature type") + + if feature.type.imageType.WhichOneof("SizeFlexibility") != "enumeratedSizes": + feature.type.imageType.ClearField("SizeFlexibility") + + esizes_len = len(feature.type.imageType.enumeratedSizes.sizes) + + # Add default image size to list of enumerated sizes if enumerated sizes + # field is currently empty + if esizes_len == 0: + fixed_height = feature.type.imageType.height + fixed_width = feature.type.imageType.width + sizes.append(NeuralNetworkImageSize(fixed_height, fixed_width)) + + shapes_added_so_far = [] + for size in sizes: + if [size.height, size.width] not in shapes_added_so_far: + s = feature.type.imageType.enumeratedSizes.sizes.add() + s.height = size.height + s.width = size.width + shapes_added_so_far.append([s.height, s.width]) + + def add_enumerated_multiarray_shapes(spec, feature_name, shapes): """ Annotate an input or output multiArray feature in a Neural Network spec to @@ -317,16 +465,19 @@ def add_enumerated_multiarray_shapes(spec, feature_name, shapes): -------- .. sourcecode:: python - >>> import coremltools - >>> from coremltools.models.neural_network import flexible_shape_utils - >>> spec = coremltools.utils.load_spec('mymodel.mlmodel') - >>> array_shapes = [flexible_shape_utils.NeuralNetworkMultiArrayShape(3)] - >>> second_shape = flexible_shape_utils.NeuralNetworkMultiArrayShape() - >>> second_shape.set_channel_shape(3) - >>> second_shape.set_height_shape(10) - >>> second_shape.set_width_shape(15) - >>> array_shapes.append(second_shape) - >>> flexible_shape_utils.add_enumerated_multiarray_shapes(spec, feature_name='my_multiarray_featurename', shapes=array_shapes) + import coremltools + from coremltools.models.neural_network import flexible_shape_utils + + spec = coremltools.utils.load_spec("mymodel.mlmodel") + array_shapes = [flexible_shape_utils.NeuralNetworkMultiArrayShape(3)] + second_shape = flexible_shape_utils.NeuralNetworkMultiArrayShape() + second_shape.set_channel_shape(3) + second_shape.set_height_shape(10) + second_shape.set_width_shape(15) + array_shapes.append(second_shape) + flexible_shape_utils.add_enumerated_multiarray_shapes( + spec, feature_name="my_multiarray_featurename", shapes=array_shapes + ) :return: None. The spec object is updated @@ -380,7 +531,6 @@ def add_enumerated_multiarray_shapes(spec, feature_name, shapes): _MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, spec.specificationVersion ) - def add_enumerated_image_sizes(spec, feature_name, sizes): """ Annotate an input or output image feature in a Neural Network spec to @@ -402,12 +552,15 @@ def add_enumerated_image_sizes(spec, feature_name, sizes): -------- .. 
sourcecode:: python - >>> import coremltools - >>> from coremltools.models.neural_network import flexible_shape_utils - >>> spec = coremltools.utils.load_spec('mymodel.mlmodel') - >>> image_sizes = [flexible_shape_utils.NeuralNetworkImageSize(128, 128)] - >>> image_sizes.append(flexible_shape_utils.NeuralNetworkImageSize(256, 256)) - >>> flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='my_multiarray_featurename', sizes=image_sizes) + import coremltools + from coremltools.models.neural_network import flexible_shape_utils + + spec = coremltools.utils.load_spec("mymodel.mlmodel") + image_sizes = [flexible_shape_utils.NeuralNetworkImageSize(128, 128)] + image_sizes.append(flexible_shape_utils.NeuralNetworkImageSize(256, 256)) + flexible_shape_utils.add_enumerated_image_sizes( + spec, feature_name="my_multiarray_featurename", sizes=image_sizes + ) :return: None. The spec object is updated @@ -448,7 +601,6 @@ def add_enumerated_image_sizes(spec, feature_name, sizes): _MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, spec.specificationVersion ) - def update_image_size_range(spec, feature_name, size_range): """ Annotate an input or output Image feature in a Neural Network spec to @@ -470,38 +622,22 @@ def update_image_size_range(spec, feature_name, size_range): -------- .. sourcecode:: python - >>> import coremltools - >>> from coremltools.models.neural_network import flexible_shape_utils - >>> spec = coremltools.utils.load_spec('mymodel.mlmodel') - >>> img_size_ranges = flexible_shape_utils.NeuralNetworkImageSizeRange() - >>> img_size_ranges.add_height_range(64, 128) - >>> img_size_ranges.add_width_range(128, -1) - >>> flexible_shape_utils.update_image_size_range(spec, feature_name='my_multiarray_featurename', size_range=img_size_ranges) + import coremltools + from coremltools.models.neural_network import flexible_shape_utils + + spec = coremltools.utils.load_spec("mymodel.mlmodel") + img_size_ranges = flexible_shape_utils.NeuralNetworkImageSizeRange() + img_size_ranges.add_height_range(64, 128) + img_size_ranges.add_width_range(128, -1) + flexible_shape_utils.update_image_size_range( + spec, feature_name="my_multiarray_featurename", size_range=img_size_ranges + ) :return: None. The spec object is updated """ - if not isinstance(size_range, NeuralNetworkImageSizeRange): - raise Exception("Shape ranges should be of type NeuralNetworkImageSizeRange") - feature = _get_feature(spec, feature_name) - if feature.type.WhichOneof("Type") != "imageType": - raise Exception("Trying to add size ranges for " "a non-image feature type") - - feature.type.imageType.ClearField("SizeFlexibility") - feature.type.imageType.imageSizeRange.heightRange.lowerBound = ( - size_range.get_height_range().lowerBound - ) - feature.type.imageType.imageSizeRange.heightRange.upperBound = ( - size_range.get_height_range().upperBound - ) - - feature.type.imageType.imageSizeRange.widthRange.lowerBound = ( - size_range.get_width_range().lowerBound - ) - feature.type.imageType.imageSizeRange.widthRange.upperBound = ( - size_range.get_width_range().upperBound - ) + _update_image_size_range_for_feature(feature, size_range) # Bump up specification version spec.specificationVersion = max( @@ -532,14 +668,17 @@ def update_multiarray_shape_range(spec, feature_name, shape_range): -------- .. 
sourcecode:: python - >>> import coremltools - >>> from coremltools.models.neural_network import flexible_shape_utils - >>> spec = coremltools.utils.load_spec('mymodel.mlmodel') - >>> shape_range = flexible_shape_utils.NeuralNetworkMultiArrayShapeRange() - >>> shape_range.add_channel_range((1, 3)) - >>> shape_range.add_width_range((128, 256)) - >>> shape_range.add_height_range((128, 256)) - >>> flexible_shape_utils.update_multiarray_shape_range(spec, feature_name='my_multiarray_featurename', shape_range=shape_range) + import coremltools + from coremltools.models.neural_network import flexible_shape_utils + + spec = coremltools.utils.load_spec("mymodel.mlmodel") + shape_range = flexible_shape_utils.NeuralNetworkMultiArrayShapeRange() + shape_range.add_channel_range((1, 3)) + shape_range.add_width_range((128, 256)) + shape_range.add_height_range((128, 256)) + flexible_shape_utils.update_multiarray_shape_range( + spec, feature_name="my_multiarray_featurename", shape_range=shape_range + ) :return: None. The spec is updated @@ -606,63 +745,29 @@ def set_multiarray_ndshape_range(spec, feature_name, lower_bounds, upper_bounds) -------- .. sourcecode:: python - >>> import coremltools - >>> from coremltools.models.neural_network import flexible_shape_utils - >>> spec = coremltools.utils.load_spec('mymodel.mlmodel') - >>> # say, the default shape of "my_multiarray_featurename" is (2,3) - >>> flexible_shape_utils.set_multiarray_ndshape_range(spec, feature_name='my_multiarray_featurename', lower_bounds=[1,2], upper_bounds=[10,-1]) + import coremltools + from coremltools.models.neural_network import flexible_shape_utils + + spec = coremltools.utils.load_spec("mymodel.mlmodel") + # say, the default shape of "my_multiarray_featurename" is (2,3) + flexible_shape_utils.set_multiarray_ndshape_range( + spec, + feature_name="my_multiarray_featurename", + lower_bounds=[1, 2], + upper_bounds=[10, -1], + ) :return: None. 
The spec is updated """ - if not isinstance(lower_bounds, list): - raise Exception("lower_bounds must be a list") - if not isinstance(upper_bounds, list): - raise Exception("upper_bounds must be a list") - feature = _get_feature(spec, feature_name) - - if feature.type.WhichOneof("Type") != "multiArrayType": - raise Exception( - "Trying to update shape range for " "a non-multiArray feature type" - ) - - shape = feature.type.multiArrayType.shape - - if len(shape) != len(lower_bounds): - raise Exception( - "Length of lower_bounds is not equal to the number of dimensions in the default shape" - ) - if len(shape) != len(upper_bounds): - raise Exception( - "Length of upper_bounds is not equal to the number of dimensions in the default shape" - ) - - feature.type.multiArrayType.ClearField("ShapeFlexibility") - - for i in range(len(lower_bounds)): - if shape[i] < lower_bounds[i]: - raise Exception( - "Default shape in %d-th dimension, which is %d, is smaller" - " than the lower bound of %d" % (i, int(shape[i]), lower_bounds[i]) - ) - if upper_bounds[i] != -1: - if shape[i] > upper_bounds[i]: - raise Exception( - "Default shape in %d-th dimension, which is %d, is greater" - " than the upper bound of %d" % (i, int(shape[i]), upper_bounds[i]) - ) - - s = feature.type.multiArrayType.shapeRange.sizeRanges.add() - s.lowerBound = lower_bounds[i] - s.upperBound = upper_bounds[i] + _set_multiarray_ndshape_range_for_feature(feature, lower_bounds, upper_bounds) # Bump up specification version spec.specificationVersion = max( _MINIMUM_NDARRAY_SPEC_VERSION, spec.specificationVersion ) - def add_multiarray_ndshape_enumeration(spec, feature_name, enumerated_shapes): """ Annotate an input or output MLMultiArray feature in a Neural Network spec @@ -687,50 +792,20 @@ def add_multiarray_ndshape_enumeration(spec, feature_name, enumerated_shapes): -------- .. sourcecode:: python - >>> import coremltools - >>> from coremltools.models.neural_network import flexible_shape_utils - >>> spec = coremltools.utils.load_spec('mymodel.mlmodel') - >>> # say, the default shape of "my_multiarray_featurename" is (2,3) - >>> flexible_shape_utils.add_multiarray_ndshape_enumeration(spec, feature_name='my_multiarray_featurename', enumerated_shapes=[(2,4), (2,6)]) + import coremltools + from coremltools.models.neural_network import flexible_shape_utils + + spec = coremltools.utils.load_spec("mymodel.mlmodel") + # say, the default shape of "my_multiarray_featurename" is (2,3) + flexible_shape_utils.add_multiarray_ndshape_enumeration( + spec, feature_name="my_multiarray_featurename", enumerated_shapes=[(2, 4), (2, 6)] + ) :return: None. 
The spec is updated """ - if not isinstance(enumerated_shapes, list): - raise Exception("enumerated_shapes must be a list") - if len(enumerated_shapes) == 0: - raise Exception("enumerated_shapes is empty") - feature = _get_feature(spec, feature_name) - if feature.type.WhichOneof("Type") != "multiArrayType": - raise Exception( - "Trying to update shape range for " "a non-multiArray feature type" - ) - - shape = feature.type.multiArrayType.shape - - if feature.type.multiArrayType.WhichOneof("ShapeFlexibility") != "enumeratedShapes": - feature.type.multiArrayType.ClearField("ShapeFlexibility") - - eshape_len = len(feature.type.multiArrayType.enumeratedShapes.shapes) - - shapes_added_so_far = [] - - # Add default array shape to list of enumerated shapes if enumerated shapes - # field is currently empty - if eshape_len == 0: - fixed_shape = feature.type.multiArrayType.shape - s = feature.type.multiArrayType.enumeratedShapes.shapes.add() - s.shape.extend(fixed_shape) - shapes_added_so_far.append(list(fixed_shape)) - - for shape in enumerated_shapes: - if not isinstance(shape, tuple): - raise Exception("An element in 'enumerated_shapes' is not a tuple") - if list(shape) not in shapes_added_so_far: - s = feature.type.multiArrayType.enumeratedShapes.shapes.add() - s.shape.extend(list(shape)) - shapes_added_so_far.append(list(shape)) + _add_multiarray_ndshape_enumeration_for_feature(feature, enumerated_shapes) # Bump up specification version spec.specificationVersion = max( diff --git a/coremltools/models/utils.py b/coremltools/models/utils.py index 36e4e269b..851598c16 100644 --- a/coremltools/models/utils.py +++ b/coremltools/models/utils.py @@ -7,24 +7,24 @@ Utilities for the entire package. """ -from collections.abc import Iterable as _Iterable -from functools import lru_cache as _lru_cache import math as _math import os as _os import shutil as _shutil import subprocess as _subprocess import sys as _sys import tempfile as _tempfile -from typing import Optional as _Optional, Union as _Union import warnings as _warnings +from collections.abc import Iterable as _Iterable +from functools import lru_cache as _lru_cache +from typing import Optional as _Optional +from typing import Union as _Union import numpy as _np import coremltools as _ct from coremltools import ComputeUnit as _ComputeUnit +from coremltools import proto as _proto from coremltools.converters.mil.mil.passes.defs.preprocess import NameSanitizer as _NameSanitizer -from coremltools.proto import Model_pb2 as _Model_pb2 -import coremltools.proto.MIL_pb2 as _mil_proto from .._deps import _HAS_SCIPY @@ -62,7 +62,7 @@ def _remove_invalid_keys(input_dict, model): def _create_mlpackage( - proto_spec: _Model_pb2, + proto_spec: _proto.Model_pb2, weights_dir: _Optional[str] = None, package_path: _Optional[str] = None, ) -> str: @@ -190,7 +190,7 @@ def save_spec(spec, filename, auto_set_specification_version=False, weights_dir= f.write(spec.SerializeToString()) -def load_spec(model_path: str) -> _Model_pb2: +def load_spec(model_path: str) -> _proto.Model_pb2: """ Load a protobuf model specification from file (mlmodel) or directory (mlpackage). 
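Note on the import consolidation in coremltools/models/utils.py (this hunk and the ones that follow): the direct Model_pb2 and MIL_pb2 imports are replaced by the aggregated coremltools.proto package, so specs are now built as _proto.Model_pb2.Model(). A minimal sketch of the resulting usage pattern, mirroring the calls made in this patch (illustrative only, not part of the change itself):

    from coremltools import proto

    # Build an empty model spec through the aggregated proto package.
    spec = proto.Model_pb2.Model()
    spec.specificationVersion = 1

    # Array-type constants used by convert_double_to_float_multiarray_type are
    # reached the same way.
    fp32 = proto.Model_pb2.ArrayFeatureType.FLOAT32

    # The MIL Value message patched by make_pipeline lives alongside it.
    mil_value = proto.MIL_pb2.Value()
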
@@ -221,7 +221,7 @@ def load_spec(model_path: str) -> _Model_pb2: else: specfile = model_path - spec = _Model_pb2.Model() + spec = _proto.Model_pb2.Model() with open(specfile, "rb") as f: spec.ParseFromString(f.read()) return spec @@ -304,7 +304,9 @@ def _wp_to_fp16wp(wp): def _convert_neural_network_spec_weights_to_fp16(fp_spec): from .neural_network.quantization_utils import ( - _QUANTIZATION_MODE_LINEAR_QUANTIZATION, _quantize_spec_weights) + _QUANTIZATION_MODE_LINEAR_QUANTIZATION, + _quantize_spec_weights, + ) qspec = _quantize_spec_weights(fp_spec, 16, _QUANTIZATION_MODE_LINEAR_QUANTIZATION) return qspec @@ -344,7 +346,6 @@ def _get_model(spec, compute_units=_ComputeUnit.ALL): else: return MLModel(spec, compute_units=compute_units) - def evaluate_regressor(model, data, target="target", verbose=False): """ Evaluate a CoreML regression model and compare against predictions @@ -982,13 +983,8 @@ def convert_double_to_float_multiarray_type(spec): def _convert_to_float(feature): if feature.type.HasField("multiArrayType"): - if ( - feature.type.multiArrayType.dataType - == _Model_pb2.ArrayFeatureType.DOUBLE - ): - feature.type.multiArrayType.dataType = ( - _Model_pb2.ArrayFeatureType.FLOAT32 - ) + if feature.type.multiArrayType.dataType == _proto.Model_pb2.ArrayFeatureType.DOUBLE: + feature.type.multiArrayType.dataType = _proto.Model_pb2.ArrayFeatureType.FLOAT32 for feature in spec.description.input: _convert_to_float(feature) @@ -1004,7 +1000,7 @@ def _convert_to_float(feature): convert_double_to_float_multiarray_type(model_spec) -def compile_model(model: _Model_pb2.Model, destination_path: _Optional[str]=None) -> str: +def compile_model(model: _proto.Model_pb2.Model, destination_path: _Optional[str] = None) -> str: """ Compiles a Core ML model spec. @@ -1036,13 +1032,13 @@ def compile_model(model: _Model_pb2.Model, destination_path: _Optional[str]=None spec.specificationVersion = 1 input_ = spec.description.input.add() - input_.name = 'x' + input_.name = "x" input_.type.doubleType.MergeFromString(b"") output_ = spec.description.output.add() - output_.name = 'y' + output_.name = "y" output_.type.doubleType.MergeFromString(b"") - spec.description.predictedFeatureName = 'y' + spec.description.predictedFeatureName = "y" lr = spec.glmRegressor lr.offset.append(0.1) @@ -1051,7 +1047,7 @@ def compile_model(model: _Model_pb2.Model, destination_path: _Optional[str]=None compiled_model_path = compile_model(spec) model = CompiledMLModel(compiled_model_path) - y = model.predict({'x': 2}) + y = model.predict({"x": 2}) See Also -------- @@ -1072,7 +1068,7 @@ def compile_model(model: _Model_pb2.Model, destination_path: _Optional[str]=None if isinstance(model, _ct.models.MLModel): raise TypeError("This model has already been compiled. Call \"get_compiled_model_path\"" " to get the compiled model.") - if not isinstance(model, _Model_pb2.Model): + if not isinstance(model, _proto.Model_pb2.Model): raise TypeError("Unrecognized input for \"model\" parameter. It should be a spec.") # Check file extension of destination_path parameter @@ -1128,20 +1124,20 @@ def make_pipeline( -------- .. 
sourcecode:: python - my_model1 = ct.models.MLModel('/tmp/m1.mlpackage') - my_model2 = ct.models.MLModel('/tmp/m2.mlmodel') - + my_model1 = ct.models.MLModel("/tmp/m1.mlpackage") + my_model2 = ct.models.MLModel("/tmp/m2.mlmodel") + my_pipeline_model = ct.utils.make_pipeline(my_model1, my_model2) - y = my_pipeline_model.predict({'x': 12}) + y = my_pipeline_model.predict({"x": 12}) - my_pipeline_model.save('/tmp/my_pipeline.mlpackage') - new_my_pipeline = ct.model.MLModel('/tmp/my_pipeline.mlpackage') + my_pipeline_model.save("/tmp/my_pipeline.mlpackage") + new_my_pipeline = ct.model.MLModel("/tmp/my_pipeline.mlpackage") """ def updateBlobFileName(proto_message, new_path): - if type(proto_message) == _mil_proto.Value: + if type(proto_message) == _proto.MIL_pb2.Value: # Value protobuf message. This is what might need to be updated. if proto_message.WhichOneof('value') == 'blobFileValue': assert proto_message.blobFileValue.fileName == "@model_path/weights/weight.bin" diff --git a/coremltools/optimize/coreml/_config.py b/coremltools/optimize/coreml/_config.py index d43f46d25..9835099f8 100644 --- a/coremltools/optimize/coreml/_config.py +++ b/coremltools/optimize/coreml/_config.py @@ -790,8 +790,8 @@ def _get_const_op_config(self, op: Operation): if not isinstance(op, Operation): raise TypeError(f"op must be type of Operation. Got {type(op)}") - if op.op_type != "const": - raise TypeError(f"op must be of type const. Got {op.op_type}") + if not (op.op_type == "const" or op.op_type.startswith("constexpr_")): + raise TypeError(f"op must be of type const or constexpr. Got {op.op_type}") if op.name in self.op_name_configs: return self.op_name_configs[op.name] diff --git a/coremltools/optimize/coreml/_post_training_quantization.py b/coremltools/optimize/coreml/_post_training_quantization.py index 8b86704c3..3e4d0ae03 100644 --- a/coremltools/optimize/coreml/_post_training_quantization.py +++ b/coremltools/optimize/coreml/_post_training_quantization.py @@ -16,6 +16,8 @@ from coremltools.converters.mil.mil.passes.defs.quantization import ( AbstractQuantizationPass as _AbstractQuantizationPass, ) +from coremltools.converters.mil.mil.passes.graph_pass import PassOption +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY from coremltools.models import MLModel as _MLModel from coremltools.optimize.coreml import OptimizationConfig as _OptimizationConfig from coremltools.optimize.coreml._config import _MetaDataDict @@ -23,7 +25,6 @@ from ._quantization_passes import WeightDecompressor as _WeightDecompressor from ._quantization_passes import linear_quantize_weights as _linear_quantize_weights from ._quantization_passes import palettize_weights as _palettize_weights -from ._quantization_passes import prune_weights as _prune_weights def _convert_model_spec_to_pymil_prog( @@ -319,8 +320,8 @@ def prune_weights(mlmodel: _MLModel, config: _OptimizationConfig): compressed_model = cto.coreml.prune_weights(model, config) """ - - weight_pruner = _prune_weights(config, fake_compression=False) + weight_pruner = PASS_REGISTRY["compression::prune_weights"] + weight_pruner.set_options([PassOption("config", config)]) return _apply_graph_pass(mlmodel, weight_pruner) def decompress_weights(mlmodel: _MLModel): @@ -477,7 +478,7 @@ def _get_weight_metadata(op): def get_weights_meta_block(block): # get the candidates ops with the given op_type candidate_ops = [] - for op in list(block.operations): + for op in block.operations: for b in op.blocks: get_weights_meta_block(b) diff --git 
a/coremltools/optimize/coreml/_quantization_passes.py b/coremltools/optimize/coreml/_quantization_passes.py index 878fa3d68..0338c2e4c 100644 --- a/coremltools/optimize/coreml/_quantization_passes.py +++ b/coremltools/optimize/coreml/_quantization_passes.py @@ -8,6 +8,7 @@ import numpy as np from tqdm import tqdm +import coremltools.converters.mil.frontend._utils as frontend_utils from coremltools import _logger as logger from coremltools.converters.mil._deployment_compatibility import AvailableTarget from coremltools.converters.mil.backend.mil.load import should_use_weight_file @@ -24,7 +25,10 @@ from coremltools.converters.mil.mil.passes.helper import block_context_manager from coremltools.converters.mil.mil.passes.pass_registry import register_pass from coremltools.converters.mil.mil.types.type_mapping import nptype_from_builtin +from coremltools.converters.mil.mil.var import Var +from coremltools.models._deprecation import deprecated as _deprecated from coremltools.models.neural_network.quantization_utils import _get_kmeans_lookup_table_and_weight +from coremltools.optimize.coreml import _utils from coremltools.optimize.coreml._config import ( OpLinearQuantizerConfig, OpMagnitudePrunerConfig, @@ -117,36 +121,48 @@ def apply_block(block): apply_block(f) @property - def config(self): + def config(self) -> OptimizationConfig: return self._config @config.setter - def config(self, value): + def config(self, value: OptimizationConfig): self._check_config_type(value) self._config = value + if value._op_selector is not None: + self.op_selector = value._op_selector - @staticmethod - def need_compress_const(op: Operation, _is_deprecated: bool, weight_threshold: float): + def need_compress_const( + self, op: Operation, _is_deprecated: bool, weight_threshold: float + ) -> bool: """ The utility function is checking whether a const op can be compressed. If ``_is_deprecated = True``, the user is using the ``ct.compression_utils``, in which the ops are already filtered by ``op_selector``. For the new ``ct.optimize.coreml`` API, ``op_selector`` is no longer supported, so the ``weight_threshold`` is checked explicitly instead. """ - val = op.outputs[0].val + val = self._get_const_value(op) if _is_deprecated and weight_threshold != None: raise ValueError("weight_threshold cannot be set through the deprecated ct.compression_util API") if _is_deprecated: return should_use_weight_file(val) - # const fed into constexpr ops cannot be compressed - if any([child_op.op_type.startswith("constexpr") for child_op in op.outputs[0].child_ops]): + if not self._validate_child_constexpr_for_compress(op): return False if weight_threshold is None: raise ValueError("weight_threshold cannot be None") - return should_use_weight_file(val) and val.size > weight_threshold + return ( + should_use_weight_file(val) and self._get_weight_to_compress_size(op) > weight_threshold + ) + + def _validate_child_constexpr_for_compress(self, op: Operation) -> bool: + """Check if child constexpr ops support current op to be compressed.""" + for child_op in op.outputs[0].child_ops: + if child_op.op_type.startswith("constexpr_"): + # Const fed into constexpr_ ops cannot be further compressed. + return False + return True def _check_config_type(self, config: OptimizationConfig): """ @@ -169,28 +185,53 @@ def get_supported_types_as_str(supported_type): raise ValueError(f"{self.__class__.__name__} only accept {supported_type_str} type config. 
Got {config.__class__.__name__}.") @staticmethod - def pick_channnel_axis(op: Operation) -> int: + def select_input_output_channel_axis(op: Operation) -> Tuple[int, int]: """ - By default, output channel is used as the channel axis. Here are some representative ops: + Here are some representative ops: - linear: [D_out, D_in] - matmul's y: [..., D_in, D_out] if transpose_y is False, else [..., D_out, D_in] - conv: [C_out, C_in_div_group, KH, KW] - conv_transpose: [C_in, C_out_div_group, KH, KW] - So the channel axis picking criterial is: - - For conv_transpose it's 1 - - For matmul's y it's -1 (transpose_y=False) or -2 (transpose_y=True) - - For all other ops, it's 0 + The input output channel axis selection criteria is: + - For conv_transpose the output channel is 1 and input channel is 0. + - For matmul's y: + - When transpose_y=False, output channel is -1 and input channel is -2 + - When transpose_y=True, output channel is -2 and input channel is -1 + - For all other ops, output channel is 0 and input channel is 1. """ - channel_axis = 0 + output_channel_axis, input_channel_axis = 0, 1 var = op.outputs[0] if len(var.child_ops) == 1: child_op = var.child_ops[0] if child_op.op_type == "conv_transpose": - channel_axis = 1 + output_channel_axis = 1 + input_channel_axis = 0 if child_op.op_type == "matmul" and child_op.y == var: - channel_axis = -1 if child_op.transpose_y else -2 - return channel_axis + if child_op.transpose_y.val: + output_channel_axis = -2 + input_channel_axis = -1 + else: + output_channel_axis = -1 + input_channel_axis = -2 + if child_op.op_type.startswith("constexpr_"): + return AbstractCompressionPass.select_input_output_channel_axis(child_op) + return input_channel_axis, output_channel_axis + + def is_valid_op(self, op: Operation): + if op.op_type == "const" and should_use_weight_file(self._get_const_value(op)): + return True + return False + + def _get_const_value(self, op: Operation) -> np.ndarray: + if op.op_type != "const": + raise ValueError(f"The op {op} is not a const") + return op.outputs[0].val + + def _get_weight_to_compress_size(self, op: Operation) -> int: + if op.op_type != "const": + raise ValueError("Only const weight can be compressed") + return np.prod(op.outputs[0].shape) @register_pass(namespace="compression") @@ -210,11 +251,6 @@ class prune_weights(AbstractCompressionPass): """ _SUPPORTED_CONFIG_TYPE = (OpMagnitudePrunerConfig, OpThresholdPrunerConfig) - def is_valid_op(self, op: Operation): - if op.op_type == "const" and should_use_weight_file(op.outputs[0].val): - return True - return False - @staticmethod def _pack_val_to_sparse_param(val): flattened_val = val.flatten() @@ -367,6 +403,16 @@ def decompress(params): raise ValueError("Invalid type of params") return constexpr_sparse_to_dense.decompress(params.nonzero_data, params.mask, params.shape) + @staticmethod + def _create_constexpr_var(op: Operation, sparse_params: SparseParams) -> Var: + return mb.constexpr_sparse_to_dense( + nonzero_data=sparse_params.nonzero_data, + mask=sparse_params.mask, + shape=np.uint32(sparse_params.shape), + before_op=op, + name=op.name + "_sparsified", + ) + def transform_op(self, op: Operation): op_config = self.config._get_const_op_config(op) if op_config is None: @@ -374,15 +420,16 @@ def transform_op(self, op: Operation): if not self.need_compress_const(op, self.config._is_deprecated, op_config.weight_threshold): return - if not isinstance(op.outputs[0].val, (np.ndarray, np.generic)): + const_val = self._get_const_value(op) + if not isinstance(const_val, 
(np.ndarray, np.generic)): raise ValueError("Only numpy arrays are supported") if isinstance(op_config, OpThresholdPrunerConfig): sparse_params = self.compress_by_threshold( - val=op.outputs[0].val, - threshold=op_config.threshold, - minimum_sparsity_percentile=op_config.minimum_sparsity_percentile - ) + val=const_val, + threshold=op_config.threshold, + minimum_sparsity_percentile=op_config.minimum_sparsity_percentile, + ) elif isinstance(op_config, OpMagnitudePrunerConfig): # Structural sparsity can only be applied to conv / linear weight # For non applicable constant, we skip the compression, @@ -395,29 +442,23 @@ def transform_op(self, op: Operation): if op_config.target_sparsity is not None: sparse_params = self.compress_by_magnitude( - val=op.outputs[0].val, - target_sparsity=op_config.target_sparsity, - block_size=op_config.block_size, - dim=op_config.dim, - ) + val=const_val, + target_sparsity=op_config.target_sparsity, + block_size=op_config.block_size, + dim=op_config.dim, + ) elif op_config.n_m_ratio is not None: sparse_params = self.compress_by_nm_sparsity( - val=op.outputs[0].val, - n_m_ratio=op_config.n_m_ratio, - dim=op_config.dim, - ) + val=const_val, + n_m_ratio=op_config.n_m_ratio, + dim=op_config.dim, + ) if sparse_params is None: return if not self.fake_compression: - new_var = mb.constexpr_sparse_to_dense( - nonzero_data=sparse_params.nonzero_data, - mask=sparse_params.mask, - shape=np.uint32(sparse_params.shape), - before_op=op, - name=op.name + "_sparsified", - ) + new_var = self._create_constexpr_var(op, sparse_params) else: decompressed_val = self.decompress(sparse_params) new_var = mb.const( @@ -453,19 +494,22 @@ class palettize_weights(AbstractCompressionPass): _SUPPORTED_CONFIG_TYPE = OpPalettizerConfig _SUPPORTED_NBITS = (1, 2, 4, 6, 8) - def is_valid_op(self, op: Operation): - if op.op_type == "const" and should_use_weight_file(op.outputs[0].val): - return True - return False - @staticmethod def _get_nbits_for_unique_mode(val: np.ndarray, allowed_nbits: Tuple[int, ...]) -> int: + """ + Try each nbit in allowed_nbits to find one that can represent number of unique values in val. + + Note that the values in `allowed_nbits` need to be in ascending order. + """ val = val.flatten() unique_vals = np.unique(val).tolist() for nbits in allowed_nbits: if len(unique_vals) <= 1 << nbits: return nbits - raise ValueError("Unique values in weight cannot be represented by 8 bits palettization.") + raise ValueError( + f"Unique values in weight cannot be represented by {allowed_nbits[-1]} " + "bits palettization." 
+ ) @staticmethod def _get_lut_and_indices( @@ -572,6 +616,16 @@ def decompress(params): raise ValueError("Invalid type of params") return constexpr_lut_to_dense.decompress(params.lut, params.indices, params.shape) + @staticmethod + def _create_constexpr_var(op: Operation, lut_params: LutParams) -> Var: + return mb.constexpr_lut_to_dense( + indices=lut_params.indices, + lut=lut_params.lut, + shape=np.uint32(lut_params.shape), + before_op=op, + name=op.name + "_palettized", + ) + def transform_op(self, op: Operation): op_config = self.config._get_const_op_config(op) if op_config is None: @@ -596,13 +650,7 @@ def transform_op(self, op: Operation): ) if not self.fake_compression: - new_var = mb.constexpr_lut_to_dense( - indices=lut_params.indices, - lut=lut_params.lut, - shape=np.uint32(lut_params.shape), - before_op=op, - name=op.name + "_palettized", - ) + new_var = palettize_weights._create_constexpr_var(op, lut_params) else: decompressed_val = self.decompress(lut_params) new_var = mb.const( @@ -620,6 +668,7 @@ def transform_op(self, op: Operation): op.enclosing_block.remove_ops([op]) + @register_pass(namespace="compression") class linear_quantize_weights(AbstractCompressionPass): """ @@ -642,16 +691,16 @@ class linear_quantize_weights(AbstractCompressionPass): (types.uint8, "LINEAR_SYMMETRIC"): (0, 254), } - def is_valid_op(self, op: Operation): - if op.op_type == "const" and should_use_weight_file(op.outputs[0].val): - return True - return False - @classmethod + @_deprecated( + suffix="Please use _utils.quantize_weight", + version="8.0", + obj_prefix="coremltools.optimize.coreml._quantization_passes.", + ) def _get_quantized_data( cls, original_data: np.ndarray, axes: Tuple[int, ...], mode: str, dtype: type ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]: - """Get quantized data along with metadata (scale, zero_point).""" + """[Deprecated] Get quantized data along with metadata (scale, zero_point).""" if not np.issubdtype(original_data.dtype, np.floating): raise ValueError("Only floating numpy arrays are supported.") @@ -696,8 +745,21 @@ def _get_quantized_data( def compress(cls, val: np.ndarray, axis: int, mode: str, dtype: type) -> AffineQuantParams: if not isinstance(val, (np.ndarray, np.generic)): raise ValueError("Only numpy arrays are supported") + if isinstance(dtype, np.dtype): + dtype = types.numpy_type_to_builtin_type(dtype) + if not types.is_builtin(dtype): + raise ValueError(f"The input dtype is should be a built-in type, but got {type(dtype)}") + axes = tuple([i for i in range(len(val.shape)) if i != axis]) - quantized_data, scale, zero_point = cls._get_quantized_data(val, axes, mode, dtype) + quantized_data, scale, zero_point = _utils.quantize_weight( + val, + axes, + nbits=dtype.get_bitwidth(), + signed=not dtype.is_unsigned(), + quantization_mode=mode, + dtype=types.nptype_from_builtin(dtype), + ) + if zero_point is None: # The iOS16 constexpr_affine_dequantize op requires zero_point. 
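        # With the quantize_weight helper added in
        # coremltools/optimize/coreml/_utils.py later in this patch, zero_point
        # comes back as None only for signed LINEAR_SYMMETRIC quantization;
        # unsigned symmetric mode returns a mid-range zero point (q_val_max // 2)
        # and LINEAR mode always computes one, so this zeros fallback only fires
        # for the signed symmetric case.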
zero_point = np.zeros_like(scale).astype(quantized_data.dtype) @@ -718,18 +780,19 @@ def transform_op(self, op: Operation): if not self.need_compress_const(op, self.config._is_deprecated, op_config.weight_threshold): return + output_channel = self.select_input_output_channel_axis(op)[1] quant_params = self.compress( - op.outputs[0].val, self.pick_channnel_axis(op), op_config.mode, op_config.dtype + op.outputs[0].val, output_channel, op_config.mode, op_config.dtype ) if not self.fake_compression: - new_var = mb.constexpr_affine_dequantize( - quantized_data=quant_params.quantized_data, - zero_point=quant_params.zero_point, - scale=quant_params.scale, - axis=quant_params.axis, - before_op=op, + new_var = frontend_utils._construct_constexpr_affine_op( + quant_params.quantized_data, + quant_params.zero_point, + quant_params.scale, + quant_params.axis, name=op.name + "_affine_quantized", + before_op=op, ) else: decompressed_val = self.decompress(quant_params) @@ -752,11 +815,7 @@ def transform_op(self, op: Operation): class WeightDecompressor(AbstractQuantizationPass): """ This graph pass transforms the ``constexpr`` op back into ``mb.const`` op. - The ``constexpr`` op includes: - - - ``constexpr_affine_dequantize`` - - ``constexpr_lut_to_dense`` - - ``constexpr_sparse_to_dense`` + The ``constexpr`` op has op_type starts with the "constexpr_" prefix. """ def __init__(self, op_selector): @@ -767,18 +826,24 @@ def is_valid_op(self, op): def transform_op(self, op): decompressed_val = op.materialized_val_inference() - new_var = mb.const( - val=decompressed_val, - before_op=op, - name=op.name, - ) - op.enclosing_block.replace_uses_of_var_after_op( - anchor_op=op, - old_var=op.outputs[0], - new_var=new_var, - no_check_var_types=True, - force_replace=True, - ) + if not isinstance(decompressed_val, (list, tuple)): + decompressed_val = [decompressed_val] + + if len(decompressed_val) != len(op.outputs): + raise ValueError( + "The number of decompressed value should match the number of op outputs. " + f"But got {len(decompressed_val)} vs {len(op.outputs)}" + ) + + for decomp_val, output_var in zip(decompressed_val, op.outputs): + new_const = mb.const(val=decomp_val, before_op=op, name=op.name) + op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=op, + old_var=output_var, + new_var=new_const, + no_check_var_types=True, + force_replace=True, + ) op.enclosing_block.remove_ops([op]) diff --git a/coremltools/optimize/coreml/_utils.py b/coremltools/optimize/coreml/_utils.py new file mode 100644 index 000000000..75a953419 --- /dev/null +++ b/coremltools/optimize/coreml/_utils.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import numpy as np +from typing import Tuple, Optional + + +def get_quant_range(n_bits: int, signed: bool, mode: str) -> Tuple[int, int]: + """ + Utility to get the quantization range for a given quantization config + Adapted from phoenix/quatization/_utils.py + """ + max_q = 2**n_bits + if not signed: + quant_min = 0 + quant_max = max_q - 1 + if mode == "LINEAR_SYMMETRIC": + quant_max -= 1 + else: + quant_min = -max_q / 2 + quant_max = max_q / 2 - 1 + if mode == "LINEAR_SYMMETRIC": + quant_min += 1 + return int(quant_min), int(quant_max) + + +def quantize_weight( + weight: np.ndarray, + axes: Tuple[int, ...], + nbits: int, + signed: bool, + quantization_mode: str, + dtype: np.dtype, +) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]: + """Get quantized data along with metadata (scale, zero_point).""" + if not np.issubdtype(weight.dtype, np.floating): + raise ValueError("Only floating numpy arrays are supported.") + + val_min = np.amin(weight, axis=axes, keepdims=True) + val_max = np.amax(weight, axis=axes, keepdims=True) + + q_val_min, q_val_max = get_quant_range(nbits, signed, quantization_mode) + + zero_point = None + if quantization_mode == "LINEAR_SYMMETRIC": + # For the linear_symmetric quantization_mode, the range is symmetrical to 0 + max_abs = np.maximum(np.abs(val_min), np.abs(val_max)) + val_min = -max_abs + val_max = max_abs + + if not signed: + zero_point_shift = q_val_max // 2 + zero_point = zero_point_shift * np.ones(val_min.shape) + else: + assert quantization_mode == "LINEAR" + # For the linear quantization_mode, we need to make sure the data range contains `0` + val_min = np.minimum(0.0, val_min) + val_max = np.maximum(0.0, val_max) + zero_point = (q_val_min * val_max - q_val_max * val_min) / (val_max - val_min) + zero_point = np.round(zero_point) + zero_point = np.clip(zero_point, q_val_min, q_val_max) + + scale = (val_max - val_min) / (q_val_max - q_val_min) + quantized_data = np.round(weight / scale) + if zero_point is not None: + quantized_data += zero_point + zero_point = zero_point.squeeze().astype(dtype) + quantized_data = np.clip(quantized_data, q_val_min, q_val_max).astype(dtype) + scale = scale.astype(weight.dtype).squeeze() + + return quantized_data, scale, zero_point diff --git a/coremltools/proto/MIL_pb2.py b/coremltools/proto/MIL_pb2.py index 0e9bf64f9..b1be30e92 100644 --- a/coremltools/proto/MIL_pb2.py +++ b/coremltools/proto/MIL_pb2.py @@ -20,7 +20,7 @@ name='MIL.proto', package='CoreML.Specification.MILSpec', syntax='proto3', - serialized_pb=_b('\n\tMIL.proto\x12\x1c\x43oreML.Specification.MILSpec\"\xf3\x02\n\x07Program\x12\x0f\n\x07version\x18\x01 \x01(\x03\x12G\n\tfunctions\x18\x02 \x03(\x0b\x32\x34.CoreML.Specification.MILSpec.Program.FunctionsEntry\x12\x11\n\tdocString\x18\x03 \x01(\t\x12I\n\nattributes\x18\x04 \x03(\x0b\x32\x35.CoreML.Specification.MILSpec.Program.AttributesEntry\x1aX\n\x0e\x46unctionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x35\n\x05value\x18\x02 \x01(\x0b\x32&.CoreML.Specification.MILSpec.Function:\x02\x38\x01\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"\xbe\x03\n\x08\x46unction\x12<\n\x06inputs\x18\x01 \x03(\x0b\x32,.CoreML.Specification.MILSpec.NamedValueType\x12\r\n\x05opset\x18\x02 \x01(\t\x12_\n\x15\x62lock_specializations\x18\x03 
\x03(\x0b\x32@.CoreML.Specification.MILSpec.Function.BlockSpecializationsEntry\x12J\n\nattributes\x18\x04 \x03(\x0b\x32\x36.CoreML.Specification.MILSpec.Function.AttributesEntry\x1a`\n\x19\x42lockSpecializationsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Block:\x02\x38\x01\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"\xb4\x02\n\x05\x42lock\x12<\n\x06inputs\x18\x01 \x03(\x0b\x32,.CoreML.Specification.MILSpec.NamedValueType\x12\x0f\n\x07outputs\x18\x02 \x03(\t\x12;\n\noperations\x18\x03 \x03(\x0b\x32\'.CoreML.Specification.MILSpec.Operation\x12G\n\nattributes\x18\x04 \x03(\x0b\x32\x33.CoreML.Specification.MILSpec.Block.AttributesEntry\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"\xa9\x01\n\x08\x41rgument\x12\x41\n\targuments\x18\x01 \x03(\x0b\x32..CoreML.Specification.MILSpec.Argument.Binding\x1aZ\n\x07\x42inding\x12\x0e\n\x04name\x18\x01 \x01(\tH\x00\x12\x34\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.ValueH\x00\x42\t\n\x07\x62inding\"\xce\x03\n\tOperation\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x43\n\x06inputs\x18\x02 \x03(\x0b\x32\x33.CoreML.Specification.MILSpec.Operation.InputsEntry\x12=\n\x07outputs\x18\x03 \x03(\x0b\x32,.CoreML.Specification.MILSpec.NamedValueType\x12\x33\n\x06\x62locks\x18\x04 \x03(\x0b\x32#.CoreML.Specification.MILSpec.Block\x12K\n\nattributes\x18\x05 \x03(\x0b\x32\x37.CoreML.Specification.MILSpec.Operation.AttributesEntry\x1aU\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x35\n\x05value\x18\x02 \x01(\x0b\x32&.CoreML.Specification.MILSpec.Argument:\x02\x38\x01\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"U\n\x0eNamedValueType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x35\n\x04type\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\"\x95\x02\n\tValueType\x12>\n\ntensorType\x18\x01 \x01(\x0b\x32(.CoreML.Specification.MILSpec.TensorTypeH\x00\x12:\n\x08listType\x18\x02 \x01(\x0b\x32&.CoreML.Specification.MILSpec.ListTypeH\x00\x12<\n\ttupleType\x18\x03 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.TupleTypeH\x00\x12\x46\n\x0e\x64ictionaryType\x18\x04 \x01(\x0b\x32,.CoreML.Specification.MILSpec.DictionaryTypeH\x00\x42\x06\n\x04type\"\xb7\x02\n\nTensorType\x12\x38\n\x08\x64\x61taType\x18\x01 \x01(\x0e\x32&.CoreML.Specification.MILSpec.DataType\x12\x0c\n\x04rank\x18\x02 \x01(\x03\x12;\n\ndimensions\x18\x03 \x03(\x0b\x32\'.CoreML.Specification.MILSpec.Dimension\x12L\n\nattributes\x18\x04 \x03(\x0b\x32\x38.CoreML.Specification.MILSpec.TensorType.AttributesEntry\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"C\n\tTupleType\x12\x36\n\x05types\x18\x01 \x03(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\"z\n\x08ListType\x12\x35\n\x04type\x18\x01 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\x12\x37\n\x06length\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.Dimension\"\x86\x01\n\x0e\x44ictionaryType\x12\x38\n\x07keyType\x18\x01 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\x12:\n\tvalueType\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\"\xfd\x01\n\tDimension\x12M\n\x08\x63onstant\x18\x01 
\x01(\x0b\x32\x39.CoreML.Specification.MILSpec.Dimension.ConstantDimensionH\x00\x12K\n\x07unknown\x18\x02 \x01(\x0b\x32\x38.CoreML.Specification.MILSpec.Dimension.UnknownDimensionH\x00\x1a!\n\x11\x43onstantDimension\x12\x0c\n\x04size\x18\x01 \x01(\x04\x1a$\n\x10UnknownDimension\x12\x10\n\x08variadic\x18\x01 \x01(\x08\x42\x0b\n\tdimension\"\xb9\x04\n\x05Value\x12\x11\n\tdocString\x18\x01 \x01(\t\x12\x35\n\x04type\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\x12L\n\x0eimmediateValue\x18\x03 \x01(\x0b\x32\x32.CoreML.Specification.MILSpec.Value.ImmediateValueH\x00\x12J\n\rblobFileValue\x18\x05 \x01(\x0b\x32\x31.CoreML.Specification.MILSpec.Value.BlobFileValueH\x00\x1a\x8f\x02\n\x0eImmediateValue\x12;\n\x06tensor\x18\x01 \x01(\x0b\x32).CoreML.Specification.MILSpec.TensorValueH\x00\x12\x39\n\x05tuple\x18\x02 \x01(\x0b\x32(.CoreML.Specification.MILSpec.TupleValueH\x00\x12\x37\n\x04list\x18\x03 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ListValueH\x00\x12\x43\n\ndictionary\x18\x04 \x01(\x0b\x32-.CoreML.Specification.MILSpec.DictionaryValueH\x00\x42\x07\n\x05value\x1a\x31\n\rBlobFileValue\x12\x10\n\x08\x66ileName\x18\x01 \x01(\t\x12\x0e\n\x06offset\x18\x02 \x01(\x04\x42\x07\n\x05value\"\xac\x06\n\x0bTensorValue\x12J\n\x06\x66loats\x18\x01 \x01(\x0b\x32\x38.CoreML.Specification.MILSpec.TensorValue.RepeatedFloatsH\x00\x12\x46\n\x04ints\x18\x02 \x01(\x0b\x32\x36.CoreML.Specification.MILSpec.TensorValue.RepeatedIntsH\x00\x12H\n\x05\x62ools\x18\x03 \x01(\x0b\x32\x37.CoreML.Specification.MILSpec.TensorValue.RepeatedBoolsH\x00\x12L\n\x07strings\x18\x04 \x01(\x0b\x32\x39.CoreML.Specification.MILSpec.TensorValue.RepeatedStringsH\x00\x12N\n\x08longInts\x18\x05 \x01(\x0b\x32:.CoreML.Specification.MILSpec.TensorValue.RepeatedLongIntsH\x00\x12L\n\x07\x64oubles\x18\x06 \x01(\x0b\x32\x39.CoreML.Specification.MILSpec.TensorValue.RepeatedDoublesH\x00\x12H\n\x05\x62ytes\x18\x07 \x01(\x0b\x32\x37.CoreML.Specification.MILSpec.TensorValue.RepeatedBytesH\x00\x1a$\n\x0eRepeatedFloats\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x1a%\n\x0fRepeatedDoubles\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x10\x01\x1a\"\n\x0cRepeatedInts\x12\x12\n\x06values\x18\x01 \x03(\x05\x42\x02\x10\x01\x1a&\n\x10RepeatedLongInts\x12\x12\n\x06values\x18\x01 \x03(\x03\x42\x02\x10\x01\x1a#\n\rRepeatedBools\x12\x12\n\x06values\x18\x01 \x03(\x08\x42\x02\x10\x01\x1a!\n\x0fRepeatedStrings\x12\x0e\n\x06values\x18\x01 \x03(\t\x1a\x1f\n\rRepeatedBytes\x12\x0e\n\x06values\x18\x01 \x01(\x0c\x42\x07\n\x05value\"A\n\nTupleValue\x12\x33\n\x06values\x18\x01 \x03(\x0b\x32#.CoreML.Specification.MILSpec.Value\"@\n\tListValue\x12\x33\n\x06values\x18\x01 \x03(\x0b\x32#.CoreML.Specification.MILSpec.Value\"\xd3\x01\n\x0f\x44ictionaryValue\x12J\n\x06values\x18\x01 \x03(\x0b\x32:.CoreML.Specification.MILSpec.DictionaryValue.KeyValuePair\x1at\n\x0cKeyValuePair\x12\x30\n\x03key\x18\x01 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value*\xb2\x01\n\x08\x44\x61taType\x12\x0f\n\x0bUNUSED_TYPE\x10\x00\x12\x08\n\x04\x42OOL\x10\x01\x12\n\n\x06STRING\x10\x02\x12\x0b\n\x07\x46LOAT16\x10\n\x12\x0b\n\x07\x46LOAT32\x10\x0b\x12\x0b\n\x07\x46LOAT64\x10\x0c\x12\x08\n\x04INT8\x10\x15\x12\t\n\x05INT16\x10\x16\x12\t\n\x05INT32\x10\x17\x12\t\n\x05INT64\x10\x18\x12\t\n\x05UINT8\x10\x1f\x12\n\n\x06UINT16\x10 \x12\n\n\x06UINT32\x10!\x12\n\n\x06UINT64\x10\"B\x02H\x03\x62\x06proto3') + 
serialized_pb=_b('\n\tMIL.proto\x12\x1c\x43oreML.Specification.MILSpec\"\xf3\x02\n\x07Program\x12\x0f\n\x07version\x18\x01 \x01(\x03\x12G\n\tfunctions\x18\x02 \x03(\x0b\x32\x34.CoreML.Specification.MILSpec.Program.FunctionsEntry\x12\x11\n\tdocString\x18\x03 \x01(\t\x12I\n\nattributes\x18\x04 \x03(\x0b\x32\x35.CoreML.Specification.MILSpec.Program.AttributesEntry\x1aX\n\x0e\x46unctionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x35\n\x05value\x18\x02 \x01(\x0b\x32&.CoreML.Specification.MILSpec.Function:\x02\x38\x01\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"\xbe\x03\n\x08\x46unction\x12<\n\x06inputs\x18\x01 \x03(\x0b\x32,.CoreML.Specification.MILSpec.NamedValueType\x12\r\n\x05opset\x18\x02 \x01(\t\x12_\n\x15\x62lock_specializations\x18\x03 \x03(\x0b\x32@.CoreML.Specification.MILSpec.Function.BlockSpecializationsEntry\x12J\n\nattributes\x18\x04 \x03(\x0b\x32\x36.CoreML.Specification.MILSpec.Function.AttributesEntry\x1a`\n\x19\x42lockSpecializationsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Block:\x02\x38\x01\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"\xb4\x02\n\x05\x42lock\x12<\n\x06inputs\x18\x01 \x03(\x0b\x32,.CoreML.Specification.MILSpec.NamedValueType\x12\x0f\n\x07outputs\x18\x02 \x03(\t\x12;\n\noperations\x18\x03 \x03(\x0b\x32\'.CoreML.Specification.MILSpec.Operation\x12G\n\nattributes\x18\x04 \x03(\x0b\x32\x33.CoreML.Specification.MILSpec.Block.AttributesEntry\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"\xa9\x01\n\x08\x41rgument\x12\x41\n\targuments\x18\x01 \x03(\x0b\x32..CoreML.Specification.MILSpec.Argument.Binding\x1aZ\n\x07\x42inding\x12\x0e\n\x04name\x18\x01 \x01(\tH\x00\x12\x34\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.ValueH\x00\x42\t\n\x07\x62inding\"\xce\x03\n\tOperation\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x43\n\x06inputs\x18\x02 \x03(\x0b\x32\x33.CoreML.Specification.MILSpec.Operation.InputsEntry\x12=\n\x07outputs\x18\x03 \x03(\x0b\x32,.CoreML.Specification.MILSpec.NamedValueType\x12\x33\n\x06\x62locks\x18\x04 \x03(\x0b\x32#.CoreML.Specification.MILSpec.Block\x12K\n\nattributes\x18\x05 \x03(\x0b\x32\x37.CoreML.Specification.MILSpec.Operation.AttributesEntry\x1aU\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x35\n\x05value\x18\x02 \x01(\x0b\x32&.CoreML.Specification.MILSpec.Argument:\x02\x38\x01\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"U\n\x0eNamedValueType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x35\n\x04type\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\"\x95\x02\n\tValueType\x12>\n\ntensorType\x18\x01 \x01(\x0b\x32(.CoreML.Specification.MILSpec.TensorTypeH\x00\x12:\n\x08listType\x18\x02 \x01(\x0b\x32&.CoreML.Specification.MILSpec.ListTypeH\x00\x12<\n\ttupleType\x18\x03 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.TupleTypeH\x00\x12\x46\n\x0e\x64ictionaryType\x18\x04 \x01(\x0b\x32,.CoreML.Specification.MILSpec.DictionaryTypeH\x00\x42\x06\n\x04type\"\xb7\x02\n\nTensorType\x12\x38\n\x08\x64\x61taType\x18\x01 \x01(\x0e\x32&.CoreML.Specification.MILSpec.DataType\x12\x0c\n\x04rank\x18\x02 \x01(\x03\x12;\n\ndimensions\x18\x03 
\x03(\x0b\x32\'.CoreML.Specification.MILSpec.Dimension\x12L\n\nattributes\x18\x04 \x03(\x0b\x32\x38.CoreML.Specification.MILSpec.TensorType.AttributesEntry\x1aV\n\x0f\x41ttributesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value:\x02\x38\x01\"C\n\tTupleType\x12\x36\n\x05types\x18\x01 \x03(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\"z\n\x08ListType\x12\x35\n\x04type\x18\x01 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\x12\x37\n\x06length\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.Dimension\"\x86\x01\n\x0e\x44ictionaryType\x12\x38\n\x07keyType\x18\x01 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\x12:\n\tvalueType\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\"\xfd\x01\n\tDimension\x12M\n\x08\x63onstant\x18\x01 \x01(\x0b\x32\x39.CoreML.Specification.MILSpec.Dimension.ConstantDimensionH\x00\x12K\n\x07unknown\x18\x02 \x01(\x0b\x32\x38.CoreML.Specification.MILSpec.Dimension.UnknownDimensionH\x00\x1a!\n\x11\x43onstantDimension\x12\x0c\n\x04size\x18\x01 \x01(\x04\x1a$\n\x10UnknownDimension\x12\x10\n\x08variadic\x18\x01 \x01(\x08\x42\x0b\n\tdimension\"\xb9\x04\n\x05Value\x12\x11\n\tdocString\x18\x01 \x01(\t\x12\x35\n\x04type\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ValueType\x12L\n\x0eimmediateValue\x18\x03 \x01(\x0b\x32\x32.CoreML.Specification.MILSpec.Value.ImmediateValueH\x00\x12J\n\rblobFileValue\x18\x05 \x01(\x0b\x32\x31.CoreML.Specification.MILSpec.Value.BlobFileValueH\x00\x1a\x8f\x02\n\x0eImmediateValue\x12;\n\x06tensor\x18\x01 \x01(\x0b\x32).CoreML.Specification.MILSpec.TensorValueH\x00\x12\x39\n\x05tuple\x18\x02 \x01(\x0b\x32(.CoreML.Specification.MILSpec.TupleValueH\x00\x12\x37\n\x04list\x18\x03 \x01(\x0b\x32\'.CoreML.Specification.MILSpec.ListValueH\x00\x12\x43\n\ndictionary\x18\x04 \x01(\x0b\x32-.CoreML.Specification.MILSpec.DictionaryValueH\x00\x42\x07\n\x05value\x1a\x31\n\rBlobFileValue\x12\x10\n\x08\x66ileName\x18\x01 \x01(\t\x12\x0e\n\x06offset\x18\x02 \x01(\x04\x42\x07\n\x05value\"\xac\x06\n\x0bTensorValue\x12J\n\x06\x66loats\x18\x01 \x01(\x0b\x32\x38.CoreML.Specification.MILSpec.TensorValue.RepeatedFloatsH\x00\x12\x46\n\x04ints\x18\x02 \x01(\x0b\x32\x36.CoreML.Specification.MILSpec.TensorValue.RepeatedIntsH\x00\x12H\n\x05\x62ools\x18\x03 \x01(\x0b\x32\x37.CoreML.Specification.MILSpec.TensorValue.RepeatedBoolsH\x00\x12L\n\x07strings\x18\x04 \x01(\x0b\x32\x39.CoreML.Specification.MILSpec.TensorValue.RepeatedStringsH\x00\x12N\n\x08longInts\x18\x05 \x01(\x0b\x32:.CoreML.Specification.MILSpec.TensorValue.RepeatedLongIntsH\x00\x12L\n\x07\x64oubles\x18\x06 \x01(\x0b\x32\x39.CoreML.Specification.MILSpec.TensorValue.RepeatedDoublesH\x00\x12H\n\x05\x62ytes\x18\x07 \x01(\x0b\x32\x37.CoreML.Specification.MILSpec.TensorValue.RepeatedBytesH\x00\x1a$\n\x0eRepeatedFloats\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x1a%\n\x0fRepeatedDoubles\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x10\x01\x1a\"\n\x0cRepeatedInts\x12\x12\n\x06values\x18\x01 \x03(\x05\x42\x02\x10\x01\x1a&\n\x10RepeatedLongInts\x12\x12\n\x06values\x18\x01 \x03(\x03\x42\x02\x10\x01\x1a#\n\rRepeatedBools\x12\x12\n\x06values\x18\x01 \x03(\x08\x42\x02\x10\x01\x1a!\n\x0fRepeatedStrings\x12\x0e\n\x06values\x18\x01 \x03(\t\x1a\x1f\n\rRepeatedBytes\x12\x0e\n\x06values\x18\x01 \x01(\x0c\x42\x07\n\x05value\"A\n\nTupleValue\x12\x33\n\x06values\x18\x01 \x03(\x0b\x32#.CoreML.Specification.MILSpec.Value\"@\n\tListValue\x12\x33\n\x06values\x18\x01 
\x03(\x0b\x32#.CoreML.Specification.MILSpec.Value\"\xd3\x01\n\x0f\x44ictionaryValue\x12J\n\x06values\x18\x01 \x03(\x0b\x32:.CoreML.Specification.MILSpec.DictionaryValue.KeyValuePair\x1at\n\x0cKeyValuePair\x12\x30\n\x03key\x18\x01 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.CoreML.Specification.MILSpec.Value*\xc0\x01\n\x08\x44\x61taType\x12\x0f\n\x0bUNUSED_TYPE\x10\x00\x12\x08\n\x04\x42OOL\x10\x01\x12\n\n\x06STRING\x10\x02\x12\x0b\n\x07\x46LOAT16\x10\n\x12\x0b\n\x07\x46LOAT32\x10\x0b\x12\x0b\n\x07\x46LOAT64\x10\x0c\x12\x0c\n\x08\x42\x46LOAT16\x10\r\x12\x08\n\x04INT8\x10\x15\x12\t\n\x05INT16\x10\x16\x12\t\n\x05INT32\x10\x17\x12\t\n\x05INT64\x10\x18\x12\t\n\x05UINT8\x10\x1f\x12\n\n\x06UINT16\x10 \x12\n\n\x06UINT32\x10!\x12\n\n\x06UINT64\x10\"B\x02H\x03\x62\x06proto3') ) _DATATYPE = _descriptor.EnumDescriptor( @@ -54,42 +54,46 @@ options=None, type=None), _descriptor.EnumValueDescriptor( - name='INT8', index=6, number=21, + name='BFLOAT16', index=6, number=13, options=None, type=None), _descriptor.EnumValueDescriptor( - name='INT16', index=7, number=22, + name='INT8', index=7, number=21, options=None, type=None), _descriptor.EnumValueDescriptor( - name='INT32', index=8, number=23, + name='INT16', index=8, number=22, options=None, type=None), _descriptor.EnumValueDescriptor( - name='INT64', index=9, number=24, + name='INT32', index=9, number=23, options=None, type=None), _descriptor.EnumValueDescriptor( - name='UINT8', index=10, number=31, + name='INT64', index=10, number=24, options=None, type=None), _descriptor.EnumValueDescriptor( - name='UINT16', index=11, number=32, + name='UINT8', index=11, number=31, options=None, type=None), _descriptor.EnumValueDescriptor( - name='UINT32', index=12, number=33, + name='UINT16', index=12, number=32, options=None, type=None), _descriptor.EnumValueDescriptor( - name='UINT64', index=13, number=34, + name='UINT32', index=13, number=33, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='UINT64', index=14, number=34, options=None, type=None), ], containing_type=None, options=None, serialized_start=4816, - serialized_end=4994, + serialized_end=5008, ) _sym_db.RegisterEnumDescriptor(_DATATYPE) @@ -100,6 +104,7 @@ FLOAT16 = 10 FLOAT32 = 11 FLOAT64 = 12 +BFLOAT16 = 13 INT8 = 21 INT16 = 22 INT32 = 23 diff --git a/coremltools/test/api/test_api_examples.py b/coremltools/test/api/test_api_examples.py index 758bd049e..1c54b3853 100644 --- a/coremltools/test/api/test_api_examples.py +++ b/coremltools/test/api/test_api_examples.py @@ -6,6 +6,7 @@ import copy import os import tempfile +from collections import Counter import numpy as np import pytest @@ -13,7 +14,8 @@ import coremltools as ct from coremltools._deps import _HAS_TORCH from coremltools.converters.mil import Builder as mb -from coremltools.converters.mil.mil import Function, Program, get_new_symbol +from coremltools.converters.mil import mil +from coremltools.converters.mil.mil import Function, get_new_symbol from coremltools.converters.mil.testing_utils import get_op_types_in_program if _HAS_TORCH: @@ -57,7 +59,7 @@ def test_unsanitized_input_name_during_prediction(convert_to): ''' input name : "x/0" becomes "x_0" due to name sanitization applied during conversion ''' - prog = Program() + prog = mil.Program() func_inputs = {"x/0": mb.placeholder(shape=[2, 3]), "y": mb.placeholder(shape=[2, 3])} with Function(func_inputs) as ssa_fun: @@ -79,7 +81,7 @@ def test_unsanitized_input_name_during_prediction(convert_to): @staticmethod def 
_test_variant_input_type_prediction(to_tensor, convert_to): - prog = Program() + prog = mil.Program() func_inputs = {"x": mb.placeholder(shape=[2, 3]), "y": mb.placeholder(shape=[2, 3])} with Function(func_inputs) as ssa_fun: @@ -167,7 +169,7 @@ def prog(x): @staticmethod @pytest.mark.skipif(not ct.utils._is_macos(), reason="Platform is not Mac OS") def test_deepcopy_error_with_symbols_in_prog(): - prog = Program() + prog = mil.Program() func_inputs = {"x": mb.placeholder(shape=[get_new_symbol(), 3]), "y": mb.placeholder(shape=[2, 3])} with Function(func_inputs) as ssa_fun: @@ -390,12 +392,21 @@ def test_skip_passes_in_different_pipelines(self): convert_to="mlprogram", pass_pipeline=pipeline, ) - assert ( - get_op_types_in_program( - model_converted._get_mil_internal(), skip_const_ops=False - ).count("const") - == 24 - ) + + op_types = get_op_types_in_program(model_converted._mil_program, skip_const_ops=False) + expected_counts = { + "const": 26, + "cast": 7, + "conv": 1, + "matmul": 1, + "add": 1, + "shape": 1, + "slice_by_index": 2, + "concat": 1, + "reshape": 1, + "leaky_relu": 1, + } + assert Counter(op_types) == expected_counts def test_empty_pipeline(self): model = self._get_test_model() @@ -491,7 +502,7 @@ def test_pass_option_skip_const_by_size(self): get_op_types_in_program( model_converted._get_mil_internal(), skip_const_ops=False ).count("const") - == 23 + == 25 ) def test_pass_unsupported_option(self): diff --git a/coremltools/test/api/test_api_visibilities.py b/coremltools/test/api/test_api_visibilities.py index 235990359..b783828bc 100644 --- a/coremltools/test/api/test_api_visibilities.py +++ b/coremltools/test/api/test_api_visibilities.py @@ -52,7 +52,7 @@ class TestApiVisibilities: def test_top_level(self): if not ct.utils._is_macos(): - EXPECTED_MODULES.remove("libcoremlpython") + EXPECTED_MODULES.remove("libcoremlpython") _check_visible_modules(_get_visible_items(ct), EXPECTED_MODULES) def test_utils(self): diff --git a/coremltools/test/modelpackage/test_modelpackage.py b/coremltools/test/modelpackage/test_modelpackage.py index e64a4ca55..2f3f0591a 100644 --- a/coremltools/test/modelpackage/test_modelpackage.py +++ b/coremltools/test/modelpackage/test_modelpackage.py @@ -3,6 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +import json import os import shutil import tempfile @@ -12,16 +13,19 @@ import coremltools from coremltools import ComputeUnit, utils -from coremltools._deps import _HAS_TORCH +from coremltools._deps import _HAS_EXECUTORCH, _HAS_TORCH from coremltools.converters.mil import Builder as mb from coremltools.libmodelpackage import ModelPackage -from coremltools.models import MLModel +from coremltools.models import _METADATA_VERSION, MLModel from coremltools.models.utils import _MLPACKAGE_AUTHOR_NAME, _WEIGHTS_DIR_NAME from coremltools.proto import Model_pb2 if _HAS_TORCH: import torch +if _HAS_EXECUTORCH: + import executorch.exir + def _remove_path(path): if os.path.isdir(path): @@ -265,6 +269,77 @@ def test_save_in_place(self): _remove_path(package.name) + @pytest.mark.skipif(not _HAS_EXECUTORCH, reason="requires ExecuTorch") + def test_save_EXIR_debug_handle(self): + """ + If we update EXIR debug handle serialization, we should update this test as well + """ + INPUT_SHAPE = (2, 10) + LINEAR_SHAPE = (INPUT_SHAPE[-1], 20) + + class TestModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear = 
torch.nn.Linear(*LINEAR_SHAPE) + + def forward(self, x): + return self.linear(x) + + def _compare_loaded_debug_handle_mapping_with_original(package): + debug_handle_mapping_json_path = os.path.join( + package, "executorch_debug_handle_mapping.json" + ) + assert os.path.exists(debug_handle_mapping_json_path) + with open(debug_handle_mapping_json_path, "r") as f: + loaded_debug_handle_mapping = json.load(f) + assert loaded_debug_handle_mapping == debug_handle_mapping + + def _compare_prediction_with_torch(coreml_model, torch_model): + x = torch.rand(2, 10) + coreml_x = {list(coreml_model.input_description)[0]: x.numpy()} + + coreml_preds = coreml_model.predict(coreml_x) + assert coreml_preds is not None + coreml_y = list(coreml_preds.values())[0] + + torch_y = torch_model(x).detach().numpy() + np.testing.assert_allclose(coreml_y, torch_y, rtol=1e-6, atol=1e-6) + + torch_model = TestModule() + torch_model.eval() + + example_input = (torch.rand(*INPUT_SHAPE),) + exir_program_aten = torch.export.export(torch_model, example_input) + exir_program_edge = executorch.exir.to_edge(exir_program_aten).exported_program() + + coreml_model = coremltools.convert( + exir_program_edge, compute_precision=coremltools.precision.FLOAT32 + ) + debug_handle_mapping = [ + {_METADATA_VERSION: coreml_model.user_defined_metadata[_METADATA_VERSION]}, + { + str(k): v + for k, v in coreml_model._mil_program.construct_debug_handle_to_ops_mapping().items() + }, + ] + + with tempfile.TemporaryDirectory(suffix=".mlpackage") as package0: + coreml_model.save(package0) + loaded_model0 = MLModel(package0) + if utils._macos_version() >= (12, 0): + _compare_prediction_with_torch(loaded_model0, torch_model) + _compare_loaded_debug_handle_mapping_with_original(package0) + + with tempfile.TemporaryDirectory(suffix=".mlpackage") as package1: + loaded_model0.save(package1) + loaded_model1 = MLModel(package1) + if utils._macos_version() >= (12, 0): + _compare_prediction_with_torch(loaded_model1, torch_model) + # Although debug handle info will be lost in loaded model due to we do not + # deserialize executorch_debug_handle_mapping.json, package1 will still have + # executorch_debug_handle_mapping.json, which is copied from package0 + _compare_loaded_debug_handle_mapping_with_original(package1) + @pytest.mark.skipif(not _HAS_TORCH, reason="requires torch") def test_mil_as_package(self): num_tokens = 3 diff --git a/coremltools/test/neural_network/test_numpy_nn_layers.py b/coremltools/test/neural_network/test_numpy_nn_layers.py index c939b3cc1..8d7c77fa3 100644 --- a/coremltools/test/neural_network/test_numpy_nn_layers.py +++ b/coremltools/test/neural_network/test_numpy_nn_layers.py @@ -5016,6 +5016,9 @@ def test_gather_cpu(self, cpu_only=True): ) def test_gather_gpu(self): + # This test can be stochastically failing, so we set the below seed: + np.random.seed(0) + pytest.xfail("rdar://124260627 ([CI] Two tests are random failing on CI)") self.test_gather_cpu(cpu_only=False) def test_gather_along_axis_cpu(self, cpu_only=True): diff --git a/coremltools/test/neural_network/test_tf_numeric.py b/coremltools/test/neural_network/test_tf_numeric.py index b6ff42ff9..e248a98c2 100644 --- a/coremltools/test/neural_network/test_tf_numeric.py +++ b/coremltools/test/neural_network/test_tf_numeric.py @@ -396,6 +396,8 @@ def test_resize_bilinear_cpu_only(self): @unittest.skipUnless(_macos_version() >= (10, 14), "Only supported on MacOS 10.14+") def test_crop_resize(self, cpu_only=False): + # This test can be stochastically failing, so we set the below 
seed: + np.random.seed(0) if _macos_version()[0] == 12: pytest.xfail("rdar://110274216") diff --git a/coremltools/test/optimize/coreml/test_post_training_quantization.py b/coremltools/test/optimize/coreml/test_post_training_quantization.py index b0d8e045a..e799fcec5 100644 --- a/coremltools/test/optimize/coreml/test_post_training_quantization.py +++ b/coremltools/test/optimize/coreml/test_post_training_quantization.py @@ -114,7 +114,7 @@ def create_sparse_weight(weight, target_sparsity): return np.reshape(weight, shape).astype(np.float32) -def verify_model_outputs(model, compressed_model, input_values): +def verify_model_outputs(model, compressed_model, input_values, rtol=1e-7, atol=0): """ This utility functions does the following checks: @@ -144,7 +144,8 @@ def verify_model_outputs(model, compressed_model, input_values): de_output_dict = decompressed_model.predict(input_values) for k, v in de_output_dict.items(): assert k in output_dict - np.testing.assert_allclose(v, output_dict[k]) + np.testing.assert_allclose(v, output_dict[k], rtol=rtol, atol=atol) + class TestLinearQuantizeWeights: @staticmethod @@ -720,7 +721,9 @@ def test_weight_decopmression_coreml_optimize(): pipeline = ct.PassPipeline.DEFAULT_PRUNING - pipeline.insert_pass(1, "compression::palettize_weights") + # Add a palettization pass after the pruning pass. + prune_pass_idx = pipeline.passes.index("compression::prune_weights") + pipeline.insert_pass(prune_pass_idx + 1, "compression::palettize_weights") config = cto.coreml.OptimizationConfig( global_config=cto.coreml.OpPalettizerConfig(mode="unique"), ) @@ -773,7 +776,9 @@ def test_convert_sparse_and_palettized_source_model_custom(): pipeline = ct.PassPipeline.DEFAULT_PRUNING - pipeline.insert_pass(1, "compression::palettize_weights") + # Add a palettization pass after the pruning pass. + prune_pass_idx = pipeline.passes.index("compression::prune_weights") + pipeline.insert_pass(prune_pass_idx + 1, "compression::palettize_weights") config = cto.coreml.OptimizationConfig( global_config=cto.coreml.OpPalettizerConfig(mode="unique"), ) diff --git a/coremltools/test/pipeline/test_pipeline.py b/coremltools/test/pipeline/test_pipeline.py index 3874f9387..b0b438c5e 100644 --- a/coremltools/test/pipeline/test_pipeline.py +++ b/coremltools/test/pipeline/test_pipeline.py @@ -12,6 +12,7 @@ import coremltools as ct from coremltools._deps import _HAS_LIBSVM, _HAS_SKLEARN +from coremltools.converters.mil import mil from coremltools.converters.mil.mil import Builder as mb from coremltools.converters.mil.mil import Function, Program from coremltools.models.pipeline import PipelineClassifier, PipelineRegressor @@ -238,7 +239,7 @@ def _make_model(input_name, input_length, weight_tensor = np.arange(input_length * output_length, dtype='float32') weight_tensor = weight_tensor.reshape(output_length, input_length) - prog = Program() + prog = mil.Program() func_inputs = {input_name: mb.placeholder(shape=(input_length,))} with Function(func_inputs) as ssa_fun: input = ssa_fun.inputs[input_name] @@ -319,7 +320,7 @@ def test_compute_unit(): @staticmethod def test_second_model_needs_pipeline_input(): # First model takes one parameter - p1 = Program() + p1 = mil.Program() func_inputs = {'x1': mb.placeholder(shape=(2,))} with Function(func_inputs) as ssa_fun: x1 = ssa_fun.inputs['x1'] @@ -330,7 +331,7 @@ def test_second_model_needs_pipeline_input(): # Second model takes two parameters. One will be from previous model in pipeline. # The other as pipeline input. 
- p2 = Program() + p2 = mil.Program() func_inputs = { 'y1': mb.placeholder(shape=(2,)), 'x2': mb.placeholder(shape=(2,)), diff --git a/coremltools/version.py b/coremltools/version.py index c21924b8c..a5f2975a9 100644 --- a/coremltools/version.py +++ b/coremltools/version.py @@ -4,4 +4,4 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -__version__ = "7.1" # VERSION_STRING +__version__ = "7.1.2" # VERSION_STRING diff --git a/docs/source/coremltools.converters.mil.mil.passes.defs.rst b/docs/source/coremltools.converters.mil.mil.passes.defs.rst index a32d4e3ad..eacc0a398 100644 --- a/docs/source/coremltools.converters.mil.mil.passes.defs.rst +++ b/docs/source/coremltools.converters.mil.mil.passes.defs.rst @@ -12,6 +12,7 @@ cleanup .. autoclass:: const_elimination .. autoclass:: dead_code_elimination .. autoclass:: dedup_op_and_var_names + .. autoclass:: expand_dynamic_linear .. autoclass:: fuse_reduce_mean .. autoclass:: loop_invariant_elimination .. autoclass:: noop_elimination @@ -63,6 +64,7 @@ optimize_linear .. autoclass:: fuse_linear_bias .. autoclass:: fuse_matmul_weight_bias + .. autoclass:: fuse_transpose_matmul optimize_normalization @@ -78,6 +80,7 @@ optimize_quantization .. automodule:: coremltools.converters.mil.mil.passes.defs.optimize_quantization + .. autoclass:: merge_affine_dequantize_with_consecutive_ops .. autoclass:: int_op_canonicalization .. autoclass:: nullify_redundant_quantization_zero_point .. autoclass:: dequantize_quantize_pair_elimination diff --git a/mlmodel/build/format/MIL.pb.cc b/mlmodel/build/format/MIL.pb.cc index d481c3ade..c265cff06 100644 --- a/mlmodel/build/format/MIL.pb.cc +++ b/mlmodel/build/format/MIL.pb.cc @@ -316,6 +316,7 @@ bool DataType_IsValid(int value) { case 10: case 11: case 12: + case 13: case 21: case 22: case 23: diff --git a/mlmodel/build/format/MIL.pb.h b/mlmodel/build/format/MIL.pb.h index 1d20d6cea..6776cba76 100644 --- a/mlmodel/build/format/MIL.pb.h +++ b/mlmodel/build/format/MIL.pb.h @@ -178,6 +178,7 @@ enum DataType { FLOAT16 = 10, FLOAT32 = 11, FLOAT64 = 12, + BFLOAT16 = 13, INT8 = 21, INT16 = 22, INT32 = 23, diff --git a/mlmodel/build/format/MIL_enums.h b/mlmodel/build/format/MIL_enums.h index e3a2a2a6f..911353d4f 100644 --- a/mlmodel/build/format/MIL_enums.h +++ b/mlmodel/build/format/MIL_enums.h @@ -7,6 +7,7 @@ enum MLDataType: int { MLDataTypeFLOAT16 = 10, MLDataTypeFLOAT32 = 11, MLDataTypeFLOAT64 = 12, + MLDataTypeBFLOAT16 = 13, MLDataTypeINT8 = 21, MLDataTypeINT16 = 22, MLDataTypeINT32 = 23, diff --git a/reqs/test.pip b/reqs/test.pip index 784ce6769..86cb7a346 100644 --- a/reqs/test.pip +++ b/reqs/test.pip @@ -24,27 +24,15 @@ scipy==1.9.2; python_version == '3.11' six sympy > 1.6 gast==0.4.0 -torch==2.1.0 -torchaudio==2.1.0 -torchvision==0.16.0 +torch==2.2.0 +torchaudio==2.2.0 +torchvision==0.17.0 xgboost==1.4.2; platform_machine != "arm64" mock wrapt tqdm pytest-timeout -# TensorFlow (x86) related package -tensorflow==2.12.0; platform_machine != "arm64" -tensorflow-estimator==2.12.0; platform_machine != "arm64" -keras==2.12.0; platform_machine != "arm64" - -# TensorFlow (arm64) related package. Currently no Python 3.11 support. 
diff --git a/reqs/test.pip b/reqs/test.pip
index 784ce6769..86cb7a346 100644
--- a/reqs/test.pip
+++ b/reqs/test.pip
@@ -24,27 +24,15 @@ scipy==1.9.2; python_version == '3.11'
 six
 sympy > 1.6
 gast==0.4.0
-torch==2.1.0
-torchaudio==2.1.0
-torchvision==0.16.0
+torch==2.2.0
+torchaudio==2.2.0
+torchvision==0.17.0
 xgboost==1.4.2; platform_machine != "arm64"
 mock
 wrapt
 tqdm
 pytest-timeout
 
-# TensorFlow (x86) related package
-tensorflow==2.12.0; platform_machine != "arm64"
-tensorflow-estimator==2.12.0; platform_machine != "arm64"
-keras==2.12.0; platform_machine != "arm64"
-
-# TensorFlow (arm64) related package. Currently no Python 3.11 support.
-tensorflow-macos==2.11.0; platform_machine == "arm64" and python_version < "3.11"
-tensorflow-estimator==2.11.0; platform_machine == "arm64" and python_version < "3.11"
-keras==2.11.0; platform_machine == "arm64" and python_version < "3.11"
-
-tensorflow-addons==0.19.0; python_version < "3.11"
-tensorflow-hub==0.12.0
 transformers==4.26.0
 
 # coremltools.optimize.torch
diff --git a/reqs/test_tf2.pip b/reqs/test_tf2.pip
new file mode 100644
index 000000000..b178018ad
--- /dev/null
+++ b/reqs/test_tf2.pip
@@ -0,0 +1,16 @@
+-r ./test.pip
+
+# TODO(rdar://123269464): Support a recent version of TensorFlow
+
+# TensorFlow (x86) related package
+tensorflow==2.12.0; platform_machine != "arm64"
+tensorflow-estimator==2.12.0; platform_machine != "arm64"
+keras==2.12.0; platform_machine != "arm64"
+
+# TensorFlow (arm64) related package. Currently no Python 3.11 support.
+tensorflow-macos==2.11.0; platform_machine == "arm64" and python_version < "3.11"
+tensorflow-estimator==2.11.0; platform_machine == "arm64" and python_version < "3.11"
+keras==2.11.0; platform_machine == "arm64" and python_version < "3.11"
+
+tensorflow-addons==0.19.0; python_version < "3.11"
+tensorflow-hub==0.12.0
diff --git a/scripts/build.sh b/scripts/build.sh
index bc311e987..56998f8b0 100755
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -107,6 +107,9 @@ CMAKE_COMMAND=""
 if [[ $OSTYPE == darwin* ]]; then
     CMAKE_COMMAND="xcrun --sdk ${sdk} "
 fi
+if [ -z "`which cmake`" ] || [ "`which cmake`" = "cmake not found" ]; then
+    conda install cmake -y
+fi
 CMAKE_COMMAND+="cmake $ADDITIONAL_CMAKE_OPTIONS \
     -DCMAKE_BUILD_TYPE=$BUILD_MODE \
     -DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \
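With the split above, reqs/test.pip no longer pulls in TensorFlow at all; the new reqs/test_tf2.pip begins with `-r ./test.pip` and layers the TensorFlow pins on top, and only the TF-facing CI jobs install it. A small, purely illustrative guard (not part of this change) showing how a TF-dependent test can skip cleanly on jobs that installed only reqs/test.pip:

import pytest

# Skip, rather than fail, when TensorFlow is absent, i.e. when the job did not
# install the extra pins from reqs/test_tf2.pip.
tf = pytest.importorskip("tensorflow")

def test_pinned_tf_is_2x():
    # Both the x86 (2.12.0) and arm64 (2.11.0) pins are TensorFlow 2 releases.
    assert tf.__version__.startswith("2.")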
diff --git a/scripts/test.sh b/scripts/test.sh
index 3389ed0cb..7265b05c3 100755
--- a/scripts/test.sh
+++ b/scripts/test.sh
@@ -32,6 +32,7 @@ print_help() {
     echo " --wheel-path=* Specify which wheel to test. Otherwise, test the current coremltools dir."
     echo " --xml-path=* Path to test xml file."
     echo " --test-package=* Test package to run."
+    echo " --ignores=* Test packages to ignore"
     echo " --python=* Python to use for configuration."
     echo " --requirements=* [Optional] Path to the requirements.txt file."
     echo " --cov=* Generate coverage report for these dirs."
@@ -50,6 +51,7 @@ while [ $# -gt 0 ]
     --requirements=*) REQUIREMENTS=${1##--requirements=} ;;
     --python=*) PYTHON=${1##--python=} ;;
     --test-package=*) TEST_PACKAGE=${1##--test-package=} ;;
+    --ignores=*) IGNORES=${1##--ignores=} ;;
     --wheel-path=*) WHEEL_PATH=${1##--wheel-path=} ;;
     --xml-path=*) XML_PATH=${1##--xml-path=} ;;
     --cov=*) COV=${1##--cov=} ;;
@@ -102,21 +104,29 @@ fi
 
 # Now run the tests
 echo "Running tests"
-TEST_CMD=($PYTEST_EXECUTABLE -v -ra -W "ignore::UserWarning" -W "ignore::FutureWarning" -W "ignore::DeprecationWarning" -W "ignore::ResourceWarning" --durations=100 --pyargs ${TEST_PACKAGE} --junitxml=${XML_PATH} --timeout=${TIME_OUT})
+TEST_CMD=$PYTEST_EXECUTABLE" -v -ra -W \"ignore::UserWarning\" -W \"ignore::FutureWarning\" -W \"ignore::DeprecationWarning\" -W \"ignore::ResourceWarning\" --durations=100"
+TEST_CMD+=" --junitxml="${XML_PATH}
+TEST_CMD+=" --timeout="${TIME_OUT}
+TEST_CMD+=" --pyargs "${TEST_PACKAGE//,/ }
+
+IFS=',' read -A ignore_array <<< "${IGNORES}"
+for ignore in ${ignore_array[@]}; do
+    TEST_CMD+=" --ignore "${CONDA_PREFIX}"/lib/python"${PYTHON}"/site-packages/"${ignore}
+done
 
 if [[ $SLOW != 1 || $FAST != 1 ]]; then
     if [[ $SLOW == 1 ]]; then
-        TEST_CMD+=(-m "slow")
+        TEST_CMD+=" -m \"slow\""
     elif [[ $FAST == 1 ]]; then
-        TEST_CMD+=(-m "not slow")
+        TEST_CMD+=" -m \"not slow\""
     fi
 fi
 
 if [[ $COV != "" ]]; then
-    TEST_CMD+=(--cov $COV)
+    TEST_CMD+=" --cov ${COV}"
 fi
 
-echo $TEST_CMD
-${TEST_CMD[@]}
+echo ${TEST_CMD}
+eval ${TEST_CMD}
 
 pip uninstall -y coremltools
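The rewritten runner above assembles TEST_CMD as a string and evals it, expanding the new comma-separated --ignores value into repeated pytest --ignore options and the comma-separated --test-package value into space-separated --pyargs targets. A rough Python rendering of that argument expansion; the helper name and the example values are made up for illustration, and the real logic lives in scripts/test.sh:

def build_pytest_args(test_packages: str, ignores: str, site_packages: str) -> list:
    """Mirror how scripts/test.sh assembles the pytest invocation."""
    args = ["-v", "-ra", "--durations=100"]
    # --test-package takes a comma-separated list and becomes --pyargs targets.
    args += ["--pyargs", *test_packages.split(",")]
    # --ignores takes a comma-separated list; each entry becomes its own
    # --ignore pointing into the installed site-packages tree.
    for ignore in filter(None, ignores.split(",")):
        args += ["--ignore", f"{site_packages}/{ignore}"]
    return args

print(build_pytest_args(
    "coremltools.test.optimize,coremltools.test.pipeline",  # hypothetical packages
    "coremltools/converters/mil/frontend/tensorflow",       # hypothetical ignore
    "/opt/miniconda/lib/python3.10/site-packages",           # hypothetical prefix
))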