From 6c698cff939cbf1b19980a08bfdd53e17c6cef6b Mon Sep 17 00:00:00 2001
From: Songhao Jia
Date: Fri, 3 May 2024 11:36:24 -0700
Subject: [PATCH] introduce _to_dim_order_copy op to runtime (#1970)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/1970

This diff creates a new, special operator, `_to_dim_order_copy`. The operator introduces two critical capabilities to the runtime:

1. Extract memory_format information from a tensor based on its dim_order instead of its strides.
2. Support both the channels_last and contiguous memory formats in the runtime.

Please note that memory format is a concept parallel to memory layout, so supporting a new memory format does not violate our contract of only supporting tensors with a contiguous memory layout. Details can be found [here](https://discuss.pytorch.org/t/contigious-vs-non-contigious-tensor/30107) and [here](https://pytorch.org/blog/tensor-memory-format-matters/). (A standalone sketch of how a dim order determines strides is appended after the patch.)

Furthermore, `_to_dim_order_copy` is a special operator: it has no native ATen variant, yet it is needed by most models in the edge dialect. It therefore cannot simply be placed in `kernels/portable/custom_ops.yaml` (which would require manual registration every time and does not scale to an operator needed by many models) or in `kernels/portable/functions.yaml` (which is reserved for operators with a native ATen variant). To resolve this, the diff puts the ATen-mode kernel of `_to_dim_order_copy` under `kernels/aten`, while the lean-mode kernel is registered in `kernels/portable/functions.yaml`.

Also update dependencies and utils.

Differential Revision: https://internalfb.com/D53747744
---
 kernels/aten/cpu/TARGETS                      |   8 +
 kernels/aten/cpu/op__to_dim_order_copy.cpp    | 124 ++++
 kernels/aten/cpu/targets.bzl                  |  41 ++
 kernels/aten/cpu/util/TARGETS                 |   8 +
 kernels/aten/cpu/util/copy_ops_util.cpp       |  68 ++
 kernels/aten/cpu/util/copy_ops_util.h         |  23 +
 kernels/aten/cpu/util/targets.bzl             |  28 +
 kernels/aten/edge_dialect_aten_op.yaml        |   8 +
 kernels/aten/targets.bzl                      |  33 +-
 .../portable/cpu/op__to_dim_order_copy.cpp    | 120 ++++
 kernels/portable/cpu/targets.bzl              |   6 +
 kernels/portable/cpu/util/copy_ops_util.cpp   |  40 ++
 kernels/portable/cpu/util/copy_ops_util.h     |   6 +
 kernels/portable/functions.yaml               |   5 +
 kernels/test/op__to_dim_order_copy_test.cpp   | 650 ++++++++++++++++++
 kernels/test/targets.bzl                      |   1 +
 runtime/core/exec_aten/exec_aten.h            |   7 +
 .../kernels/portable/op_registration_util.bzl |  24 +-
 shim/xplat/executorch/kernels/test/util.bzl   |   2 +
 19 files changed, 1190 insertions(+), 12 deletions(-)
 create mode 100644 kernels/aten/cpu/TARGETS
 create mode 100644 kernels/aten/cpu/op__to_dim_order_copy.cpp
 create mode 100644 kernels/aten/cpu/targets.bzl
 create mode 100644 kernels/aten/cpu/util/TARGETS
 create mode 100644 kernels/aten/cpu/util/copy_ops_util.cpp
 create mode 100644 kernels/aten/cpu/util/copy_ops_util.h
 create mode 100644 kernels/aten/cpu/util/targets.bzl
 create mode 100644 kernels/aten/edge_dialect_aten_op.yaml
 create mode 100644 kernels/portable/cpu/op__to_dim_order_copy.cpp
 create mode 100644 kernels/test/op__to_dim_order_copy_test.cpp

diff --git a/kernels/aten/cpu/TARGETS b/kernels/aten/cpu/TARGETS
new file mode 100644
index 00000000000..2341af9282f
--- /dev/null
+++ b/kernels/aten/cpu/TARGETS
@@ -0,0 +1,8 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+ +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/kernels/aten/cpu/op__to_dim_order_copy.cpp b/kernels/aten/cpu/op__to_dim_order_copy.cpp new file mode 100644 index 00000000000..63a301531d9 --- /dev/null +++ b/kernels/aten/cpu/op__to_dim_order_copy.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +namespace torch { +namespace executor { +namespace native { + +using Tensor = exec_aten::Tensor; +using SizesArrayRef = exec_aten::ArrayRef; +using DimOrderArrayRef = exec_aten::ArrayRef; +using MemoryFormat = exec_aten::MemoryFormat; + +template +using OptionalArrayRef = exec_aten::OptionalArrayRef; + +template +using Optional = exec_aten::optional; + +namespace { +Optional get_memory_format(OptionalArrayRef dim_order) { + if (!dim_order.has_value()) { + return exec_aten::nullopt; + } + if (is_contiguous_dim_order( + dim_order.value().data(), dim_order.value().size())) { + return MemoryFormat::Contiguous; + } else if (is_channels_last_dim_order( + dim_order.value().data(), dim_order.value().size())) { + return MemoryFormat::ChannelsLast; + } else { + ET_ASSERT_UNREACHABLE(); + } +} + +bool check__to_dim_order_copy_args( + const Tensor& input, + bool non_blocking, + exec_aten::OptionalArrayRef dim_order, + Tensor& out) { + // Right now we only support blocking data transfer + ET_LOG_AND_RETURN_IF_FALSE(non_blocking == false); + + // dim_order is set, the target dim_order will be either contiguous or + // channels_last memory format + if (dim_order.has_value()) { + exec_aten::ArrayRef dim_order_ref = dim_order.value(); + + // dim order size shall equal to input dim + ET_LOG_AND_RETURN_IF_FALSE(dim_order_ref.size() == input.dim()); + + ET_LOG_AND_RETURN_IF_FALSE( + is_channels_last_dim_order( + dim_order.value().data(), dim_order.value().size()) || + is_contiguous_dim_order( + dim_order.value().data(), dim_order.value().size())); + + // Out Aten tensor shall have same memory format stride as dim_order + const size_t kMaxNumOfDimensions = 16; + ET_LOG_AND_RETURN_IF_FALSE(kMaxNumOfDimensions >= out.dim()); + exec_aten::StridesType target_strides[kMaxNumOfDimensions]; + dim_order_to_stride_nocheck( + out.sizes().data(), + dim_order_ref.data(), + dim_order_ref.size(), + target_strides); + ET_LOG_AND_RETURN_IF_FALSE(out.dim() == dim_order_ref.size()); + for (size_t i = 0; i < dim_order_ref.size(); i++) { + ET_LOG_AND_RETURN_IF_FALSE(target_strides[i] == out.strides()[i]); + } + + } else { // dim_order is not set, preserve the dim order of input + + auto out_strides = out.strides(); + auto input_strides = input.strides(); + ET_LOG_AND_RETURN_IF_FALSE(input_strides.size() == out_strides.size()); + for (size_t i = 0; i < input_strides.size(); i++) { + ET_LOG_AND_RETURN_IF_FALSE(input_strides[i] == out_strides[i]); + } + } + return true; +} +} // namespace + +// _to_dim_order_copy.out(Tensor self, *, bool non_blocking=False, int[]? +// dim_order=None, Tensor(a!) out) -> Tensor(a!) 
+Tensor& _to_dim_order_copy_out( + RuntimeContext& ctx, + const Tensor& self, + bool non_blocking, + OptionalArrayRef dim_order, + Tensor& out) { + // TODO(T181345875): enable sanity check in aten mode + ET_KERNEL_CHECK( + ctx, + check__to_dim_order_copy_args(self, non_blocking, dim_order, out), + InvalidArgument, + out); + + Optional memory_format = get_memory_format(dim_order); + at::_to_copy_outf(self, non_blocking, memory_format, out); + + return out; +} + +Tensor& _to_dim_order_copy_out( + const Tensor& self, + bool non_blocking, + OptionalArrayRef dim_order, + Tensor& out) { + exec_aten::RuntimeContext ctx{}; + return _to_dim_order_copy_out(ctx, self, non_blocking, dim_order, out); +} + +} // namespace native +} // namespace executor +} // namespace torch diff --git a/kernels/aten/cpu/targets.bzl b/kernels/aten/cpu/targets.bzl new file mode 100644 index 00000000000..bdd93bda9ed --- /dev/null +++ b/kernels/aten/cpu/targets.bzl @@ -0,0 +1,41 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") +load("@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl", "define_op_target", "op_target") + +# Operators that are listed in `functions.yaml`, and are thus compatible with +# the core ATen operators. Every entry here will be backed by a cxx_library +# target with the given name and deps. +# +# Note that a single target (or single .cpp file) can't mix ATen and non-ATen +# ops, and must be split. They can, however, share common code via a library dep +# if necessary. +_EDGE_DIALECT_OPS = ( + op_target( + name = "op__to_dim_order_copy", + deps = [ + "//executorch/kernels/aten/cpu/util:copy_ops_util", + ], + ), +) + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. + """ + + # Define build targets for all operators registered in the tables above. + for op in _EDGE_DIALECT_OPS: + define_op_target(is_aten_op = False, is_et_op = False, **op) + + all_op_targets = [":{}".format(op["name"]) for op in _EDGE_DIALECT_OPS] + + runtime.cxx_library( + name = "cpu", + srcs = [], + visibility = [ + "//executorch/kernels/aten/...", + "//executorch/kernels/test/...", + ], + exported_deps = [t + "_aten" for t in all_op_targets], + ) diff --git a/kernels/aten/cpu/util/TARGETS b/kernels/aten/cpu/util/TARGETS new file mode 100644 index 00000000000..2341af9282f --- /dev/null +++ b/kernels/aten/cpu/util/TARGETS @@ -0,0 +1,8 @@ +# Any targets that should be shared between fbcode and xplat must be defined in +# targets.bzl. This file can contain fbcode-only targets. + +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/kernels/aten/cpu/util/copy_ops_util.cpp b/kernels/aten/cpu/util/copy_ops_util.cpp new file mode 100644 index 00000000000..b20fe127863 --- /dev/null +++ b/kernels/aten/cpu/util/copy_ops_util.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include + +namespace torch { +namespace executor { + +using Tensor = exec_aten::Tensor; + +bool check__to_dim_order_copy_args( + const Tensor& input, + bool non_blocking, + exec_aten::OptionalArrayRef dim_order, + Tensor& out) { + // Right now we only support blocking data transfer + ET_LOG_AND_RETURN_IF_FALSE(non_blocking == false); + + // dim_order is set, the target dim_order will be either contiguous or + // channels_last memory format + if (dim_order.has_value()) { + exec_aten::ArrayRef dim_order_ref = dim_order.value(); + + // dim order size shall equal to input dim + ET_LOG_AND_RETURN_IF_FALSE(dim_order_ref.size() == input.dim()); + + ET_LOG_AND_RETURN_IF_FALSE( + is_channels_last_dim_order( + dim_order.value().data(), dim_order.value().size()) || + is_contiguous_dim_order( + dim_order.value().data(), dim_order.value().size())); + + // Out Aten tensor shall have same memory format stride as dim_order + const size_t kMaxNumOfDimensions = 16; + ET_LOG_AND_RETURN_IF_FALSE(kMaxNumOfDimensions >= out.dim()); + exec_aten::StridesType target_strides[kMaxNumOfDimensions]; + dim_order_to_stride_nocheck( + out.sizes().data(), + dim_order_ref.data(), + dim_order_ref.size(), + target_strides); + ET_LOG_AND_RETURN_IF_FALSE(out.dim() == dim_order_ref.size()); + for (size_t i = 0; i < dim_order_ref.size(); i++) { + ET_LOG_AND_RETURN_IF_FALSE(target_strides[i] == out.strides()[i]); + } + + } else { // dim_order is not set, preserve the dim order of input + + auto out_strides = out.strides(); + auto input_strides = input.strides(); + ET_LOG_AND_RETURN_IF_FALSE(input_strides.size() == out_strides.size()); + for (size_t i = 0; i < input_strides.size(); i++) { + ET_LOG_AND_RETURN_IF_FALSE(input_strides[i] == out_strides[i]); + } + } + return true; +} + +} // namespace executor +} // namespace torch diff --git a/kernels/aten/cpu/util/copy_ops_util.h b/kernels/aten/cpu/util/copy_ops_util.h new file mode 100644 index 00000000000..6ddc7371006 --- /dev/null +++ b/kernels/aten/cpu/util/copy_ops_util.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace torch { +namespace executor { + +bool check__to_dim_order_copy_args( + const Tensor& input, + bool non_blocking, + exec_aten::OptionalArrayRef dim_order, + Tensor& out); + +} // namespace executor +} // namespace torch diff --git a/kernels/aten/cpu/util/targets.bzl b/kernels/aten/cpu/util/targets.bzl new file mode 100644 index 00000000000..d1ebddd7275 --- /dev/null +++ b/kernels/aten/cpu/util/targets.bzl @@ -0,0 +1,28 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. 
+ """ + + # Utility functions that can be used by operators that perform reduction + runtime.cxx_library( + name = "copy_ops_util", + srcs = ["copy_ops_util.cpp"], + exported_headers = [ + "copy_ops_util.h", + ], + compiler_flags = ["-Wno-missing-prototypes"], + deps = [ + "//executorch/runtime/kernel:kernel_includes_aten", + "//executorch/runtime/core/exec_aten/util:tensor_util_aten", + ], + exported_preprocessor_flags = ["-DUSE_ATEN_LIB"], + visibility = [ + "//executorch/kernels/aten/cpu/...", + "//executorch/kernels/portable/cpu/...", + "//executorch/kernels/optimized/cpu/...", + ], + ) diff --git a/kernels/aten/edge_dialect_aten_op.yaml b/kernels/aten/edge_dialect_aten_op.yaml new file mode 100644 index 00000000000..016f8dbfab5 --- /dev/null +++ b/kernels/aten/edge_dialect_aten_op.yaml @@ -0,0 +1,8 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This yaml file contains operators that are defined by ExecuTorch and used in ATen mode. + +- func: dim_order_ops::_to_dim_order_copy.out(Tensor self, *, bool non_blocking=False, int[]? dim_order=None, Tensor(a!) out) -> Tensor(a!) + kernels: + - arg_meta: null + kernel_name: torch::executor::_to_dim_order_copy_out diff --git a/kernels/aten/targets.bzl b/kernels/aten/targets.bzl index 519dfaf3484..2e007a16d7e 100644 --- a/kernels/aten/targets.bzl +++ b/kernels/aten/targets.bzl @@ -16,20 +16,51 @@ def define_common_targets(): ], ) + runtime.export_file( + name = "edge_dialect_aten_op.yaml", + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + ) + et_operator_library( name = "executorch_aten_ops", ops_schema_yaml_target = ":functions.yaml", define_static_targets = True, ) + runtime.cxx_library( + name = "operators_edge_dialect_aten", + srcs = [], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + exported_deps = [ + "//executorch/kernels/aten/cpu:cpu", + ], + ) + + et_operator_library( + name = "edge_dialect_aten_ops", + ops_schema_yaml_target = ":edge_dialect_aten_op.yaml", + define_static_targets = True, + ) + executorch_generated_lib( name = "generated_lib", aten_mode = True, deps = [ ":executorch_aten_ops", + ":edge_dialect_aten_ops", + ], + kernel_deps = [ + ":operators_edge_dialect_aten", ], - functions_yaml_target = None, + custom_ops_yaml_target = "//executorch/kernels/aten:edge_dialect_aten_op.yaml", define_static_targets = True, + custom_ops_requires_aot_registration = False, visibility = [ "//executorch/...", "@EXECUTORCH_CLIENTS", diff --git a/kernels/portable/cpu/op__to_dim_order_copy.cpp b/kernels/portable/cpu/op__to_dim_order_copy.cpp new file mode 100644 index 00000000000..8a6a65e7034 --- /dev/null +++ b/kernels/portable/cpu/op__to_dim_order_copy.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include + +namespace torch { +namespace executor { +namespace native { + +using Tensor = exec_aten::Tensor; +using SizesArrayRef = exec_aten::ArrayRef; +using DimOrderArrayRef = exec_aten::ArrayRef; +using MemoryFormat = exec_aten::MemoryFormat; + +template +using OptionalArrayRef = exec_aten::OptionalArrayRef; + +template +using Optional = exec_aten::optional; + +namespace { + +// TODO(T179241236): Update core/exec_aten/util/tensor_util.h to support dim +// order other than contiguous. 
+int64_t coordinateToIndexWithDimOrder( + const Tensor& self, + const size_t* cur_indices) { + int64_t index = 0; + exec_aten::StridesType strides[kTensorDimensionLimit]; + SizesArrayRef sizes = self.sizes(); + DimOrderArrayRef dim_order = self.dim_order(); + + dim_order_to_stride_nocheck( + sizes.data(), dim_order.data(), sizes.size(), strides); + for (size_t i = 0; i < self.dim(); ++i) { + index += cur_indices[i] * strides[i]; + } + return index; +} + +template +void _to_dim_order_copy_impl(const Tensor& self, Tensor& out) { + auto self_data = self.mutable_data_ptr(); + auto out_data = out.mutable_data_ptr(); + + size_t coordinate[kTensorDimensionLimit] = {0}; + + // Copy data from self to out index by index. Same index in self and out + // should have same value, no matter the order of dimensions. + for (ssize_t i = 0; i < self.numel(); i++) { + // Update the current indices. + for (ssize_t j = self.dim() - 1; j >= 0; j--) { + if (coordinate[j] + 1 < self.size(j)) { + coordinate[j]++; + break; + } else { + coordinate[j] = 0; + } + } + // Get the corresponding index of self_data and out_data by stride. + int64_t self_data_index = coordinateToIndexWithDimOrder(self, coordinate); + int64_t out_data_index = coordinateToIndexWithDimOrder(out, coordinate); + + out_data[out_data_index] = + static_cast(self_data[self_data_index]); + } +} +} // namespace + +// _to_dim_order_copy.out(Tensor self, *, bool non_blocking=False, int[]? +// dim_order=None, Tensor(a!) out) -> Tensor(a!) +Tensor& _to_dim_order_copy_out( + RuntimeContext& ctx, + const Tensor& self, + bool non_blocking, + OptionalArrayRef dim_order, + Tensor& out) { + (void)ctx; + ET_KERNEL_CHECK( + ctx, + check__to_dim_order_copy_args(self, non_blocking, dim_order, out), + InvalidArgument, + out); + + ET_KERNEL_CHECK( + ctx, + resize_tensor(out, self.sizes()) == torch::executor::Error::Ok, + InvalidArgument, + out); + + ET_SWITCH_REALHB_TYPES( + self.scalar_type(), ctx, "_to_dim_order_copy_out", CTYPE_IN, [&] { + ET_SWITCH_REALHB_TYPES( + out.scalar_type(), ctx, "_to_dim_order_copy_out", CTYPE_OUT, [&] { + _to_dim_order_copy_impl(self, out); + }); + }); + + return out; +} + +Tensor& _to_dim_order_copy_out( + const Tensor& self, + bool non_blocking, + OptionalArrayRef dim_order, + Tensor& out) { + exec_aten::RuntimeContext context{}; + return _to_dim_order_copy_out(context, self, non_blocking, dim_order, out); +} + +} // namespace native +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/targets.bzl b/kernels/portable/cpu/targets.bzl index c86ac2b9f6d..77796c68526 100644 --- a/kernels/portable/cpu/targets.bzl +++ b/kernels/portable/cpu/targets.bzl @@ -977,6 +977,12 @@ _ATEN_OPS = ( op_target( name = "op_zeros", ), + op_target( + name = "op__to_dim_order_copy", + deps = [ + "//executorch/kernels/portable/cpu/util:copy_ops_util", + ], + ), ) # Operators that are not listed in `functions.yaml` (i.e., operators listed in diff --git a/kernels/portable/cpu/util/copy_ops_util.cpp b/kernels/portable/cpu/util/copy_ops_util.cpp index 48038f175b9..314e38c2b53 100644 --- a/kernels/portable/cpu/util/copy_ops_util.cpp +++ b/kernels/portable/cpu/util/copy_ops_util.cpp @@ -9,6 +9,7 @@ #include #include +#include #include namespace torch { @@ -735,6 +736,45 @@ bool check_to_copy_args( return true; } +bool check__to_dim_order_copy_args( + const Tensor& input, + bool non_blocking, + exec_aten::OptionalArrayRef dim_order, + Tensor& out) { + // Right now we only support blocking data transfer + 
ET_LOG_AND_RETURN_IF_FALSE(non_blocking == false); + + if (dim_order.has_value()) { + exec_aten::ArrayRef dim_order_ref = dim_order.value(); + + // dim order size shall equal to input dim + ET_LOG_AND_RETURN_IF_FALSE(dim_order_ref.size() == input.dim()); + + ET_LOG_AND_RETURN_IF_FALSE( + is_channels_last_dim_order( + dim_order.value().data(), dim_order.value().size()) || + is_contiguous_dim_order( + dim_order.value().data(), dim_order.value().size())); + + // Out tensor shall have same dim order as dim_order + auto out_dim_order = out.dim_order(); + ET_LOG_AND_RETURN_IF_FALSE(out_dim_order.size() == dim_order_ref.size()); + for (size_t i = 0; i < dim_order_ref.size(); i++) { + ET_LOG_AND_RETURN_IF_FALSE(out_dim_order[i] == dim_order_ref[i]); + } + } else { // dim_order is not set, preserve the dim order of input + + // Out tensor shall have same dim order as input dim_order + auto out_dim_order = out.dim_order(); + auto input_dim_order = input.dim_order(); + ET_LOG_AND_RETURN_IF_FALSE(out_dim_order.size() == input_dim_order.size()); + for (size_t i = 0; i < input_dim_order.size(); i++) { + ET_LOG_AND_RETURN_IF_FALSE(out_dim_order[i] == input_dim_order[i]); + } + } + return true; +} + bool check_unsqueeze_copy_args( const Tensor input, int64_t dim, diff --git a/kernels/portable/cpu/util/copy_ops_util.h b/kernels/portable/cpu/util/copy_ops_util.h index 5f341b0c2b9..d5362ae373a 100644 --- a/kernels/portable/cpu/util/copy_ops_util.h +++ b/kernels/portable/cpu/util/copy_ops_util.h @@ -198,6 +198,12 @@ bool check_to_copy_args( exec_aten::optional memory_format, Tensor& out); +bool check__to_dim_order_copy_args( + const Tensor& input, + bool non_blocking, + exec_aten::OptionalArrayRef dim_order, + Tensor& out); + bool check_unsqueeze_copy_args( const Tensor input, int64_t dim, diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml index ca3f3c702a1..93256b2a05f 100644 --- a/kernels/portable/functions.yaml +++ b/kernels/portable/functions.yaml @@ -881,3 +881,8 @@ kernels: - arg_meta: null kernel_name: torch::executor::zeros_out + +- func: dim_order_ops::_to_dim_order_copy.out(Tensor self, *, bool non_blocking=False, int[]? dim_order=None, Tensor(a!) out) -> Tensor(a!) + kernels: + - arg_meta: null + kernel_name: torch::executor::_to_dim_order_copy_out diff --git a/kernels/test/op__to_dim_order_copy_test.cpp b/kernels/test/op__to_dim_order_copy_test.cpp new file mode 100644 index 00000000000..e888e0fc7f6 --- /dev/null +++ b/kernels/test/op__to_dim_order_copy_test.cpp @@ -0,0 +1,650 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include + +#include // Declares the operator +#include +#include +#include +#include +#include + +#include + +using namespace ::testing; +using exec_aten::ArrayRef; +using exec_aten::optional; +using exec_aten::ScalarType; +using exec_aten::Tensor; +using torch::executor::testing::TensorFactory; + +// To further emphasize the accuracy of our op_to, we TEST_F the conversion +// from floating-point types to signed int types directly by the TEST_F cases +// generated by core Pytorch directly. Such data is random generated in [-5, 5]. 
+ +// clang-format off +typedef std::map< + std::type_index, + std::variant< + std::vector, + std::vector>> + FloatingTypeToDataMap; + +typedef std::map< + std::type_index, + std::variant< + std::vector, + std::vector, + std::vector, + std::vector, + std::vector>> + IntTypeToDataMap; +// clang-format on + +class OpToDimOrderCopyTest : public OperatorTest { + protected: + Tensor& op__to_dim_order_copy_out( + const Tensor& self, + bool non_blocking, + exec_aten::optional> dim_order, + Tensor& out) { + return torch::executor::dim_order_ops::_to_dim_order_copy_outf( + context_, self, non_blocking, dim_order, out); + } + // Cast float vector to OUTPUT_CTYPE vector + template + std::vector vector_type_cast(std::vector input) { + std::vector output(input.size()); + std::transform( + input.begin(), input.end(), output.begin(), [](INPUT_CTYPE x) { + return static_cast(x); + }); + return output; + } + + template + struct ToTestCase { + const std::vector sizes; + const std::vector data_in; + const std::vector data_out; + }; + + // Each TEST_F has different combination of input and output types. Therefore + // it is a little bit mess if create template TEST_F case and custom data + // types for both input data and output data. We choose another way: for all + // TEST_F cases, their data are all in double. And we are gonna cast them into + // desired type when delievering them into tf.make function. Based on our + // experiments, type cast of core PyTorch is same as static_cast in c++ in the + // representable scope, so here we believe using static_cast to generate + // ground truth is reasonable. + template < + typename INPUT_CTYPE, + ScalarType INPUT_DTYPE, + typename OUTPUT_CTYPE, + ScalarType OUTPUT_DTYPE> + void test_runner_static_cast( + std::vector> test_cases) { + TensorFactory tf_in; + TensorFactory tf_out; + + for (const auto& test_case : test_cases) { + auto data_in = vector_type_cast(test_case.data_in); + auto data_out = vector_type_cast(data_in); + + Tensor input = tf_in.make(test_case.sizes, data_in); + Tensor output = tf_out.zeros_like(input); + + std::vector dim_order_vec; + for (int64_t i = 0; i < input.dim(); i++) { + dim_order_vec.push_back(i); + } + ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); + + Tensor ret = op__to_dim_order_copy_out( + /*self=*/input, + /*non_blocking=*/false, + dim_order, + output); + + Tensor expected = tf_out.make(test_case.sizes, data_out); + + // The original tensor a should share same value with the out variable and + // return variable of to function + EXPECT_TENSOR_EQ(ret, output); + EXPECT_TENSOR_EQ(ret, expected); + } + } + + template + void test_runner_to_bool( + std::vector test_case, + std::vector data_out) { + TensorFactory tf_in; + TensorFactory tf_out; + + auto data_in = vector_type_cast(test_case); + + Tensor input = tf_in.make({(int)test_case.size()}, data_in); + Tensor output = tf_out.zeros_like(input); + + std::vector dim_order_vec; + for (int i = 0; i < input.dim(); i++) { + dim_order_vec.push_back(i); + } + ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); + + Tensor ret = op__to_dim_order_copy_out( + /*self=*/input, + /*non_blocking=*/false, + dim_order, + output); + + Tensor expected = tf_out.make({(int)data_out.size()}, data_out); + + // The return value of op__to_dim_order_copy_out and the values written to + // output should be the same. + EXPECT_TENSOR_EQ(ret, output); + // The return value of op__to_dim_order_copy_out and the values in expected + // which are the reference values should be the same. 
+ EXPECT_TENSOR_EQ(ret, expected); + } + + template + void test_runner_from_bool( + std::vector test_case, + std::vector out) { + TensorFactory tf_in; + TensorFactory tf_out; + + auto data_out = vector_type_cast(out); + + Tensor input = tf_in.make({(int)test_case.size()}, test_case); + Tensor output = tf_out.zeros_like(input); + + std::vector dim_order_vec; + for (int64_t i = 0; i < input.dim(); i++) { + dim_order_vec.push_back(i); + } + ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); + + Tensor ret = op__to_dim_order_copy_out( + /*self=*/input, + /*non_blocking=*/false, + dim_order, + output); + + Tensor expected = tf_out.make({(int)data_out.size()}, data_out); + + // The return value of op__to_dim_order_copy_out and the values written to + // output should be the same. + EXPECT_TENSOR_EQ(ret, output); + // The return value of op__to_dim_order_copy_out and the values in expected + // which are the reference values should be the same. + EXPECT_TENSOR_EQ(ret, expected); + } + + /* %python + import torch + torch.manual_seed(0) + x = torch.rand(2, 3) + res = x.to(non_blocking = False, memory_format = torch.preserve_format) + op = "op__to_dim_order_copy_out" + opt_setup_params = """ + bool non_blocking = false; + optional memory_format; + """ + opt_extra_params = "non_blocking, memory_format," + out_args = "out_shape, dynamism" + dtype = "ScalarType::Float" + check = "EXPECT_TENSOR_EQ" */ + + void test_dynamic_shape( + const std::vector& out_shape, + enum torch::executor::TensorShapeDynamism dynamism) { + /* %python + %rewrite(unary_op) */ + + TensorFactory tf; + + Tensor x = tf.make( + {2, 3}, + {0.49625658988952637, + 0.7682217955589294, + 0.08847743272781372, + 0.13203048706054688, + 0.30742281675338745, + 0.6340786814689636}); + Tensor expected = tf.make( + {2, 3}, + {0.49625658988952637, + 0.7682217955589294, + 0.08847743272781372, + 0.13203048706054688, + 0.30742281675338745, + 0.6340786814689636}); + + bool non_blocking = false; + + Tensor out = tf.zeros(out_shape, dynamism); + + std::vector dim_order_vec; + for (int64_t i = 0; i < x.dim(); i++) { + dim_order_vec.push_back(i); + } + ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); + + Tensor ret = op__to_dim_order_copy_out( + /*self=*/x, non_blocking, dim_order, out); + + EXPECT_TENSOR_EQ(out, expected); + EXPECT_TENSOR_EQ(ret, expected); + } + + template < + typename INPUT_CTYPE, + ScalarType INPUT_DTYPE, + typename OUTPUT_CTYPE, + ScalarType OUTPUT_DTYPE> + void test_runner_hardcode_data( + FloatingTypeToDataMap floating_point_data, + IntTypeToDataMap int_data) { + TensorFactory tf_in; + TensorFactory tf_out; + + if (typeid(OUTPUT_CTYPE) == typeid(uint8_t)) { + // Would cause underflow when testing uint8_t. 
+ return; + } + + ToTestCase test_case = { + /*sizes=*/{3, 5}, /*data_in=*/ + std::get>( + floating_point_data[typeid(INPUT_CTYPE)]), + /*data_out=*/ + std::get>(int_data[typeid(OUTPUT_CTYPE)])}; + + Tensor input = tf_in.make(test_case.sizes, test_case.data_in); + Tensor output = tf_out.zeros_like(input); + + std::vector dim_order_vec; + for (int64_t i = 0; i < input.dim(); i++) { + dim_order_vec.push_back(i); + } + ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); + + Tensor ret = op__to_dim_order_copy_out( + /*self=*/input, + /*non_blocking=*/false, + dim_order, + output); + + Tensor expected = tf_out.make(test_case.sizes, test_case.data_out); + + // The original tensor a should share same value with the out variable and + // return variable of to function + EXPECT_TENSOR_EQ(ret, output); + EXPECT_TENSOR_EQ(ret, expected); + } +}; + +/* Here we temporary not try to implement or TEST_F the behavior about casting a + * number can not be represented in some type to this type (e.g. inf to + * int32_t nan to int64_t or 2147483648 to int32_t), because + * - a. The result of such kind of cast is undefined according to c++ + * standard; + * - b. No explicit rules can be found in core pytorch for such transaction + * (not same as static_cast or any other casting function in c++); + * - c. If user tries to cast a unrepresentable value to certain type, they + * should take the risk; + * - d. Even though we can always use if/switch to cover these boundry cases, + * the code will be lengthy and jumbled. I believe using these disordered + * code to meet some undefine behavior is meaningless, and we can not + * cover all such cases. + */ + +// Regular TEST_F for to_copy.out +// TEST_F if to_copy.out works well under all kinds of data pairs +TEST_F(OpToDimOrderCopyTest, AllDtypesSupported) { + std::vector> test_cases = { + { + /*sizes=*/{2, 4}, /*data_in=*/ + {2.11, 3.2, 2.3, 4.0, 1.1, 5.2, 1.1, 6.3}, /*data_out=*/ + {}, // data_out shouldn't be used in test_runner_static_cast + }, + { + /*sizes=*/{3, 4, 0, 5}, + /*data_in=*/{}, + /*data_out=*/{}, + }, + { + /*sizes=*/{}, + /*data_in=*/{10.0}, + /*data_out=*/{}, // data_out shouldn't be used in + // test_runner_static_cast + }, + }; + +#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \ + test_runner_static_cast< \ + INPUT_CTYPE, \ + ScalarType::INPUT_DTYPE, \ + OUTPUT_CTYPE, \ + ScalarType::OUTPUT_DTYPE>(test_cases); + +#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \ + ET_FORALL_REAL_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL); + + ET_FORALL_REAL_TYPES(TEST_ENTRY); + +#undef TEST_ENTRY +#undef TEST_KERNEL +} + +TEST_F(OpToDimOrderCopyTest, BoolTests) { + std::vector test_case_to_bool = {1.1, 2.2, 0}; + std::vector result_to_bool = {true, true, false}; +#define TEST_TO_BOOL(INPUT_CTYPE, INPUT_DTYPE) \ + test_runner_to_bool( \ + test_case_to_bool, result_to_bool); + ET_FORALL_REAL_TYPES(TEST_TO_BOOL); + + std::vector test_case_from_bool = {true, true, false}; + std::vector result_from_bool = {1.0, 1.0, 0}; +#define TEST_FROM_BOOL(OUTPUT_CTYPE, OUTPUT_DTYPE) \ + test_runner_from_bool( \ + test_case_from_bool, result_from_bool); + ET_FORALL_REAL_TYPES(TEST_FROM_BOOL); +} + +TEST_F(OpToDimOrderCopyTest, NanInfSupported) { + constexpr auto floatInfinity = std::numeric_limits::infinity(); + std::vector> test_cases = {{ + /*sizes=*/{2, 4}, + /*data_in=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6}, + /*data_out=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6}, + }}; + +#define TEST_KERNEL(INPUT_CTYPE, 
INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \ + test_runner_static_cast< \ + INPUT_CTYPE, \ + ScalarType::INPUT_DTYPE, \ + OUTPUT_CTYPE, \ + ScalarType::OUTPUT_DTYPE>(test_cases); + +#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \ + ET_FORALL_FLOAT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL); + + ET_FORALL_FLOAT_TYPES(TEST_ENTRY); + +#undef TEST_ENTRY +#undef TEST_KERNEL +} + +TEST_F(OpToDimOrderCopyTest, HardcodeFloatConvertInt) { + // Hardcode input and output generated from core PyTorch + // clang-format off + std::vector float_data = { + -1.47900056838989257812, -4.59277725219726562500, + 2.15365791320800781250, -2.55494546890258789062, + 3.06999135017395019531, 3.27460670471191406250, + -3.98865103721618652344, -4.81065988540649414062, + 3.67902207374572753906, 3.72226405143737792969, + 0.80567771196365356445, 2.23788332939147949219, + -0.52035576105117797852, -1.58493483066558837891, + -0.30919688940048217773}; + + std::vector double_data = { + -1.47900053955270172068, -4.59277735274143061872, + 2.15365796963871947156, -2.55494554556038755422, + 3.06999137834642255029, 3.27460679459944969949, + -3.98865109243288795682, -4.81065977167646074975, + 3.67902198302105531980, 3.72226414774102742911, + 0.80567768667100203572, 2.23788335717029518435, + -0.52035578832931150828, -1.58493480710766210251, + -0.30919688936285893988}; + // clang-format on + + std::vector int64_data = { + -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0}; + std::vector int32_data = { + -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0}; + std::vector int16_data = { + -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0}; + std::vector int8_data = { + -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0}; + + // Gathering all floating point data together for better traversial + FloatingTypeToDataMap floating_point_data; + floating_point_data[typeid(float)] = float_data; + floating_point_data[typeid(double)] = double_data; + + // Gathering all int data together for better traversial + IntTypeToDataMap int_data; + int_data[typeid(int64_t)] = int64_data; + int_data[typeid(int32_t)] = int32_data; + int_data[typeid(int16_t)] = int16_data; + int_data[typeid(int8_t)] = int8_data; + +#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \ + test_runner_hardcode_data< \ + INPUT_CTYPE, \ + ScalarType::INPUT_DTYPE, \ + OUTPUT_CTYPE, \ + ScalarType::OUTPUT_DTYPE>(floating_point_data, int_data); + +#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \ + ET_FORALL_INT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL); + + ET_FORALL_FLOAT_TYPES(TEST_ENTRY); +} + +TEST_F(OpToDimOrderCopyTest, MismatchedSizesDie) { + if (torch::executor::testing::SupportedFeatures::get()->is_aten) { + GTEST_SKIP() << "ATen kernel can handle mismatched sizes"; + } + TensorFactory tf; + Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6}); + Tensor out = tf.zeros({3, 2, 1, 1}); + std::vector dim_order_vec; + for (int64_t i = 0; i < input.dim(); i++) { + dim_order_vec.push_back(i); + } + ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); + + ET_EXPECT_KERNEL_FAILURE( + context_, + op__to_dim_order_copy_out( + /*self=*/input, + /*non_blocking=*/false, + dim_order, + out)); +} + +// Only contiguous memory is supported, the memory type MemoryFormat::Contiguous +// should not be allowed. The function is expected death if using the illegal +// memory format. 
+TEST_F(OpToDimOrderCopyTest, MismatchedMemoryFormatDies) { + if (torch::executor::testing::SupportedFeatures::get()->is_aten) { + GTEST_SKIP() << "ATen kernel can handle non contiguous memory formats"; + } + TensorFactory tf_in; + TensorFactory tf_out; + Tensor input = + tf_in.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6}); + Tensor out = tf_out.zeros({3, 1, 1, 2}); + + std::vector dim_order_vec; + for (int64_t i = 0; i < input.dim(); i++) { + dim_order_vec.push_back(i); + } + + // mutate dim_order_vec to create a illegal one. + dim_order_vec[1] = 3; + dim_order_vec[3] = 1; + ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); + + ET_EXPECT_KERNEL_FAILURE( + context_, + op__to_dim_order_copy_out( + /*self=*/input, + /*non_blocking=*/false, + dim_order, + out)); +} + +// Only blocking data transfer supported +TEST_F(OpToDimOrderCopyTest, MismatchedBlockingDie) { + if (torch::executor::testing::SupportedFeatures::get()->is_aten) { + GTEST_SKIP() << "ATen kernel can handle non blocking data transfer"; + } + TensorFactory tf; + Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6}); + Tensor out = tf.zeros(/*sizes=*/{3, 1, 1, 2}); + + std::vector dim_order_vec; + for (int64_t i = 0; i < input.dim(); i++) { + dim_order_vec.push_back(i); + } + ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); + + ET_EXPECT_KERNEL_FAILURE( + context_, + op__to_dim_order_copy_out( + /*self=*/input, + /*non_blocking=*/true, + dim_order, + out)); +} + +TEST_F(OpToDimOrderCopyTest, DynamicShapeUpperBoundSameAsExpected) { + test_dynamic_shape( + {2, 3}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND); +} + +TEST_F(OpToDimOrderCopyTest, DynamicShapeUpperBoundLargerThanExpected) { + test_dynamic_shape( + {10, 10}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND); +} + +TEST_F(OpToDimOrderCopyTest, DynamicShapeUnbound) { + if (!torch::executor::testing::SupportedFeatures::get()->output_resize) { + GTEST_SKIP() << "Dynamic shape unbound not supported"; + } + test_dynamic_shape( + {1, 1}, torch::executor::TensorShapeDynamism::DYNAMIC_UNBOUND); +} + +TEST_F(OpToDimOrderCopyTest, ContiguousToChannelsLast) { + TensorFactory tf; + + Tensor x = tf.make_with_dimorder( + {3, 5, 2, 2}, + {0.2432, 0.5248, 0.5361, 0.8513, 0.8184, 0.8206, 0.7357, 0.9655, 0.6138, + 0.1112, 0.2799, 0.1079, 0.9680, 0.2548, 0.0393, 0.6002, 0.2257, 0.8766, + 0.2715, 0.1595, 0.2029, 0.7026, 0.6982, 0.8529, 0.4405, 0.6560, 0.9217, + 0.6372, 0.2446, 0.6590, 0.3866, 0.7185, 0.4439, 0.5346, 0.3179, 0.4492, + 0.3491, 0.6970, 0.8456, 0.2516, 0.2345, 0.2924, 0.7695, 0.0911, 0.8530, + 0.8560, 0.6909, 0.7719, 0.8923, 0.5546, 0.6978, 0.8151, 0.3007, 0.3961, + 0.8416, 0.4296, 0.7203, 0.8963, 0.3597, 0.5552}); + + Tensor out = tf.full_channels_last({3, 5, 2, 2}, 0.0); + Tensor expected = tf.make_with_dimorder( + {3, 5, 2, 2}, + {0.2432, 0.8184, 0.6138, 0.9680, 0.2257, 0.5248, 0.8206, 0.1112, 0.2548, + 0.8766, 0.5361, 0.7357, 0.2799, 0.0393, 0.2715, 0.8513, 0.9655, 0.1079, + 0.6002, 0.1595, 0.2029, 0.4405, 0.2446, 0.4439, 0.3491, 0.7026, 0.6560, + 0.6590, 0.5346, 0.6970, 0.6982, 0.9217, 0.3866, 0.3179, 0.8456, 0.8529, + 0.6372, 0.7185, 0.4492, 0.2516, 0.2345, 0.8530, 0.8923, 0.3007, 0.7203, + 0.2924, 0.8560, 0.5546, 0.3961, 0.8963, 0.7695, 0.6909, 0.6978, 0.8416, + 0.3597, 0.0911, 0.7719, 0.8151, 0.4296, 0.5552}, + /*dim_order=*/{0, 2, 3, 1}); + + std::vector dim_order_vec = {0, 2, 3, 1}; + exec_aten::ArrayRef dim_order( + dim_order_vec.data(), dim_order_vec.size()); + Tensor ret = 
op__to_dim_order_copy_out( + /*self*/ x, /*non_blocking*/ false, /*dim_order*/ dim_order, out); + + EXPECT_TENSOR_EQ(out, expected); + EXPECT_TENSOR_EQ(ret, expected); +} + +TEST_F(OpToDimOrderCopyTest, ChannelsLastToContiguous) { + TensorFactory tf; + + Tensor out = tf.full({3, 5, 2, 2}, 0.0); + Tensor x = tf.make_with_dimorder( + {3, 5, 2, 2}, + {0.2432, 0.8184, 0.6138, 0.9680, 0.2257, 0.5248, 0.8206, 0.1112, 0.2548, + 0.8766, 0.5361, 0.7357, 0.2799, 0.0393, 0.2715, 0.8513, 0.9655, 0.1079, + 0.6002, 0.1595, 0.2029, 0.4405, 0.2446, 0.4439, 0.3491, 0.7026, 0.6560, + 0.6590, 0.5346, 0.6970, 0.6982, 0.9217, 0.3866, 0.3179, 0.8456, 0.8529, + 0.6372, 0.7185, 0.4492, 0.2516, 0.2345, 0.8530, 0.8923, 0.3007, 0.7203, + 0.2924, 0.8560, 0.5546, 0.3961, 0.8963, 0.7695, 0.6909, 0.6978, 0.8416, + 0.3597, 0.0911, 0.7719, 0.8151, 0.4296, 0.5552}, + /*dim_order=*/{0, 2, 3, 1}); + + Tensor expected = tf.make_with_dimorder( + {3, 5, 2, 2}, + {0.2432, 0.5248, 0.5361, 0.8513, 0.8184, 0.8206, 0.7357, 0.9655, 0.6138, + 0.1112, 0.2799, 0.1079, 0.9680, 0.2548, 0.0393, 0.6002, 0.2257, 0.8766, + 0.2715, 0.1595, 0.2029, 0.7026, 0.6982, 0.8529, 0.4405, 0.6560, 0.9217, + 0.6372, 0.2446, 0.6590, 0.3866, 0.7185, 0.4439, 0.5346, 0.3179, 0.4492, + 0.3491, 0.6970, 0.8456, 0.2516, 0.2345, 0.2924, 0.7695, 0.0911, 0.8530, + 0.8560, 0.6909, 0.7719, 0.8923, 0.5546, 0.6978, 0.8151, 0.3007, 0.3961, + 0.8416, 0.4296, 0.7203, 0.8963, 0.3597, 0.5552}); + + std::vector dim_order_vec = {0, 1, 2, 3}; + exec_aten::ArrayRef dim_order( + dim_order_vec.data(), dim_order_vec.size()); + Tensor ret = op__to_dim_order_copy_out( + /*self*/ x, /*non_blocking*/ false, /*dim_order*/ dim_order, out); + + EXPECT_TENSOR_EQ(out, expected); + EXPECT_TENSOR_EQ(ret, expected); +} + +TEST_F(OpToDimOrderCopyTest, PreserveChanneslLast) { + TensorFactory tf; + + Tensor out = tf.full_channels_last({3, 5, 2, 2}, 0.0); + Tensor x = tf.make_with_dimorder( + {3, 5, 2, 2}, + {0.2432, 0.8184, 0.6138, 0.9680, 0.2257, 0.5248, 0.8206, 0.1112, 0.2548, + 0.8766, 0.5361, 0.7357, 0.2799, 0.0393, 0.2715, 0.8513, 0.9655, 0.1079, + 0.6002, 0.1595, 0.2029, 0.4405, 0.2446, 0.4439, 0.3491, 0.7026, 0.6560, + 0.6590, 0.5346, 0.6970, 0.6982, 0.9217, 0.3866, 0.3179, 0.8456, 0.8529, + 0.6372, 0.7185, 0.4492, 0.2516, 0.2345, 0.8530, 0.8923, 0.3007, 0.7203, + 0.2924, 0.8560, 0.5546, 0.3961, 0.8963, 0.7695, 0.6909, 0.6978, 0.8416, + 0.3597, 0.0911, 0.7719, 0.8151, 0.4296, 0.5552}, + /*dim_order=*/{0, 2, 3, 1}); + + Tensor expected = tf.make_with_dimorder( + {3, 5, 2, 2}, + {0.2432, 0.8184, 0.6138, 0.9680, 0.2257, 0.5248, 0.8206, 0.1112, 0.2548, + 0.8766, 0.5361, 0.7357, 0.2799, 0.0393, 0.2715, 0.8513, 0.9655, 0.1079, + 0.6002, 0.1595, 0.2029, 0.4405, 0.2446, 0.4439, 0.3491, 0.7026, 0.6560, + 0.6590, 0.5346, 0.6970, 0.6982, 0.9217, 0.3866, 0.3179, 0.8456, 0.8529, + 0.6372, 0.7185, 0.4492, 0.2516, 0.2345, 0.8530, 0.8923, 0.3007, 0.7203, + 0.2924, 0.8560, 0.5546, 0.3961, 0.8963, 0.7695, 0.6909, 0.6978, 0.8416, + 0.3597, 0.0911, 0.7719, 0.8151, 0.4296, 0.5552}, + /*dim_order=*/{0, 2, 3, 1}); + + Tensor ret = op__to_dim_order_copy_out( + /*self*/ x, + /*non_blocking*/ false, + /*dim_order*/ exec_aten::nullopt, + out); + + EXPECT_TENSOR_EQ(out, expected); + EXPECT_TENSOR_EQ(ret, expected); +} diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl index 1daa66c58fa..1205ca3d87d 100644 --- a/kernels/test/targets.bzl +++ b/kernels/test/targets.bzl @@ -158,6 +158,7 @@ def define_common_targets(is_fbcode = False): codegen_function_header_wrapper("executorch/kernels/quantized", 
"quantized") codegen_function_header_wrapper("executorch/kernels/test/custom_kernel_example", "custom_kernel_example") + _common_op_test("op__to_dim_order_copy_test", ["aten", "portable"]) _common_op_test("op_abs_test", ["aten", "portable"]) _common_op_test("op_acos_test", ["aten", "portable"]) _common_op_test("op_acosh_test", ["aten", "portable"]) diff --git a/runtime/core/exec_aten/exec_aten.h b/runtime/core/exec_aten/exec_aten.h index 9eb1f12cd18..919b5420b3a 100644 --- a/runtime/core/exec_aten/exec_aten.h +++ b/runtime/core/exec_aten/exec_aten.h @@ -82,6 +82,9 @@ using quint4x2 = c10::quint4x2; using quint2x4 = c10::quint2x4; using IntArrayRef = at::IntArrayRef; +template +using OptionalArrayRef = c10::OptionalArrayRef; + #else // Use executor types using Tensor = torch::executor::Tensor; @@ -118,6 +121,10 @@ using quint2x4 = torch::executor::quint2x4; using IntArrayRef = torch::executor::IntArrayRef; +template +using OptionalArrayRef = + torch::executor::optional>; + #endif // Use executor types } // namespace exec_aten diff --git a/shim/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim/xplat/executorch/kernels/portable/op_registration_util.bzl index 32a6da5260f..b8817b61885 100644 --- a/shim/xplat/executorch/kernels/portable/op_registration_util.bzl +++ b/shim/xplat/executorch/kernels/portable/op_registration_util.bzl @@ -147,7 +147,7 @@ def define_op_library(name, deps, android_deps, aten_target, _allow_third_party_ link_whole = True, ) -def define_op_target(name, deps, android_deps, is_aten_op, _allow_third_party_deps = False, _aten_mode_deps = []): +def define_op_target(name, deps, android_deps, is_aten_op, is_et_op = True, _allow_third_party_deps = False, _aten_mode_deps = []): """Possibly defines cxx_library targets for the named operator group. Args: @@ -155,6 +155,7 @@ def define_op_target(name, deps, android_deps, is_aten_op, _allow_third_party_de deps: List of deps for the targets. android_deps: List of fbandroid_platform_deps for the target. is_aten_op: True if the operator overload group is ATen-compatible. + is_et_op: True if the operator overload group is ET-compatible. _allow_third_party_deps: If True, the op is allowed to depend on third-party deps outside of //executorch. Should only be used by targets under //executorch/kernels/optimized. @@ -171,13 +172,14 @@ def define_op_target(name, deps, android_deps, is_aten_op, _allow_third_party_de _allow_third_party_deps = _allow_third_party_deps, ) - # When building in ATen mode, ATen-compatible (non-custom) operators will - # use the implementations provided by ATen, so we should not build the - # versions defined here. - define_op_library( - name = name, - deps = deps, - android_deps = android_deps, - aten_target = False, - _allow_third_party_deps = _allow_third_party_deps, - ) + if is_et_op: + # When building in ATen mode, ATen-compatible (non-custom) operators will + # use the implementations provided by ATen, so we should not build the + # versions defined here. 
+        define_op_library(
+            name = name,
+            deps = deps,
+            android_deps = android_deps,
+            aten_target = False,
+            _allow_third_party_deps = _allow_third_party_deps,
+        )
diff --git a/shim/xplat/executorch/kernels/test/util.bzl b/shim/xplat/executorch/kernels/test/util.bzl
index a7e1c458fa5..cefb4fae6f0 100644
--- a/shim/xplat/executorch/kernels/test/util.bzl
+++ b/shim/xplat/executorch/kernels/test/util.bzl
@@ -21,6 +21,8 @@ def op_test(name, deps = [], kernel_name = "portable", use_kernel_prefix = False
     if kernel_name == "aten":
         generated_lib_and_op_deps = [
             "//executorch/kernels/aten:generated_lib",
+            # TODO(T187390274): consolidate all aten ops into one target
+            "//executorch/kernels/aten/cpu:op__to_dim_order_copy_aten",
             "//executorch/kernels/aten:generated_lib_headers",
             "//executorch/kernels/test:supported_features_aten",
         ]
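
Appendix: standalone sketch of how a dim order determines strides (not part of the patch, and not ExecuTorch code). The summary above describes deriving the memory format from dim_order rather than from strides; the minimal C++ program below illustrates that relationship for a 4-D NCHW tensor. The helper name strides_from_dim_order and the fixed rank of 4 are chosen only for this illustration; the dim orders {0, 1, 2, 3} (contiguous) and {0, 2, 3, 1} (channels-last) match the ones exercised in the tests.

// Standalone sketch: how a dim order determines strides for a 4-D tensor.
// dim_order lists dimensions from outermost to innermost; the innermost
// dimension gets stride 1. {0, 1, 2, 3} is contiguous (NCHW) and
// {0, 2, 3, 1} is channels-last (NHWC storage of an NCHW tensor).
#include <array>
#include <cstdint>
#include <cstdio>

std::array<int64_t, 4> strides_from_dim_order(
    const std::array<int64_t, 4>& sizes,
    const std::array<uint8_t, 4>& dim_order) {
  std::array<int64_t, 4> strides{};
  int64_t running = 1;
  // Walk from the innermost dimension outward, accumulating the product of
  // the sizes visited so far.
  for (int i = 3; i >= 0; --i) {
    strides[dim_order[i]] = running;
    running *= sizes[dim_order[i]];
  }
  return strides;
}

int main() {
  const std::array<int64_t, 4> sizes = {2, 3, 4, 5}; // N, C, H, W
  const auto contiguous = strides_from_dim_order(sizes, {0, 1, 2, 3});
  const auto channels_last = strides_from_dim_order(sizes, {0, 2, 3, 1});
  // Prints 60 20 5 1: the usual contiguous NCHW strides.
  std::printf(
      "contiguous:    %lld %lld %lld %lld\n",
      (long long)contiguous[0],
      (long long)contiguous[1],
      (long long)contiguous[2],
      (long long)contiguous[3]);
  // Prints 60 1 15 3: the channels-last strides, recovered purely from the
  // dim order without inspecting any stride metadata.
  std::printf(
      "channels_last: %lld %lld %lld %lld\n",
      (long long)channels_last[0],
      (long long)channels_last[1],
      (long long)channels_last[2],
      (long long)channels_last[3]);
  return 0;
}

This mirrors the intent of the dim_order_to_stride_nocheck calls in the patch: the memory format of a tensor can be classified directly from its dim order, so the runtime no longer needs to inspect stride metadata to distinguish contiguous from channels-last tensors.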