Support block granularity for QuantizeLinear and DequantizeLinear (#3412)
music-dino authored Sep 28, 2024
1 parent e4eb481 commit 74bc6be
Showing 33 changed files with 1,254 additions and 70 deletions.
10 changes: 5 additions & 5 deletions docs/dev/onnx_operators.rst
@@ -575,11 +575,11 @@ Operator Support Matrix
+--------------------------+-----------+-----------------+------------------------------+
| QLinearSigmoid || UINT8, INT8 | |
+--------------------------+-----------+-----------------+------------------------------+
| QuantizeLinear || FP8, FP16, | ``block_size``, |
| | | FP32, INT32 | ``output_dtype``, |
| | | | ``saturate`` |
| | | | are not |
| | | | supported |
| QuantizeLinear || FP8, FP16, | ``saturate`` |
| | | FP32, INT32 | is not supported |
| | | | |
| | | | |
| | | | |
+--------------------------+-----------+-----------------+------------------------------+
| RandomNormal || FP16, FP32, | |
| | | FP64 | |
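For illustration only (this sketch is not part of the commit): with block granularity, each consecutive run of ``block_size`` elements along the quantization axis shares one scale/zero-point entry, rather than one entry per tensor or per axis slice. A minimal standalone C++ sketch of that arithmetic for a 1-D input:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    const std::size_t block_size = 4;
    // 8 elements and block_size = 4 -> 2 blocks, each with its own scale and zero point.
    std::vector<float> x        = {0.1f, 0.2f, 0.3f, 0.4f, 10.f, 20.f, 30.f, 40.f};
    std::vector<float> scale    = {0.01f, 1.0f};
    std::vector<int8_t> zero_pt = {0, 0};

    std::vector<int8_t> y(x.size());
    for(std::size_t i = 0; i < x.size(); ++i)
    {
        const std::size_t b = i / block_size; // block index selects scale and zero point
        const float q       = std::round(x[i] / scale[b]) + zero_pt[b];
        y[i]                = static_cast<int8_t>(std::clamp(q, -128.f, 127.f));
    }

    for(auto v : y)
        std::cout << static_cast<int>(v) << ' '; // prints: 10 20 30 40 10 20 30 40
    std::cout << '\n';
}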
45 changes: 45 additions & 0 deletions src/onnx/include/migraphx/onnx/quantize_dequantize_linear.hpp
@@ -0,0 +1,45 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_ONNX_QUANTIZE_DEQUANTIZE_LINEAR_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_ONNX_QUANTIZE_DEQUANTIZE_LINEAR_HPP

#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/instruction.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

std::vector<instruction_ref>
transform_quantize_dequantize_linear_inputs(const onnx_parser::node_info& info,
const std::string& op_name,
int block_size,
int axis,
std::vector<instruction_ref> args);

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
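A hypothetical sketch of what this shared helper could look like; the actual definition (in src/onnx/quantize_dequantize_linear.cpp) is among the changed files not shown in this excerpt, so the body below only illustrates the assumed idea: bring y_scale (and y_zero_point, if given) to the shape of x so the elementwise quantizelinear/dequantizelinear operators can consume them directly.

#include <migraphx/onnx/quantize_dequantize_linear.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <algorithm>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

// Illustrative only -- not the committed implementation.
std::vector<instruction_ref>
transform_quantize_dequantize_linear_inputs(const onnx_parser::node_info& info,
                                            const std::string& op_name,
                                            int block_size,
                                            int axis,
                                            std::vector<instruction_ref> args)
{
    const auto x_lens     = args[0]->get_shape().lens();
    const auto n_dim      = x_lens.size();
    const auto tuned_axis = tune_axis(n_dim, axis, op_name);

    std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](instruction_ref ins) {
        const auto lens = ins->get_shape().lens();
        if(ins->get_shape().elements() == 1) // per-tensor scale / zero point
            return info.add_instruction(make_op("multibroadcast", {{"out_lens", x_lens}}), ins);
        if(lens.size() == 1) // per-axis: 1-D scale broadcast along the tuned axis
            return info.add_instruction(
                make_op("broadcast", {{"axis", tuned_axis}, {"out_lens", x_lens}}), ins);
        // Blocked: lens[tuned_axis] == x_lens[tuned_axis] / block_size. Insert a unit
        // dimension after the block axis, broadcast it to block_size, then collapse
        // back to the shape of x (assumes the axis is evenly divisible).
        auto bcast_lens = lens;
        bcast_lens.insert(bcast_lens.begin() + tuned_axis + 1, block_size);
        ins = info.add_instruction(make_op("unsqueeze", {{"axes", {tuned_axis + 1}}}), ins);
        ins = info.add_instruction(make_op("multibroadcast", {{"out_lens", bcast_lens}}), ins);
        return info.add_instruction(make_op("reshape", {{"dims", x_lens}}), ins);
    });
    return args;
}

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx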
62 changes: 28 additions & 34 deletions src/onnx/parse_dequantizelinear.cpp
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -26,6 +26,7 @@
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/onnx/quantize_dequantize_linear.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -38,49 +38,42 @@ struct parse_dequantizelinear : op_parser<parse_dequantizelinear>
instruction_ref parse(const op_desc& opd,
const onnx_parser& /*parser*/,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
std::vector<instruction_ref> args) const
{
int axis = 1;
if(contains(info.attributes, "axis"))
axis = info.attributes.at("axis").i();

auto input_lens = args[0]->get_shape().lens();
auto n_dim = input_lens.size();

instruction_ref x_scale;
if(args[1]->get_shape().elements() != 1)
if(args.size() < 2 or args.size() > 3)
{
auto tuned_axis = tune_axis(n_dim, axis, opd.op_name);
x_scale = info.add_instruction(
make_op("broadcast", {{"axis", tuned_axis}, {"out_lens", input_lens}}), args[1]);
}
else
{
x_scale = info.add_instruction(make_op("multibroadcast", {{"out_lens", input_lens}}),
args[1]);
MIGRAPHX_THROW("DequantizeLinear: must have either 2 or 3 inputs, " +
std::to_string(args.size()) + " inputs provided");
}

if(args.size() == 3)
{
auto x_zero_point = args[2];
if(x_zero_point->get_shape().elements() != 1)
{
auto tuned_axis = tune_axis(n_dim, axis, opd.op_name);
x_zero_point = info.add_instruction(
make_op("broadcast", {{"axis", tuned_axis}, {"out_lens", input_lens}}),
x_zero_point);
}
else
if(args[0]->get_shape().type() != args[2]->get_shape().type())
MIGRAPHX_THROW("DequantizeLinear: x and y_zero_point must be of same type");

if(args[1]->get_shape().lens() != args[2]->get_shape().lens())
{
x_zero_point = info.add_instruction(
make_op("multibroadcast", {{"out_lens", input_lens}}), x_zero_point);
MIGRAPHX_THROW("DequantizeLinear: y_scale and y_zero_point shape mismatch. "
"Provided y_scale "
"shape: " +
to_string_range(args[1]->get_shape().lens()) +
", provided y_zero_point shape: " +
to_string_range(args[2]->get_shape().lens()));
}

return info.add_instruction(
make_op("dequantizelinear"), args[0], x_scale, x_zero_point);
}

return info.add_instruction(make_op("dequantizelinear"), args[0], x_scale);
int axis = 1;
if(contains(info.attributes, "axis"))
axis = info.attributes.at("axis").i();

int block_size = 0;
if(contains(info.attributes, "block_size"))
block_size = info.attributes.at("block_size").i();

args =
transform_quantize_dequantize_linear_inputs(info, opd.op_name, block_size, axis, args);

return info.add_instruction(make_op("dequantizelinear"), args);
}
};

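A standalone numeric sketch (again not taken from the commit) of what blocked DequantizeLinear computes once the inputs have been transformed: for x of shape [2, 4] with axis = 1 and block_size = 2, x_scale has shape [2, 2], and every pair of columns within a row shares one scale/zero-point entry.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    const std::size_t rows = 2, cols = 4, block_size = 2;
    std::vector<int8_t> x    = {10, 20, 30, 40, 10, 20, 30, 40}; // row-major [2, 4]
    std::vector<float> scale = {0.1f, 1.0f, 0.5f, 2.0f};         // [2, 2], one entry per block
    std::vector<int8_t> zp   = {0, 0, 0, 0};                     // same shape as scale

    for(std::size_t r = 0; r < rows; ++r)
    {
        for(std::size_t c = 0; c < cols; ++c)
        {
            const std::size_t b = r * (cols / block_size) + c / block_size;
            const float y       = (x[r * cols + c] - zp[b]) * scale[b];
            std::cout << y << ' ';
        }
        std::cout << '\n'; // prints "1 2 30 40" then "5 10 60 80"
    }
}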
85 changes: 57 additions & 28 deletions src/onnx/parse_quantizelinear.cpp
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -27,6 +27,7 @@
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/common.hpp>
#include <migraphx/onnx/quantize_dequantize_linear.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -37,47 +38,75 @@ struct parse_quantizelinear : op_parser<parse_quantizelinear>
std::vector<op_desc> operators() const { return {{"QuantizeLinear"}}; }

instruction_ref parse(const op_desc& opd,
const onnx_parser& /*parser*/,
const onnx_parser& parser,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
std::vector<instruction_ref> args) const
{
if(args.size() < 2 or args.size() > 3)
{
MIGRAPHX_THROW("QuantizeLinear: must have either 2 or 3 inputs, " +
std::to_string(args.size()) + " input(s) provided");
}

// Starting with version 19 ONNX introduced the constraint that x and y_scale types must be
// the same
if(parser.opset_version >= 19 and
args[0]->get_shape().type() != args[1]->get_shape().type())
{
MIGRAPHX_THROW("QuantizeLinear: x and y_scale must be of same type");
}

if(args.size() == 3 and args[1]->get_shape().lens() != args[2]->get_shape().lens())
{
MIGRAPHX_THROW(
"QuantizeLinear: y_scale and y_zero_point shapes must be equal. Provided y_scale "
"shape: " +
to_string_range(args[1]->get_shape().lens()) +
", provided y_zero_point shape: " + to_string_range(args[2]->get_shape().lens()));
}

int axis = 1;
if(contains(info.attributes, "axis"))
axis = info.attributes.at("axis").i();

auto input_lens = args[0]->get_shape().lens();
auto n_dim = input_lens.size();
int block_size = 0;
if(contains(info.attributes, "block_size"))
block_size = info.attributes.at("block_size").i();

instruction_ref y_scale = args[1];
if(args[1]->get_shape().elements() != 1)
std::optional<migraphx::shape::type_t> output_type;
if(contains(info.attributes, "output_dtype"))
{
auto tuned_axis = tune_axis(n_dim, axis, opd.op_name);
y_scale = info.add_instruction(
make_op("broadcast", {{"axis", tuned_axis}, {"out_lens", input_lens}}), args[1]);
output_type = get_type(info.attributes.at("output_dtype").i());
}

auto common_args = add_common_args(*info.mod, {args[0], y_scale});

if(args.size() == 3)
if(output_type.has_value() and args.size() == 3 and
*output_type != args[2]->get_shape().type())
{
auto y_zero_point = args[2];
if(y_zero_point->get_shape().elements() != 1)
{
auto tuned_axis = tune_axis(n_dim, axis, opd.op_name);
y_zero_point = info.add_instruction(
make_op("broadcast", {{"axis", tuned_axis}, {"out_lens", input_lens}}),
y_zero_point);
}
else
{
y_zero_point = info.add_instruction(
make_op("multibroadcast", {{"out_lens", input_lens}}), y_zero_point);
}
MIGRAPHX_THROW(
"QuantizeLinear: output_type and y_zero_point type must match. output_type: " +
to_string(*output_type) +
+", y_zero_point type: " + to_string(args[2]->get_shape().type()));
}

args =
transform_quantize_dequantize_linear_inputs(info, opd.op_name, block_size, axis, args);

common_args.push_back(y_zero_point);
if(parser.opset_version < 19)
{
auto common_type = common_shape({args[0]->get_shape(), args[1]->get_shape()}).type();
std::transform(args.begin(), args.begin() + 2, args.begin(), [&](auto ins) {
if(ins->get_shape().type() != common_type)
ins = info.add_instruction(make_op("convert", {{"target_type", common_type}}),
ins);
return ins;
});
}

return info.add_instruction(make_op("quantizelinear"), common_args);
if(output_type.has_value())
return info.add_instruction(make_op("quantizelinear", {{"out_type", *output_type}}),
args);
else
return info.add_instruction(make_op("quantizelinear"), args);
}
};

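One more standalone sketch, assuming the shape rule from the ONNX specification rather than anything shown in this diff: for blocked (de)quantization, y_scale must match x in every dimension except the block axis, where it holds ceil(x_dim / block_size) entries.

#include <cstddef>
#include <iostream>
#include <vector>

// Expected y_scale dimensions for a blocked QuantizeLinear/DequantizeLinear node.
std::vector<std::size_t> expected_scale_lens(std::vector<std::size_t> x_lens,
                                             std::size_t axis,
                                             std::size_t block_size)
{
    x_lens[axis] = (x_lens[axis] + block_size - 1) / block_size; // ceiling division
    return x_lens;
}

int main()
{
    // x: [4, 6], axis = 1, block_size = 3  ->  y_scale (and y_zero_point): [4, 2]
    for(auto d : expected_scale_lens({4, 6}, 1, 3))
        std::cout << d << ' ';
    std::cout << '\n';
}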