[cker/train] Introduce Pad op in train #12522
@@ -0,0 +1,165 @@
/*
 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_TRAIN_OPERATION_PAD_H__
#define __NNFW_CKER_TRAIN_OPERATION_PAD_H__

#include "cker/operation/Pad.h"

#include <cassert>
#include <cstring>
#include <stdexcept>
#include <utility>
#include <vector>

namespace nnfw
{
namespace cker
{
namespace train
{

/*
 * The PAD operation transforms input_data into output_data according to the
 * padding options (padding with a constant C in this example):
 *
 * input_data -> output_data
 * [0,1]      -> [C,C,C,C]
 * [2,3]      -> [C,0,1,C]
 *            -> [C,2,3,C]
 *            -> [C,C,C,C]
 */
/*
 * The backward of PAD (Depad) transforms input_data (backward_output_data)
 * into output_data (backward_input_data) using the same padding options:
 *
 * input_data (backward_output_data) -> output_data (backward_input_data)
 * [C,C,C,C]                         -> [0,1]
 * [C,0,1,C]                         -> [2,3]
 * [C,2,3,C]                         ->
 * [C,C,C,C]                         ->
 */
template <typename T>
inline void Depad(const int32_t *padding_data, int32_t pad_rank, const Shape &input_shape,
                  const T *input_data, const Shape &output_shape, T *output_data)
{
  using PaddingInfo = std::pair<int32_t, int32_t>;
  using PaddingList = std::vector<PaddingInfo>;

  assert(output_shape.DimensionsCount() == input_shape.DimensionsCount());

  PaddingList padding_list(pad_rank);
  for (int32_t n = 0; n < pad_rank; ++n)
  {
    const int32_t *from = padding_data + (n * 2);
    padding_list[n] = {from[0], from[1]};
  }
  for (int32_t i = 0; i < pad_rank; ++i)
  {
    assert(output_shape.Dims(i) ==
           input_shape.Dims(i) - padding_list[i].first - padding_list[i].second);
  }
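
  // Every case below reduces to contiguous row copies: for each output row,
  // the matching input row starts some padding offset into the padded input,
  // so a single memcpy per innermost row suffices.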

  switch (pad_rank)
  {
    case 0:
    case 1:
    {
      const int32_t out_width = output_shape.Dims(0);
      const int32_t padding_left = padding_list[0].first;
      std::memcpy(output_data, input_data + padding_left, out_width * sizeof(T));
      break;
    }
    case 2: // HW
    {
      const int32_t out_height = output_shape.Dims(0);
      const int32_t out_width = output_shape.Dims(1);
      const int32_t in_width = input_shape.Dims(1);
      const int32_t padding_top = padding_list[0].first;
      const int32_t padding_left = padding_list[1].first;
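      // e.g. a 4x4 input with padding {1,1},{1,1} yields a 2x2 output, and
      // output row h begins at input offset (h + 1) * 4 + 1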
      for (auto h = 0; h < out_height; ++h)
      {
        const auto in_offset = (h + padding_top) * in_width + padding_left;
        const auto out_offset = h * out_width;
        // copy a row of input data to output data
        std::memcpy(output_data + out_offset, input_data + in_offset, out_width * sizeof(T));
      }
      break;
    }
    case 3: // HWC
    {
      const int32_t out_depth = output_shape.Dims(0);
      const int32_t out_height = output_shape.Dims(1);
      const int32_t out_width = output_shape.Dims(2);
      const int32_t out_plain_size = out_height * out_width;
      const int32_t in_height = input_shape.Dims(1);
      const int32_t in_width = input_shape.Dims(2);
      const int32_t in_plain_size = in_height * in_width;
      const int32_t padding_depth = padding_list[0].first;
      const int32_t padding_top = padding_list[1].first;
      const int32_t padding_left = padding_list[2].first;
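      // The row copy generalizes: skip padding_depth whole planes, then
      // padding_top rows within the plane, then padding_left elements.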
      for (auto d = 0; d < out_depth; ++d)
      {
        for (auto h = 0; h < out_height; ++h)
        {
          const auto in_offset =
            (d + padding_depth) * in_plain_size + (h + padding_top) * in_width + (padding_left);
          const auto out_offset = (d * out_plain_size) + (h * out_width);
          // copy a row of input data to output data
          std::memcpy(output_data + out_offset, input_data + in_offset, out_width * sizeof(T));
        }
      }
      break;
    }
    case 4:
    {
      const int32_t out_cube = output_shape.Dims(0);
      const int32_t out_depth = output_shape.Dims(1);
      const int32_t out_height = output_shape.Dims(2);
      const int32_t out_width = output_shape.Dims(3);
      const int32_t out_plain_size = out_height * out_width;
      const int32_t out_cube_size = out_depth * out_plain_size;
      const int32_t in_depth = input_shape.Dims(1);
      const int32_t in_height = input_shape.Dims(2);
      const int32_t in_width = input_shape.Dims(3);
      const int32_t in_plain_size = in_height * in_width;
      const int32_t in_cube_size = in_depth * in_plain_size;
      const int32_t padding_cube = padding_list[0].first;
      const int32_t padding_depth = padding_list[1].first;
      const int32_t padding_top = padding_list[2].first;
      const int32_t padding_left = padding_list[3].first;
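      // Layout naming (NCHW vs NHWC) does not matter at this level: the dims
      // are treated abstractly and each innermost row is copied contiguously.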
      for (auto c = 0; c < out_cube; ++c)
      {
        for (auto d = 0; d < out_depth; ++d)
        {
          for (auto h = 0; h < out_height; ++h)
          {
            const auto in_offset = (c + padding_cube) * in_cube_size +
                                   (d + padding_depth) * in_plain_size +
                                   (h + padding_top) * in_width + (padding_left);
            const auto out_offset = (c * out_cube_size) + (d * out_plain_size) + (h * out_width);
            // copy a row of input data to output data
            std::memcpy(output_data + out_offset, input_data + in_offset, out_width * sizeof(T));

[Review thread on the offset calculation above]

Reviewer: I'm a bit afraid that I might be wrong, but this offset calculation
looks like NCHW. I referred to this:
https://oneapi-src.github.io/oneDNN/dev_guide_understanding_memory_formats.html#nchw

Author: Be not afraid 😄. My code could be wrong. I'll take a look. Thank you!

Author: IMHO, in cker the logic here does not depend on NCHW vs NHWC. From the
point of view of the Pad kernel, it just does copying. If it were NCHW, only
the variable names would change. If I'm wrong, I'll leave a comment.

Reviewer: I somewhat agree. IMHO, the layout in onert (…) BTW, mostly looks
good.

Author: Okay, now I'm somewhat sure. At the train backend level, tensors'
layout is assumed to be NHWC:

// backend/train/Config.cc
ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout) { return ir::Layout::NHWC; }

// backend/train/BackendContext.cc
backend::train::ITensorRegistry *BackendContext::genTrainingTensors()
{
  const ir::train::TrainableGraph &tgraph = *trainable_graph();
  auto tensor_builder = _tensor_builder;

  tgraph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
    if (external_operands().contains(ind))
      return;
    // NOTE Assuming there is no layout changes (Always assume NHWC or UNKNOWN)
    assert(tgraph.layout() != ir::Layout::NCHW);
    // TODO Different shape of back propagation tensor
    ir::OperandInfo backend_info{obj.shape(), obj.typeInfo(), obj.info().memAllocType(),
                                 obj.isConstant()};
    tensor_builder->registerBackwardTensorInfo(ind, backend_info, ir::Layout::NHWC);
  });

  // TODO Plan tensor builds to reduce peak memory usage
  tgraph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
    if (tensor_builder->isRegisteredBackward(ind))
      tensor_builder->notifyBackwardFirstUse(ind);
  });

  tensor_builder->allocateBackward();

  return _tensor_registry.get();
}

Therefore a rank-4 tensor in cker is handled as NHWC. I changed the wording
from the specific width/height/depth to the abstract (less specific)
row/col/plain; the point is that CHW vs HWC is not what matters in cker.

Reviewer: Aha, I think I also got it.

          }
        }
      }
      break;
    }
    default:
      throw std::runtime_error("Padding for rank > 4 NYI");
      break;
  }
}

} // namespace train
} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_TRAIN_OPERATION_PAD_H__
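
For reference, a minimal sketch of exercising Depad from a caller, mirroring
the 2x2 -> 4x4 example in the header comment. It assumes the header is
reachable as cker/train/operation/Pad.h (per its include guard) and that
cker's Shape can be constructed from an initializer list of dims, as with
TFLite's RuntimeShape; the names grad_padded/grad_unpadded are illustrative
only:

#include "cker/train/operation/Pad.h"

#include <array>
#include <cstdint>

int main()
{
  using nnfw::cker::Shape;

  // Padded gradient (backward_output_data): a 4x4 buffer whose border holds
  // the pad constant (9 here) and whose 2x2 interior holds {0, 1, 2, 3}.
  const std::array<float, 16> grad_padded = {9, 9, 9, 9, //
                                             9, 0, 1, 9, //
                                             9, 2, 3, 9, //
                                             9, 9, 9, 9};
  // padding_data is laid out as {before, after} pairs, one pair per dimension.
  const std::array<int32_t, 4> padding = {1, 1, 1, 1};

  std::array<float, 4> grad_unpadded{}; // backward_input_data, 2x2
  nnfw::cker::train::Depad<float>(padding.data(), /*pad_rank=*/2, Shape{4, 4},
                                  grad_padded.data(), Shape{2, 2},
                                  grad_unpadded.data());
  // grad_unpadded now holds {0, 1, 2, 3}: Depad stripped the padded border.
  return 0;
}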
Final review comment: 👍 It would be good to use this param in the cpu backend too.