diff --git a/runtime/onert/backend/train/BackendContext.cc b/runtime/onert/backend/train/BackendContext.cc
index 59fee712247..b903fd09faf 100644
--- a/runtime/onert/backend/train/BackendContext.cc
+++ b/runtime/onert/backend/train/BackendContext.cc
@@ -16,12 +16,14 @@
 #include "BackendContext.h"
 
+#include "ExtraTensorGenerator.h"
 #include "TensorBuilder.h"
 #include "TensorPlanner.h"
 #include "KernelGenerator.h"
 #include "ops/BackPropInitializer.h"
 
 #include
+#include
 #include
 #include
 
@@ -179,6 +181,32 @@ FunctionMap BackendContext::gen()
   //   fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
   // }
 
+  ExtraTensorGenerator extra_tensor_gen(trainable_graph(), _tensor_builder, _tensor_registry);
+
+  const auto &ops = trainable_graph()->operations();
+
+  for (auto &pair : fn_map)
+  {
+    auto &op_idx = pair.first;
+    auto &fn_seq = pair.second;
+
+    const ir::IOperation *op = &ops.at(op_idx);
+    const auto trainable_op = dynamic_cast<const ir::train::ITrainableOperation *>(op);
+    assert(trainable_op != nullptr);
+
+    if (not trainable_op->isRequiredForBackward())
+      continue;
+
+    VERBOSE(ExtraTensor) << "register tensor for " << trainable_op->name() << std::endl;
+
+    fn_seq->iterate([&](exec::train::ITrainableFunction &fn) {
+      extra_tensor_gen.register_tensors(op_idx, fn.registerExtraTensors());
+    });
+  }
+  extra_tensor_gen.plan();
+  extra_tensor_gen.allocate();
+
   return fn_map;
 }
diff --git a/runtime/onert/backend/train/ExtraTensorGenerator.cc b/runtime/onert/backend/train/ExtraTensorGenerator.cc
new file mode 100644
index 00000000000..63dbca4d7a5
--- /dev/null
+++ b/runtime/onert/backend/train/ExtraTensorGenerator.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExtraTensorGenerator.h"
+
+#include "ExtraTensorIndex.h"
+
+#include <memory>
+#include <sstream>
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+ExtraTensorGenerator::ExtraTensorGenerator(const ir::train::TrainableGraph *tgraph,
+                                           std::shared_ptr<TensorBuilder> &tensor_builder,
+                                           std::shared_ptr<ITensorRegistry> &tensor_registry)
+  : _tgraph(tgraph), _tensor_builder(tensor_builder)
+{
+  _tensor_reg = std::dynamic_pointer_cast<TensorRegistry>(tensor_registry);
+}
+
+// Move to BackendContext.cc
+void ExtraTensorGenerator::register_tensors(ir::OperationIndex op_idx,
+                                            std::optional<ExtraTensors> &&tensors)
+{
+  if (not tensors.has_value())
+    return;
+
+  auto extra_tensors = tensors.value();
+
+  auto &operations = _tgraph->operations();
+
+  for (size_t i = 0; i < extra_tensors.size(); i++)
+  {
+    // register tensor
+    ExtraTensorIndex tensor_idx(op_idx, i);
+    _tensor_builder->registerExtraTensor(tensor_idx, extra_tensors[i]);
+
+    std::stringstream op_info;
+    op_info << op_idx << "_" << operations.at(op_idx).name();
+    VERBOSE(ExtraTensorGenerator) << "register (idx:" << tensor_idx << ") requested from "
+                                  << op_info.str() << std::endl;
+  }
+  return;
+}
+
+ExtraTensors ExtraTensorGenerator::getExtraTensors(const ir::OperationIndex &op_index)
+{
+  ExtraTensors tensors;
+
+  int sub_index = 0;
+  auto tensor = _tensor_reg->getExtraTensor(ExtraTensorIndex(op_index, sub_index));
+  while (tensor != nullptr)
+  {
+    tensors.push_back(tensor);
+
+    VERBOSE(ExtraTensorGenerator) << "found extra tensor " << op_index << "-" << sub_index
+                                  << std::endl;
+
+    sub_index++;
+    tensor = _tensor_reg->getExtraTensor(ExtraTensorIndex(op_index, sub_index));
+  }
+
+  return tensors;
+}
+
+// Move to TensorPlanner
+void ExtraTensorGenerator::plan()
+{
+  // forwarding order
+  const auto f_order = _tgraph->topolSortOperations();
+  for (const auto &op_index : f_order)
+  {
+    auto tensors = getExtraTensors(op_index);
+    for (auto i = 0u; i < tensors.size(); ++i)
+    {
+      const auto lt = tensors[i]->lifetime();
+      if (lt == ExtraTensorLifeTime::FORWARD_TO_BACKWARD)
+        _tensor_builder->notifyFirstUse(ExtraTensorIndex(op_index, i));
+    }
+  }
+
+  // backwarding order
+  const auto b_order = _tgraph->essentialBackwardOrder();
+  for (const auto &op_index : b_order)
+  {
+    auto tensors = getExtraTensors(op_index);
+    for (auto i = 0u; i < tensors.size(); ++i)
+    {
+      const auto lt = tensors[i]->lifetime();
+      if (lt == ExtraTensorLifeTime::BACKWARD)
+        _tensor_builder->notifyFirstUse(ExtraTensorIndex(op_index, i));
+    }
+
+    for (auto i = 0u; i < tensors.size(); ++i)
+    {
+      const auto lt = tensors[i]->lifetime();
+      if (lt == ExtraTensorLifeTime::FORWARD_TO_BACKWARD || lt == ExtraTensorLifeTime::BACKWARD)
+        _tensor_builder->notifyLastUse(ExtraTensorIndex(op_index, i));
+    }
+  }
+}
+
+// Move to allocateBackward()
+void ExtraTensorGenerator::allocate() { _tensor_builder->allocateExtra(); }
+
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ExtraTensorGenerator.h b/runtime/onert/backend/train/ExtraTensorGenerator.h
new file mode 100644
index 00000000000..063f52e5d66
--- /dev/null
+++ b/runtime/onert/backend/train/ExtraTensorGenerator.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
+#define __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
+
+#include <ir/train/TrainableGraph.h>
+#include <memory>
+#include <optional>
+
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+class ExtraTensorGenerator
+{
+public:
+  ExtraTensorGenerator() = delete;
+
+  ExtraTensorGenerator(const ir::train::TrainableGraph *tgraph,
+                       std::shared_ptr<TensorBuilder> &tensor_builder,
+                       std::shared_ptr<ITensorRegistry> &tensor_registry);
+
+public:
+  // Since 'register' is a reserved keyword, use 'register_tensors' instead of 'register'
+  void register_tensors(ir::OperationIndex idx, std::optional<ExtraTensors> &&tensors);
+  void plan();
+  void allocate();
+
+private:
+  ExtraTensors getExtraTensors(const ir::OperationIndex &op_index);
+
+  const ir::train::TrainableGraph *_tgraph;
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<TensorRegistry> _tensor_reg;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_EXTRA_TENSOR_GENERATOR_H__
diff --git a/runtime/onert/backend/train/MemoryManager.cc b/runtime/onert/backend/train/MemoryManager.cc
index 4902e2a7eaa..7948d9af205 100644
--- a/runtime/onert/backend/train/MemoryManager.cc
+++ b/runtime/onert/backend/train/MemoryManager.cc
@@ -17,6 +17,7 @@
 #include "MemoryManager.h"
 
 #include "MemoryPlannerFactory.h"
+#include "ExtraTensorIndex.h"
 
 #include
 
@@ -53,46 +54,53 @@ uint8_t *TrainableMemoryManager::getOptVarBuffer(const ir::OperandIndex &ind,
   return _var_mem_alloc->base() + var_offset + mem_blk.offset;
 }
 
-DisposableMemoryManager::DisposableMemoryManager() : _mem_planner{createMemoryPlanner()}
+template <typename Index>
+TrainMemoryManager<Index>::TrainMemoryManager() : _mem_planner{createMemoryPlanner()}
 {
   // DO NOTHING
 }
 
-basic::IMemoryPlanner<DisposableTensorIndex> *DisposableMemoryManager::createMemoryPlanner()
+template <typename Index>
+basic::IMemoryPlanner<Index> *TrainMemoryManager<Index>::createMemoryPlanner()
 {
   auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
-  return MemoryPlannerFactory::get().create(planner_id);
+  return MemoryPlannerFactory<Index>::get().create(planner_id);
 }
 
-basic::IMemoryPlanner<DisposableTensorIndex> *
-DisposableMemoryManager::createMemoryPlanner(const std::string planner_id)
+template <typename Index>
+basic::IMemoryPlanner<Index> *
+TrainMemoryManager<Index>::createMemoryPlanner(const std::string planner_id)
 {
-  return MemoryPlannerFactory::get().create(planner_id);
+  return MemoryPlannerFactory<Index>::get().create(planner_id);
 }
 
-void DisposableMemoryManager::claimPlan(const DisposableTensorIndex &ind, uint32_t size)
+template <typename Index>
+void TrainMemoryManager<Index>::claimPlan(const Index &ind, uint32_t size)
 {
   _mem_planner->claim(ind, size);
 }
 
-void DisposableMemoryManager::releasePlan(const DisposableTensorIndex &ind)
+template <typename Index> void TrainMemoryManager<Index>::releasePlan(const Index &ind)
 {
   _mem_planner->release(ind);
 }
 
-void DisposableMemoryManager::allocate(void)
+template <typename Index> void TrainMemoryManager<Index>::allocate(void)
 {
   _mem_alloc = std::make_shared<basic::Allocator>(_mem_planner->capacity());
   assert(_mem_alloc->base());
 }
 
-uint8_t *DisposableMemoryManager::getBuffer(const DisposableTensorIndex &ind) const
+template <typename Index> uint8_t *TrainMemoryManager<Index>::getBuffer(const Index &ind) const
 {
   assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
   const auto &mem_blk = _mem_planner->memory_plans().at(ind);
   return _mem_alloc->base() + mem_blk.offset;
 }
 
+// Instantiation
+template class TrainMemoryManager<DisposableTensorIndex>;
+template class TrainMemoryManager<ExtraTensorIndex>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/MemoryManager.h b/runtime/onert/backend/train/MemoryManager.h
index 19a60e32deb..b15de09d239 100644
--- a/runtime/onert/backend/train/MemoryManager.h
+++ b/runtime/onert/backend/train/MemoryManager.h
@@ -20,6 +20,7 @@
 #include
 
 #include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
 
 namespace onert
 {
@@ -44,29 +45,33 @@ class TrainableMemoryManager : public MemoryManager
   uint32_t _optim_vars_count;
 };
 
-class DisposableMemoryManager
+// TODO: Find a better name
+template <typename Index> class TrainMemoryManager
 {
 public:
-  DisposableMemoryManager();
+  TrainMemoryManager();
 
   void allocate(void);
-  uint8_t *getBuffer(const DisposableTensorIndex &ind) const;
+  uint8_t *getBuffer(const Index &ind) const;
   void deallocate(void) { _mem_alloc->release(); }
 
-  void claimPlan(const DisposableTensorIndex &ind, uint32_t size);
-  void releasePlan(const DisposableTensorIndex &ind);
+  void claimPlan(const Index &ind, uint32_t size);
+  void releasePlan(const Index &ind);
 
   std::shared_ptr<basic::Allocator> getMemAlloc() { return _mem_alloc; }
 
private:
-  basic::IMemoryPlanner<DisposableTensorIndex> *createMemoryPlanner();
-  basic::IMemoryPlanner<DisposableTensorIndex> *createMemoryPlanner(const std::string planner_id);
+  basic::IMemoryPlanner<Index> *createMemoryPlanner();
+  basic::IMemoryPlanner<Index> *createMemoryPlanner(const std::string planner_id);
 
 private:
-  std::shared_ptr<basic::IMemoryPlanner<DisposableTensorIndex>> _mem_planner;
+  std::shared_ptr<basic::IMemoryPlanner<Index>> _mem_planner;
   std::shared_ptr<basic::Allocator> _mem_alloc;
 };
 
+using DisposableMemoryManager = TrainMemoryManager<DisposableTensorIndex>;
+using ExtraMemoryManager = TrainMemoryManager<ExtraTensorIndex>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/MemoryPlanner.cc b/runtime/onert/backend/train/MemoryPlanner.cc
index ea385558e28..e60e9f99948 100644
--- a/runtime/onert/backend/train/MemoryPlanner.cc
+++ b/runtime/onert/backend/train/MemoryPlanner.cc
@@ -15,6 +15,8 @@
  */
 
 #include "MemoryPlanner.h"
+#include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
 
 #include
 
@@ -27,7 +29,7 @@ namespace backend
 namespace train
 {
 
-void BumpPlanner::claim(const DisposableTensorIndex &ind, size_t size)
+template <typename Index> void BumpPlanner<Index>::claim(const Index &ind, size_t size)
 {
   basic::Block blk{_capacity, size};
   _mem_plans[ind] = blk;
@@ -36,7 +38,7 @@ void BumpPlanner::claim(const DisposableTensorIndex &ind, size_t size)
   VERBOSE(BP_PLANNER) << "CLAIM(" << ind << "): " << blk.offset << ", " << blk.size << std::endl;
 }
 
-void BumpPlanner::release(const DisposableTensorIndex &ind)
+template <typename Index> void BumpPlanner<Index>::release(const Index &ind)
 {
   VERBOSE(BP_PLANNER) << "RELEASE(" << ind << "): "
                       << "NOTHING does" << std::endl;
@@ -56,7 +58,7 @@
 // point in time, it means the place at the offset can be claimed.
 // 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than
 // the previous claim_base_offset.
-void FirstFitPlanner::claim(const DisposableTensorIndex &ind, size_t size)
+template <typename Index> void FirstFitPlanner<Index>::claim(const Index &ind, size_t size)
 {
   // Find the right position for claiming
   uint32_t next_offset = 0;
@@ -88,7 +90,7 @@ void FirstFitPlanner::claim(const DisposableTensorIndex &ind, size_t size)
   }
 }
 
-void FirstFitPlanner::release(const DisposableTensorIndex &ind)
+template <typename Index> void FirstFitPlanner<Index>::release(const Index &ind)
 {
   for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
   {
@@ -107,14 +109,15 @@ void FirstFitPlanner::release(const DisposableTensorIndex &ind)
   assert(!"Cannot release for given index. It has been not claimed or released already.");
 }
 
-WICPlanner::WICPlanner()
+template <typename Index>
+WICPlanner<Index>::WICPlanner()
   : _initialized(false), _capacity(0), _mem_plans(), _live_indices(), _interference_graph(),
     _indices()
 {
   // DO NOTHING
 }
 
-void WICPlanner::claim(const DisposableTensorIndex &ind, size_t size)
+template <typename Index> void WICPlanner<Index>::claim(const Index &ind, size_t size)
 {
   _indices.emplace(size, ind);
   _interference_graph[ind].insert(_interference_graph[ind].end(), _live_indices.cbegin(),
@@ -128,7 +131,7 @@ void WICPlanner::claim(const DisposableTensorIndex &ind, size_t size)
   VERBOSE(WIC_PLANNER) << "claim(" << ind << "): [" << size << "sz]" << std::endl;
 }
 
-void WICPlanner::release(const DisposableTensorIndex &ind)
+template <typename Index> void WICPlanner<Index>::release(const Index &ind)
 {
   _live_indices.erase(ind);
   VERBOSE(WIC_PLANNER) << "release(" << ind << ")" << std::endl;
@@ -143,7 +146,7 @@
  * 3. Allocate memory block for sorted operands
  *   - Find free memory block which does not overlap with interfered operands
  */
-void WICPlanner::buildMemoryPlans()
+template <typename Index> void WICPlanner<Index>::buildMemoryPlans()
 {
   for (const auto &[size, ind] : _indices)
   {
@@ -194,13 +197,22 @@
   _indices.clear();
 }
 
-std::unordered_map<DisposableTensorIndex, basic::Block> &WICPlanner::memory_plans()
+template <typename Index>
+typename WICPlanner<Index>::MemoryPlans &WICPlanner<Index>::memory_plans()
 {
   if (!_initialized)
     buildMemoryPlans();
   return _mem_plans;
 }
 
+template class BumpPlanner<DisposableTensorIndex>;
+template class BumpPlanner<ExtraTensorIndex>;
+
+template class FirstFitPlanner<DisposableTensorIndex>;
+template class FirstFitPlanner<ExtraTensorIndex>;
+
+template class WICPlanner<DisposableTensorIndex>;
+template class WICPlanner<ExtraTensorIndex>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/MemoryPlanner.h b/runtime/onert/backend/train/MemoryPlanner.h
index 181dd5e6979..5e3f48e02f8 100644
--- a/runtime/onert/backend/train/MemoryPlanner.h
+++ b/runtime/onert/backend/train/MemoryPlanner.h
@@ -24,13 +24,14 @@
 
 #include
 
-#include "DisposableTensorIndex.h"
-
 #include
 #include
 #include
 #include
 
+#include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
+
 namespace onert
 {
 namespace backend
@@ -41,20 +42,22 @@ namespace train
 /**
  * @brief Class to plan memory by bump way
 */
-class BumpPlanner : public basic::IMemoryPlanner<DisposableTensorIndex>
+template <typename Index> class BumpPlanner : public basic::IMemoryPlanner<Index>
 {
+  using MemoryPlans = typename basic::IMemoryPlanner<Index>::MemoryPlans;
+
 public:
   /**
    * @brief Claim memory for tensor by bump way
    * @param[in] index The tensor index
    * @param[in] size The size of the memory
    */
-  void claim(const DisposableTensorIndex &, size_t) override;
+  void claim(const Index &, size_t) override;
   /**
    * @brief Release memory for tensor by bump way
    * @param[in] index The tensor index
   */
-  void release(const DisposableTensorIndex &) override;
+  void release(const Index &) override;
  /**
   * @brief Get capacity for memory planning
   * @return The value of capacity
@@ -74,20 +77,22 @@ class BumpPlanner : public basic::IMemoryPlanner<DisposableTensorIndex>
 /**
  * @brief Class to plan memory by firstfit way
 */
-class FirstFitPlanner : public basic::IMemoryPlanner<DisposableTensorIndex>
+template <typename Index> class FirstFitPlanner : public basic::IMemoryPlanner<Index>
 {
+  using MemoryPlans = typename basic::IMemoryPlanner<Index>::MemoryPlans;
+
 public:
   /**
    * @brief Claim memory for tensor by firstfit way
    * @param[in] index The tensor index
    * @param[in] size The size of the memory
    */
-  void claim(const DisposableTensorIndex &, size_t) override;
+  void claim(const Index &, size_t) override;
   /**
    * @brief Release memory for tensor by firstfit way
    * @param[in] index The tensor index
   */
-  void release(const DisposableTensorIndex &) override;
+  void release(const Index &) override;
   /**
    * @brief Get capacity for memory planning
    * @return The value of capacity
@@ -103,14 +108,17 @@
   uint32_t _capacity = 0;
   MemoryPlans _mem_plans;
   // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
-  std::map<uint32_t, DisposableTensorIndex> _claim_table;
+  std::map<uint32_t, Index> _claim_table;
 };
 
 /**
  * @brief Class to plan memory by Weighted Interval Color algorithm
 */
-class WICPlanner : public basic::IMemoryPlanner<DisposableTensorIndex>
+template <typename Index> class WICPlanner : public basic::IMemoryPlanner<Index>
 {
+public:
+  using MemoryPlans = typename basic::IMemoryPlanner<Index>::MemoryPlans;
+
 public:
   WICPlanner();
 
@@ -119,12 +127,12 @@
   /**
    * @brief Claim memory for tensor by WIC algorithm
    * @param[in] index The tensor index
    * @param[in] size The size of the memory
    */
-  void claim(const DisposableTensorIndex &, size_t) override;
+  void claim(const Index &, size_t) override;
   /**
    * @brief Release memory for tensor by WIC algorithm
    * @param[in] index The tensor index
   */
-  void release(const DisposableTensorIndex &) override;
+  void release(const Index &) override;
   /**
    * @brief Get capacity for memory planning
    * @return The value of capacity
@@ -147,10 +155,10 @@
   bool _initialized;
   uint32_t _capacity;
   MemoryPlans _mem_plans;
-  std::unordered_set<DisposableTensorIndex> _live_indices;
-  DisposableTensorIndexMap<std::vector<DisposableTensorIndex>> _interference_graph;
+  std::unordered_set<Index> _live_indices;
+  std::unordered_map<Index, std::vector<Index>> _interference_graph;
   // Sort tensors by descending order of size
-  std::multimap<uint32_t, DisposableTensorIndex, std::greater<uint32_t>> _indices;
+  std::multimap<uint32_t, Index, std::greater<uint32_t>> _indices;
 };
 
 } // namespace train
diff --git a/runtime/onert/backend/train/MemoryPlanner.test.cc b/runtime/onert/backend/train/MemoryPlanner.test.cc
index 8978607706f..15b9dc15693 100644
--- a/runtime/onert/backend/train/MemoryPlanner.test.cc
+++ b/runtime/onert/backend/train/MemoryPlanner.test.cc
@@ -16,6 +16,7 @@
 
 #include <gtest/gtest.h>
 
+#include "DisposableTensorIndex.h"
 #include "MemoryPlanner.h"
 #include "ir/Index.h"
 
@@ -25,7 +26,7 @@ using onert::ir::OperationIndex;
 
 TEST(BumpPlanner, claim_test)
 {
-  BumpPlanner planner;
+  BumpPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size,
                           uint32_t expected_offset) {
@@ -55,7 +56,7 @@ TEST(BumpPlanner, claim_test)
 
 TEST(FirstFitPlanner, claim_release_test)
 {
-  FirstFitPlanner planner;
+  FirstFitPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size,
                           uint32_t expected_offset) {
@@ -148,7 +149,7 @@ TEST(FirstFitPlanner, claim_release_test)
 
 TEST(FirstFitPlanner, neg_release_non_existing_index)
 {
-  FirstFitPlanner planner;
+  FirstFitPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size,
                           uint32_t expected_offset) {
@@ -184,7 +185,7 @@ TEST(FirstFitPlanner, neg_release_non_existing_index)
 
 TEST(FirstFitPlanner, neg_release_twice)
 {
-  FirstFitPlanner planner;
+  FirstFitPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size,
                           uint32_t expected_offset) {
@@ -223,7 +224,7 @@ TEST(FirstFitPlanner, neg_release_twice)
 
 TEST(WICPlanner, claim_release_test)
 {
-  WICPlanner planner;
+  WICPlanner<DisposableTensorIndex> planner;
 
   auto claim = [&planner](uint32_t op_index, uint32_t operand_index, size_t size) {
     DisposableTensorIndex mem_idx{OperationIndex{op_index}, OperandIndex{operand_index}};
diff --git a/runtime/onert/backend/train/MemoryPlannerFactory.cc b/runtime/onert/backend/train/MemoryPlannerFactory.cc
index acfa44e3511..e1e80119213 100644
--- a/runtime/onert/backend/train/MemoryPlannerFactory.cc
+++ b/runtime/onert/backend/train/MemoryPlannerFactory.cc
@@ -16,6 +16,8 @@
 
 #include "MemoryPlannerFactory.h"
 
+#include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
+
 namespace onert
 {
 namespace backend
@@ -23,29 +25,47 @@ namespace backend
 namespace train
 {
 
-MemoryPlannerFactory &MemoryPlannerFactory::get()
+template <typename Index> MemoryPlannerFactory<Index> &MemoryPlannerFactory<Index>::get()
 {
-  static MemoryPlannerFactory instance;
+  static MemoryPlannerFactory<Index> instance;
   return instance;
 }
 
-basic::IMemoryPlanner<DisposableTensorIndex> *MemoryPlannerFactory::create(const std::string &key)
+template <typename Index>
+basic::IMemoryPlanner<Index> *MemoryPlannerFactory<Index>::create(const std::string &key)
 {
   if (key == "FirstFit")
   {
-    return new FirstFitPlanner;
+    return new FirstFitPlanner<Index>();
   }
   else if (key == "Bump")
  {
-    return new BumpPlanner;
+    return new BumpPlanner<Index>();
   }
   else if (key == "WIC")
   {
-    return new WICPlanner;
+    return new WICPlanner<Index>();
   }
-  return new FirstFitPlanner; // Default Planner
+  return new FirstFitPlanner<Index>(); // Default Planner
 }
 
+// Explicit instantiation is necessary: the member definitions live in this
+// translation unit only, so without these lines the linker reports undefined
+// references to MemoryPlannerFactory<DisposableTensorIndex>::get()/create()
+// and MemoryPlannerFactory<ExtraTensorIndex>::get()/create().
+template class MemoryPlannerFactory<DisposableTensorIndex>;
+template class MemoryPlannerFactory<ExtraTensorIndex>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
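Reviewer note (not part of the patch): the point of templating the planners over the index type is that ExtraTensor buffers now get the same liveness-based packing that DisposableTensor buffers already had. A minimal sketch, assuming the `FirstFitPlanner<ExtraTensorIndex>` introduced above and an `ExtraTensorIndex(OperationIndex, uint32_t)` constructor as used in ExtraTensorGenerator.cc:

```cpp
#include "ExtraTensorIndex.h"
#include "MemoryPlanner.h"
#include "ir/Index.h"

using namespace onert::backend::train;
using onert::ir::OperationIndex;

void overlap_sketch()
{
  FirstFitPlanner<ExtraTensorIndex> planner;

  ExtraTensorIndex a{OperationIndex{0}, 0}; // BACKWARD tensor of op #0
  ExtraTensorIndex b{OperationIndex{1}, 0}; // BACKWARD tensor of op #1

  // In backward order op #1 runs before op #0, so b is claimed and released
  // before a is claimed; first-fit can then hand a the offset b vacated.
  planner.claim(b, 1024);
  planner.release(b);
  planner.claim(a, 2048);
  planner.release(a);

  // planner.capacity() ends up as max(1024, 2048), not the sum.
}
```

Two claims whose lifetimes do not overlap share an offset, which is exactly the saving over the old per-layer eager `setBuffer()` allocations.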
diff --git a/runtime/onert/backend/train/MemoryPlannerFactory.h b/runtime/onert/backend/train/MemoryPlannerFactory.h
index d1609e17559..7f42be46f90 100644
--- a/runtime/onert/backend/train/MemoryPlannerFactory.h
+++ b/runtime/onert/backend/train/MemoryPlannerFactory.h
@@ -28,7 +28,7 @@ namespace backend
 namespace train
 {
 
-class MemoryPlannerFactory
+template <typename Index> class MemoryPlannerFactory
 {
 public:
   static MemoryPlannerFactory &get();
@@ -38,7 +38,7 @@ class MemoryPlannerFactory
 
 public:
   // Currently, only the memory planner for DisposableTensor is supported
-  basic::IMemoryPlanner<DisposableTensorIndex> *create(const std::string &key);
+  basic::IMemoryPlanner<Index> *create(const std::string &key);
 };
 
 } // namespace train
diff --git a/runtime/onert/backend/train/TensorBuilder.cc b/runtime/onert/backend/train/TensorBuilder.cc
index ee737222be2..3b2d6f40b9a 100644
--- a/runtime/onert/backend/train/TensorBuilder.cc
+++ b/runtime/onert/backend/train/TensorBuilder.cc
@@ -17,6 +17,9 @@
 
 #include "TensorBuilder.h"
 
 #include "Tensor.h"
+#include "ExtraTensorIndex.h"
+
+#include
 
 namespace onert
 {
@@ -95,6 +98,12 @@ void TensorBuilder::registerDisposableBackwardTensorInfo(const DisposableTensorIndex &index,
   _disposable_backprops.add(index);
 }
 
+void TensorBuilder::registerExtraTensor(const ExtraTensorIndex &index,
+                                        std::shared_ptr<ExtraTensor> &tensor)
+{
+  _tensor_reg->setExtraTensor(index, tensor);
+}
+
 void TensorBuilder::notifyFirstUse(const ir::OperandIndex &index)
 {
   // TODO Support memory plan
@@ -155,6 +164,16 @@ void TensorBuilder::notifyDisposableBackPropLastUse(const DisposableTensorIndex
   _tensor_mgr->releaseDisposableBackPropPlan(index);
 }
 
+void TensorBuilder::notifyFirstUse(const ExtraTensorIndex &index)
+{
+  _tensor_mgr->claimExtraPlan(index);
+}
+
+void TensorBuilder::notifyLastUse(const ExtraTensorIndex &index)
+{
+  _tensor_mgr->releaseExtraPlan(index);
+}
+
 bool TensorBuilder::isRegistered(const ir::OperandIndex &index) const
 {
   return _tensor_info_map.find(index) != _tensor_info_map.end();
@@ -183,6 +202,8 @@ void TensorBuilder::allocateBackward(void)
   _tensor_mgr->allocateDisposableBackPropTensors();
 }
 
+void TensorBuilder::allocateExtra(void) { _tensor_mgr->allocateExtraTensors(); }
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/TensorBuilder.h b/runtime/onert/backend/train/TensorBuilder.h
index 1fa46855142..c62cf1acf03 100644
--- a/runtime/onert/backend/train/TensorBuilder.h
+++ b/runtime/onert/backend/train/TensorBuilder.h
@@ -18,6 +18,7 @@
 #define __ONERT_BACKEND_TRAIN_TENSOR_BUILDER_H__
 
 #include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
 #include "TensorManager.h"
 #include "TensorRegistry.h"
 #include "util/Set.h"
@@ -55,6 +56,8 @@ class TensorBuilder
   void registerDisposableBackwardTensorInfo(const DisposableTensorIndex &index,
                                             const ir::OperandInfo &info);
 
+  void registerExtraTensor(const ExtraTensorIndex &index, std::shared_ptr<ExtraTensor> &tensor);
+
   // TODO Support memory plan of all tensors
   void notifyFirstUse(const ir::OperandIndex &);
   void notifyLastUse(const ir::OperandIndex &);
@@ -62,6 +65,8 @@ class TensorBuilder
   void notifyBackwardLastUse(const ir::OperandIndex &);
   void notifyDisposableBackPropFirstUse(const DisposableTensorIndex &);
   void notifyDisposableBackPropLastUse(const DisposableTensorIndex &);
+  void notifyFirstUse(const ExtraTensorIndex &);
+  void notifyLastUse(const ExtraTensorIndex &);
 
   bool isRegistered(const ir::OperandIndex &) const;
   bool isRegisteredBackward(const ir::OperandIndex &) const;
@@ -69,6 +74,7 @@ class TensorBuilder
 
   void allocate(void);
   void allocateBackward(void);
+  void allocateExtra(void); // Note: must be called after kernel generation (genKernels)
 
 private:
   const std::shared_ptr<TensorRegistry> _tensor_reg;
diff --git a/runtime/onert/backend/train/TensorManager.cc b/runtime/onert/backend/train/TensorManager.cc
index d8404fcc9ed..675d11759b1 100644
--- a/runtime/onert/backend/train/TensorManager.cc
+++ b/runtime/onert/backend/train/TensorManager.cc
@@ -58,7 +58,8 @@ TensorManager::TensorManager(const std::shared_ptr<TensorRegistry> &reg, uint32_t optim_vars_count)
     _trainable_mgr{new TrainableMemoryManager(optim_vars_count)},
     _back_prop_mgr{new MemoryManager()}, _gradient_mgr{new MemoryManager()},
     // TODO Find a suitable planner of disposable tensors to reduce peak memory usage
-    _disposable_back_prop_mgr{new DisposableMemoryManager()}, _tensors{reg}
+    _disposable_back_prop_mgr{new DisposableMemoryManager()}, _extra_mgr{new ExtraMemoryManager()},
+    _tensors{reg}
 {
   // DO NOTHING
 }
@@ -115,6 +116,11 @@ void TensorManager::claimNonConstPlan(const ir::OperandIndex &index)
   _nonconst_mgr->claimPlan(index, size);
 }
 
+void TensorManager::allocateExtraTensors()
+{
+  allocateMemory(_extra_mgr.get(), _tensors->extra_tensors(), std::string{"EXTRA TENSOR "});
+}
+
 void TensorManager::releaseNonConstPlan(const ir::OperandIndex &index)
 {
   assert(_tensors->getNonConstTensor(index) && !_tensors->getNonConstTensor(index)->is_dynamic());
@@ -187,6 +193,19 @@ void TensorManager::releaseDisposableBackPropPlan(const DisposableTensorIndex &index)
   _disposable_back_prop_mgr->releasePlan(index);
 }
 
+void TensorManager::claimExtraPlan(const ExtraTensorIndex &index)
+{
+  const auto tensor = _tensors->getExtraTensor(index);
+
+  auto size = alignedSize(tensor->total_size(), _align);
+  _extra_mgr->claimPlan(index, size);
+}
+
+void TensorManager::releaseExtraPlan(const ExtraTensorIndex &index)
+{
+  _extra_mgr->releasePlan(index);
+}
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/train/TensorManager.h b/runtime/onert/backend/train/TensorManager.h
index 6e0910e182d..2da8420f26b 100644
--- a/runtime/onert/backend/train/TensorManager.h
+++ b/runtime/onert/backend/train/TensorManager.h
@@ -18,6 +18,7 @@
 #define __ONERT_BACKEND_TRAIN_TENSOR_MANAGER_H__
 
 #include "DisposableTensorIndex.h"
+#include "ExtraTensorIndex.h"
 #include "MemoryManager.h"
 #include "TensorRegistry.h"
 
@@ -49,6 +50,8 @@ class TensorManager
   void allocateBackPropTensors();
   void allocateGradientTensors();
   void allocateDisposableBackPropTensors();
+  void allocateExtraTensors();
+
   // TODO Add member functions to deallocate tensors
 
   void claimNonConstPlan(const ir::OperandIndex &ind);
@@ -61,6 +64,8 @@ class TensorManager
   void releaseGradientPlan(const ir::OperandIndex &ind);
   void claimDisposableBackPropPlan(const DisposableTensorIndex &ind);
   void releaseDisposableBackPropPlan(const DisposableTensorIndex &ind);
+  void claimExtraPlan(const ExtraTensorIndex &ind);
+  void releaseExtraPlan(const ExtraTensorIndex &ind);
 
 private:
   std::unique_ptr<MemoryManager> _nonconst_mgr;
@@ -68,6 +73,7 @@ class TensorManager
   std::unique_ptr<MemoryManager> _back_prop_mgr;
   std::unique_ptr<MemoryManager> _gradient_mgr;
   std::unique_ptr<DisposableMemoryManager> _disposable_back_prop_mgr;
+  std::unique_ptr<ExtraMemoryManager> _extra_mgr;
   const std::shared_ptr<TensorRegistry> _tensors;
 };
 
diff --git a/runtime/onert/backend/train/TensorRegistry.h b/runtime/onert/backend/train/TensorRegistry.h
index 13932199a9d..643695d61b0 100644
--- a/runtime/onert/backend/train/TensorRegistry.h
+++ b/runtime/onert/backend/train/TensorRegistry.h
@@ -18,6 +18,7 @@
 #define __ONERT_BACKEND_TRAIN_TENSOR_REGISTRY__
 
 #include
+#include
 
 #include "DisposableTensorIndex.h"
 #include "Tensor.h"
@@ -60,9 +61,35 @@ class TensorRegistry
     return _disposable_back_prop;
   }
 
+  std::shared_ptr<ExtraTensor> getExtraTensor(const ExtraTensorIndex &index)
+  {
+    auto itr = _extra.find(index);
+    if (itr != _extra.end())
+      return itr->second;
+
+    return nullptr;
+  }
+
+  void setExtraTensor(const ExtraTensorIndex &index, std::shared_ptr<ExtraTensor> &tensor)
+  {
+    assert(tensor != nullptr);
+    auto itr = _extra.find(index);
+    if (itr != _extra.end())
+      throw std::runtime_error{
+        "Tried to set an extra tensor but another extra tensor already exists."};
+
+    _extra[index] = tensor;
+  }
+
+  const std::unordered_map<ExtraTensorIndex, std::shared_ptr<ExtraTensor>> &extra_tensors()
+  {
+    return _extra;
+  }
+
 private:
   // Disposable Tensors to be accumulated to BackPropTensor
   std::unordered_map<DisposableTensorIndex, std::shared_ptr<BackPropTensor>> _disposable_back_prop;
+  std::unordered_map<ExtraTensorIndex, std::shared_ptr<ExtraTensor>> _extra;
 };
 
 } // namespace train
diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc
index 3c4ce2f7ce1..a1a19fe56fb 100644
--- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc
+++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.cc
@@ -55,11 +55,22 @@ void BinaryArithmeticLayer::configureBackward(IPortableTensor *back_prop_lhs,
 
   if (activation != ir::Activation::NONE)
   {
-    _act_back_prop_output = std::make_unique<Tensor>(_output->get_info());
-    _act_back_prop_output->setBuffer(std::make_shared<basic::Allocator>(_output->total_size()));
+    _act_back_prop_output = std::make_shared<ExtraTensor>(_back_prop_output->get_info());
   }
 }
 
+std::optional<ExtraTensors> BinaryArithmeticLayer::registerExtraTensors()
+{
+  ExtraTensors tensors;
+
+  if (_act_back_prop_output != nullptr)
+  {
+    tensors.push_back(_act_back_prop_output);
+  }
+
+  return std::optional<ExtraTensors>(tensors);
+}
+
 void BinaryArithmeticLayer::forward(bool) { cpu::ops::BinaryArithmeticLayer::run(); }
 
 void BinaryArithmeticLayer::backward()
diff --git a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h
index 60d6e8be1cc..598cd3327ca 100644
--- a/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h
+++ b/runtime/onert/backend/train/ops/BinaryArithmeticLayer.h
@@ -50,6 +50,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction,
   void configureBackward(IPortableTensor *back_prop_lhs, IPortableTensor *back_prop_rhs,
                          const IPortableTensor *back_prop_output, const ir::Activation activation,
                          const ArithmeticType arithmetic_type);
+  std::optional<ExtraTensors> registerExtraTensors() override;
   void forward(bool training) override;
   void backward() override;
 
@@ -60,7 +61,7 @@ class BinaryArithmeticLayer : public ::onert::exec::train::ITrainableFunction,
   ArithmeticType _arithmetic_type;
   ir::Activation _activation;
 
-  std::unique_ptr<Tensor> _act_back_prop_output;
+  std::shared_ptr<ExtraTensor> _act_back_prop_output;
 };
 
 } // namespace ops
diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.cc b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
index 41ff7fd1c43..28c66c13737 100644
--- a/runtime/onert/backend/train/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
@@ -31,7 +31,7 @@ namespace
 using namespace onert;
 
 template <typename Tensor>
-std::unique_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *origin_weights)
+std::shared_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *origin_weights)
 {
   const auto &origin_shape = origin_weights->getShape();
   assert(origin_shape.rank() == 4);
@@ -42,7 +42,7 @@ std::unique_ptr<Tensor> createTransposedWeights(const backend::IPortableTensor *
     ir::Shape{origin_shape.dim(1), origin_shape.dim(2), origin_shape.dim(3), origin_shape.dim(0)};
   transposed_info.shape(transposed_shape);
 
-  return std::make_unique<Tensor>(transposed_info);
+  return std::make_shared<Tensor>(transposed_info);
 }
 
 } // namespace
@@ -79,27 +79,30 @@ void ConvolutionLayer::configureBackward(const IPortableTensor *weights,
   if (_dilationHeightFactor != 1 || _dilationWidthFactor != 1)
     throw std::runtime_error("train ConvolutionLayer: Unsupported dilation yet");
 
-  // TODO Optimize transposed tensors
-  _transposed_weights = createTransposedWeights<Tensor>(weights);
-  _transposed_weights->setBuffer(
-    std::make_shared<basic::Allocator>(_transposed_weights->total_size()));
+  _transposed_weights = createTransposedWeights<ExtraTensor>(weights);
 
-  _conv_back_prop_output = std::make_unique<Tensor>(back_prop_output->get_info());
-  _conv_back_prop_output->setBuffer(
-    std::make_shared<basic::Allocator>(_conv_back_prop_output->total_size()));
+  _conv_back_prop_output = std::make_shared<ExtraTensor>(back_prop_output->get_info());
 
-  _transposed_grad_weights = createTransposedWeights<Tensor>(weights);
-  _transposed_grad_weights->setBuffer(
-    std::make_shared<basic::Allocator>(_transposed_grad_weights->total_size()));
+  _transposed_grad_weights = createTransposedWeights<ExtraTensor>(weights);
 
   if (activation != ir::Activation::NONE)
   {
-    _act_back_prop_output = std::make_unique<Tensor>(_back_prop_output->get_info());
-    _act_back_prop_output->setBuffer(
-      std::make_shared<basic::Allocator>(_act_back_prop_output->total_size()));
+    _act_back_prop_output = std::make_shared<ExtraTensor>(_back_prop_output->get_info());
   }
 }
 
+std::optional<ExtraTensors> ConvolutionLayer::registerExtraTensors()
+{
+  ExtraTensors tensors = {_transposed_weights, _conv_back_prop_output, _transposed_grad_weights};
+
+  if (_act_back_prop_output != nullptr)
+  {
+    tensors.push_back(_act_back_prop_output);
+  }
+
+  return std::optional<ExtraTensors>(tensors);
+}
+
 void ConvolutionLayer::forward(bool) { cpu::ops::ConvolutionLayer::run(); }
 
 void ConvolutionLayer::backward()
 {
diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.h b/runtime/onert/backend/train/ops/ConvolutionLayer.h
index ef11f68bf57..6df64a26b41 100644
--- a/runtime/onert/backend/train/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/train/ops/ConvolutionLayer.h
@@ -41,6 +41,7 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction,
   void configureBackward(const IPortableTensor *weights, IPortableTensor *back_prop_input,
                          IPortableTensor *grad_weights, IPortableTensor *grad_bias,
                          const IPortableTensor *back_prop_output, const ir::Activation activation);
+  std::optional<ExtraTensors> registerExtraTensors() override;
   void forward(bool training) override;
   void backward() override;
 
@@ -54,10 +55,10 @@ class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction,
   const IPortableTensor *_back_prop_output;
 
   // TODO Consider if these tensors should be built in TensorBuilder
-  std::unique_ptr<Tensor> _transposed_weights;
-  std::unique_ptr<Tensor> _conv_back_prop_output;
-  std::unique_ptr<Tensor> _act_back_prop_output;
-  std::unique_ptr<Tensor> _transposed_grad_weights;
+  std::shared_ptr<ExtraTensor> _transposed_weights;
+  std::shared_ptr<ExtraTensor> _conv_back_prop_output;
+  std::shared_ptr<ExtraTensor> _transposed_grad_weights;
+  std::shared_ptr<ExtraTensor> _act_back_prop_output;
 };
 
 } // namespace ops
diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
index 9d35655b26f..1e57f795228 100644
--- a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
@@ -28,7 +28,7 @@ namespace
 
 using namespace onert;
 
-std::unique_ptr<Tensor>
+std::shared_ptr<ExtraTensor>
 createTransposedTensor(const backend::IPortableTensor *origin_tensor)
 {
   const auto &origin_shape = origin_tensor->getShape();
@@ -38,7 +38,7 @@ createTransposedTensor(const backend::IPortableTensor *origin_tensor)
   auto transposed_shape = ir::Shape{origin_shape.dim(1), origin_shape.dim(0)};
   transposed_info.shape(transposed_shape);
 
-  return std::make_unique<Tensor>(transposed_info);
+  return std::make_shared<ExtraTensor>(transposed_info);
 }
 
 } // namespace
@@ -86,23 +86,28 @@ void FullyConnectedLayer::configureBackward(
       "train FullyConnectedLayer: Input other ranks than 2 are not supported."};
 
   _transposed_weights = createTransposedTensor(weights);
-  _transposed_weights->setBuffer(std::make_shared<basic::Allocator>(weights->total_size()));
 
   _transposed_input = createTransposedTensor(input);
-  _transposed_input->setBuffer(std::make_shared<basic::Allocator>(input->total_size()));
 
   _transposed_back_prop_output = createTransposedTensor(back_prop_output);
-  _transposed_back_prop_output->setBuffer(
-    std::make_shared<basic::Allocator>(back_prop_output->total_size()));
 
   if (activation != ir::Activation::NONE)
   {
-    _act_back_prop_output = std::make_unique<Tensor>(_back_prop_output->get_info());
-    _act_back_prop_output->setBuffer(
-      std::make_shared<basic::Allocator>(_back_prop_output->total_size()));
+    _act_back_prop_output = std::make_shared<ExtraTensor>(_back_prop_output->get_info());
   }
 }
 
+std::optional<ExtraTensors> FullyConnectedLayer::registerExtraTensors()
+{
+  ExtraTensors tensors = {_transposed_weights, _transposed_input, _transposed_back_prop_output};
+
+  if (_act_back_prop_output != nullptr)
+  {
+    tensors.push_back(_act_back_prop_output);
+  }
+
+  return tensors;
+}
+
 void FullyConnectedLayer::forward(bool) { cpu::ops::FullyConnectedLayer::run(); }
 
 void FullyConnectedLayer::backward()
diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.h b/runtime/onert/backend/train/ops/FullyConnectedLayer.h
index 190bfbffe42..5a691cb176b 100644
--- a/runtime/onert/backend/train/ops/FullyConnectedLayer.h
+++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.h
@@ -46,6 +46,7 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction,
                          const IPortableTensor *back_prop_output, ir::Activation activation,
                          ir::FullyConnectedWeightsFormat weights_format);
 
+  std::optional<ExtraTensors> registerExtraTensors() override;
   void forward(bool training) override;
   void backward() override;
 
@@ -58,11 +59,10 @@ class FullyConnectedLayer : public exec::train::ITrainableFunction,
   IPortableTensor *_back_prop_input;
   const IPortableTensor *_back_prop_output;
 
-  // TODO Optimize memory
-  std::unique_ptr<Tensor> _transposed_weights;
-  std::unique_ptr<Tensor> _transposed_input;
-  std::unique_ptr<Tensor> _transposed_back_prop_output;
-  std::unique_ptr<Tensor> _act_back_prop_output;
+  std::shared_ptr<ExtraTensor> _transposed_weights;
+  std::shared_ptr<ExtraTensor> _transposed_input;
+  std::shared_ptr<ExtraTensor> _transposed_back_prop_output;
+  std::shared_ptr<ExtraTensor> _act_back_prop_output;
 };
 
 } // namespace ops
diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc
index f77d58e6517..9a9f7b08701 100644
--- a/runtime/onert/backend/train/ops/PoolLayer.cc
+++ b/runtime/onert/backend/train/ops/PoolLayer.cc
@@ -41,8 +41,8 @@ class MaxPool2D final : public TrainingKernelRegistry
   const IPortableTensor *_output;
   nnfw::cker::PoolParams _op_params;
 
-  std::unique_ptr<Tensor> _act_back_prop_output;
-  std::unique_ptr<Tensor> _arg_max_index;
+  std::shared_ptr<ExtraTensor> _act_back_prop_output;
+  std::shared_ptr<ExtraTensor> _arg_max_index;
 
 public:
   MaxPool2D(const uint32_t paddingLeft, const uint32_t, const uint32_t paddingTop, const uint32_t,
@@ -62,20 +62,30 @@ class MaxPool2D final : public TrainingKernelRegistry
                               &_op_params.float_activation_max);
     }
 
-    _arg_max_index = std::make_unique<Tensor>(_output->get_info());
-    _arg_max_index->setBuffer(std::make_shared<basic::Allocator>(_output->total_size()));
+    _arg_max_index = std::make_shared<ExtraTensor>(_output->get_info());
 
     if (activation != ir::Activation::NONE)
     {
-      _act_back_prop_output = std::make_unique<Tensor>(_output->get_info());
-      _act_back_prop_output->setBuffer(std::make_shared<basic::Allocator>(_output->total_size()));
+      _act_back_prop_output = std::make_shared<ExtraTensor>(_output->get_info());
     }
   };
 
   ~MaxPool2D() {}
 
 public:
+  std::optional<ExtraTensors> registerExtraTensors() override
+  {
+    ExtraTensors tensors = {_arg_max_index};
+
+    if (_act_back_prop_output != nullptr)
+    {
+      tensors.push_back(_act_back_prop_output);
+    }
+
+    return std::optional<ExtraTensors>(tensors);
+  }
+
+public:
-  void forward(const IPortableTensor *in, IPortableTensor *out)
+  void forward(const IPortableTensor *in, IPortableTensor *out) override
   {
     assert(in->layout() == ir::Layout::NHWC);
 
@@ -88,7 +98,7 @@ class MaxPool2D final : public TrainingKernelRegistry
                           out_data, getBuffer(arg_max_index));
   }
 
-  void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in)
+  void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) override
   {
     assert(back_prop_out->layout() == ir::Layout::NHWC);
 
@@ -110,7 +120,7 @@ class MaxPool2D final : public TrainingKernelRegistry
                             getBuffer(arg_max_index), getShape(back_prop_in),
                             getBuffer(back_prop_in));
   }
 };
 
 } // namespace
 
@@ -149,6 +159,11 @@ void PoolLayer::configureBackward(const uint32_t paddingLeft, const uint32_t pad
   }
 }
 
+std::optional<ExtraTensors> PoolLayer::registerExtraTensors()
+{
+  return _kernel->registerExtraTensors();
+}
+
 void PoolLayer::forward(bool training)
 {
   if (training)
diff --git a/runtime/onert/backend/train/ops/PoolLayer.h b/runtime/onert/backend/train/ops/PoolLayer.h
index 5ced951ae6a..f76fc5b7020 100644
--- a/runtime/onert/backend/train/ops/PoolLayer.h
+++ b/runtime/onert/backend/train/ops/PoolLayer.h
@@ -38,6 +38,8 @@ class TrainingKernelRegistry
 public:
   virtual void forward(const IPortableTensor *in, IPortableTensor *out) = 0;
   virtual void backward(const IPortableTensor *back_prop_out, IPortableTensor *back_prop_in) = 0;
+  virtual std::optional<ExtraTensors> registerExtraTensors() = 0;
+
   TrainingKernelRegistry() = default;
   virtual ~TrainingKernelRegistry() = default;
 };
@@ -61,6 +63,7 @@ class PoolLayer : public ::onert::exec::train::ITrainableFunction, public cpu::ops::PoolLayer
                          IPortableTensor *output, IPortableTensor *back_prop_input,
                          const IPortableTensor *back_prop_output);
 
+  std::optional<ExtraTensors> registerExtraTensors() override;
   void forward(bool training) override;
   void backward() override;
 
diff --git a/runtime/onert/core/include/backend/train/ExtraTensor.h b/runtime/onert/core/include/backend/train/ExtraTensor.h
index 15253e6deb4..e0b63d359bc 100644
--- a/runtime/onert/core/include/backend/train/ExtraTensor.h
+++ b/runtime/onert/core/include/backend/train/ExtraTensor.h
@@ -26,6 +26,12 @@ namespace backend
 namespace train
 {
 
+enum class ExtraTensorLifeTime
+{
+  BACKWARD,            // alive only during backward()
+  FORWARD_TO_BACKWARD, // alive from forward() to backward()
+};
+
 // ExtraTensor is a tensor that is accessed within one operation layer.
 // In other words, the scope of the extra tensor is confined to one specific layer.
 class ExtraTensor final : public basic::Tensor
@@ -34,12 +40,26 @@ class ExtraTensor final : public basic::Tensor
   ExtraTensor() = delete;
 
 public:
-  ExtraTensor(const ir::OperandInfo &info) : basic::Tensor(info, nullptr)
+  ExtraTensor(const ir::OperandInfo &info, ExtraTensorLifeTime lt)
+    : basic::Tensor(info, nullptr), _lifetime(lt)
+  {
+    // DO NOTHING
+  }
+
+  ExtraTensor(const ir::OperandInfo &info)
+    : basic::Tensor(info, nullptr), _lifetime(ExtraTensorLifeTime::BACKWARD)
   {
     // DO NOTHING
   }
+
+  ExtraTensorLifeTime lifetime() const { return _lifetime; }
+
+private:
+  ExtraTensorLifeTime _lifetime;
 };
 
+using ExtraTensors = std::vector<std::shared_ptr<ExtraTensor>>;
+
 } // namespace train
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/core/include/exec/train/ITrainableFunction.h b/runtime/onert/core/include/exec/train/ITrainableFunction.h
index 45adc258f68..fd57ae3516b 100644
--- a/runtime/onert/core/include/exec/train/ITrainableFunction.h
+++ b/runtime/onert/core/include/exec/train/ITrainableFunction.h
@@ -18,6 +18,9 @@
 #define __ONERT_EXEC_TRAIN_I_TRAINABLE_FUNCTION_H__
 
 #include
+#include <optional>
+
+#include "backend/train/ExtraTensor.h"
 
 namespace onert
 {
@@ -26,12 +29,21 @@ namespace exec
 namespace train
 {
 
+// Q: registerExtraTensors() below is not a pure virtual function. If so, should this
+// class be renamed from ITrainableFunction to TrainableFunction?
+
 class ITrainableFunction
 {
 public:
   virtual ~ITrainableFunction() = default;
   virtual void forward(bool training) = 0;
   virtual void backward() = 0;
+
+  // Implement this if the layer needs extra tensors
+  virtual std::optional<ExtraTensors> registerExtraTensors()
+  {
+    return std::nullopt;
+  }
 };
 
 } // namespace train
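Reviewer note (not part of the patch): pulling the pieces together, a layer opts in by keeping `std::shared_ptr<ExtraTensor>` members, creating them unallocated at configure time, and handing them out via `registerExtraTensors()`. A minimal sketch assuming only the interfaces introduced above; `ExampleLayer` and `_scratch` are illustrative names that do not exist in this patch:

```cpp
#include <memory>
#include <optional>

#include "backend/train/ExtraTensor.h"
#include "exec/train/ITrainableFunction.h"
#include "ir/OperandInfo.h"

namespace example
{

using namespace onert::backend::train;

class ExampleLayer : public onert::exec::train::ITrainableFunction
{
public:
  void configure(const onert::ir::OperandInfo &info)
  {
    // Only describe the scratch tensor here; no buffer is attached.
    // BACKWARD lifetime: the buffer must be valid only while backward() runs.
    _scratch = std::make_shared<ExtraTensor>(info, ExtraTensorLifeTime::BACKWARD);
  }

  // Collected by ExtraTensorGenerator, which plans claim/release order from
  // each tensor's lifetime and allocates all buffers in one pass.
  std::optional<ExtraTensors> registerExtraTensors() override
  {
    return ExtraTensors{_scratch};
  }

  void forward(bool) override { /* ... */ }
  void backward() override { /* _scratch->buffer() is valid here */ }

private:
  std::shared_ptr<ExtraTensor> _scratch;
};

} // namespace example
```

BackendContext::gen() then forwards the returned tensors to ExtraTensorGenerator (register_tensors → plan → allocate), so a layer never calls setBuffer() itself anymore.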