From e00553cd19478d99ac3646242d2614693d1089ac Mon Sep 17 00:00:00 2001
From: xiaying
Date: Tue, 23 Apr 2024 13:54:38 +0800
Subject: [PATCH] [MNN:Bugfix] Fix bug for resize opt bug, support llama3 8b

---
 llm/include/llm.hpp                       | 13 ++++++++-
 llm/src/llm.cpp                           | 17 +++++++++++
 source/core/Pipeline.cpp                  | 32 ++++++++++++++-------
 source/core/Pipeline.hpp                  |  3 +-
 source/core/Schedule.cpp                  |  3 ++
 source/core/Schedule.hpp                  |  1 +
 source/core/Session.cpp                   |  1 +
 source/core/Tensor.cpp                    |  4 +--
 source/core/TensorUtils.hpp               |  6 ++--
 source/geometry/GeometryComputerUtils.cpp | 35 +++++++++--------------
 10 files changed, 75 insertions(+), 40 deletions(-)

diff --git a/llm/include/llm.hpp b/llm/include/llm.hpp
index 5b56fb481..d003ed106 100644
--- a/llm/include/llm.hpp
+++ b/llm/include/llm.hpp
@@ -228,7 +228,18 @@ class Llama2_7b : public Llm {
     virtual VARP gen_position_ids(int seq_len) override;
     virtual bool is_stop(int token_id) override;
 };
-
+class Llama3_8b : public Llama2_7b {
+public:
+    Llama3_8b() {
+        model_name_ = "Llama3_8b";
+        layer_nums_ = 32;
+        key_value_shape_ = {2, 1, 8, 0, 128};
+        hidden_size_ = 4096;
+    }
+private:
+    virtual std::vector<int> tokenizer(const std::string& query) override;
+    virtual bool is_stop(int token_id) override;
+};
 class Qwen2 : public Llama2_7b {
 public:
     Qwen2() {
diff --git a/llm/src/llm.cpp b/llm/src/llm.cpp
index 118c2c019..89c38fa1b 100644
--- a/llm/src/llm.cpp
+++ b/llm/src/llm.cpp
@@ -13,6 +13,7 @@
 #include
 #include
+#include "cpp/ExprDebug.hpp"
 #include "llm.hpp"
 #include "tokenizer.hpp"
@@ -86,6 +87,9 @@ Llm* Llm::createLLM(const std::string& path, std::string model_type, int forward
     } else if (model_type.find("yi") != std::string::npos) {
         llm = new Yi_6b;
         llm->model_name_ = "Yi_6b";
+    } else if (model_type.find("llama3") != std::string::npos) {
+        llm = new Llama3_8b;
+        llm->model_name_ = "Llama3_8b";
     }
     if (!llm) {
         std::cerr << "model type can't judge!" << std::endl;
@@ -229,6 +233,8 @@ void Llm::load(const std::string& model_dir) {
     config.backendConfig = &cpuBackendConfig;
     runtime_manager_.reset(Executor::RuntimeManager::createRuntimeManager(config));
     runtime_manager_->setHint(MNN::Interpreter::MEM_ALLOCATOR_TYPE, 0);
+//    runtime_manager_->setMode(MNN::Interpreter::Session_Debug);
+//    _initTensorStatic();
     {
         runtime_manager_->setCache(".tempcache");
     }
@@ -801,6 +807,17 @@ std::vector<int> Yi_6b::tokenizer(const std::string& query) {
 bool Yi_6b::is_stop(int token_id) {
     return token_id == 7 || token_id == 64001;
 }
+std::vector<int> Llama3_8b::tokenizer(const std::string& query) {
+    // <|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n+query+<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n
+    auto ids = tokenizer_encode(query);
+    ids.insert(ids.begin(), {128000, 128006, 882, 128007, 271});
+    ids.insert(ids.end(), {128009, 128006, 78191, 128007, 271});
+    return ids;
+}
+
+bool Llama3_8b::is_stop(int token_id) {
+    return token_id == 128001 || token_id == 128009;
+}
 // Llm end
 // Embedding start
diff --git a/source/core/Pipeline.cpp b/source/core/Pipeline.cpp
index 163b7a2a3..3be39b13a 100644
--- a/source/core/Pipeline.cpp
+++ b/source/core/Pipeline.cpp
@@ -706,16 +706,26 @@ static void _makeCopyOp(std::shared_ptr<BufferStorage>& copyOp) {
         copyOp->storage = builder.ReleaseRaw(copyOp->allocated_size, copyOp->offset);
     }
 }
-static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, std::shared_ptr<Tensor>>& mCacheConstTensors, std::map<std::pair<Tensor*, Backend*>, std::shared_ptr<Tensor>>& shapeFixConstCache, bool ownInput, bool permitCodegen) {
+static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, std::shared_ptr<Tensor>>& mCacheConstTensors, Pipeline::WrapTensorCache& shapeFixConstCache, bool ownInput, bool permitCodegen) {
     std::shared_ptr<BufferStorage> copyOp;
-    for (auto& iter : shapeFixConstCache) {
-        auto des = TensorUtils::getDescribe(iter.second.get());
-        if (des->usage == Tensor::InsideDescribe::CONSTANT && des->stageMask == 0) {
-            // If the tensor is not compute in shape-geometry stage, needn't recopy it
+    for (auto iterP = shapeFixConstCache.begin(); iterP != shapeFixConstCache.end();) {
+        auto& iter = *iterP;
+        if (iter.second.first.lock() == nullptr) {
+            // Has released, remove cache
+            iterP = shapeFixConstCache.erase(iterP);
             continue;
         }
-        TensorUtils::getDescribeOrigin(iter.second.get())->setBackend(nullptr);
-        TensorUtils::getDescribeOrigin(iter.second.get())->mem = nullptr;
+        auto des = iter.first.first;
+        bool needReset = true;
+        if (des->usage == Tensor::InsideDescribe::CONSTANT && ((des->stageMask & Tensor::InsideDescribe::CONTENT_NOT_CHANGE) != 0)) {
+            // If the tensor is not compute in shape-geometry stage, needn't recopy it
+            needReset = false;
+        }
+        if (needReset) {
+            TensorUtils::getDescribeOrigin(iter.second.second.get())->setBackend(nullptr);
+            TensorUtils::getDescribeOrigin(iter.second.second.get())->mem = nullptr;
+        }
+        iterP++;
     }
     for (auto& info : mInfo.second) {
         if (info.type == Schedule::CONSTANT) {
@@ -778,12 +788,12 @@ static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map
-                            newTensor = titer->second.get();
+                            newTensor = titer->second.second.get();
                         } else {
                             std::shared_ptr<Tensor> tensor(new Tensor);
-                            shapeFixConstCache.insert(std::make_pair(std::make_pair(t, curBackend), tensor));
+                            shapeFixConstCache.insert(std::make_pair(std::make_pair(des, curBackend), std::make_pair(std::weak_ptr(TensorUtils::getDescribeOrigin(t)->mContent), tensor)));
                             newTensor = tensor.get();
                         }
                         iter.workInputs[v] = newTensor;
@@ -1067,7 +1077,7 @@ ErrorCode Pipeline::allocMemory(bool firstMalloc, bool forbidReplace) {
             }
             auto des = TensorUtils::getDescribe(t);
             auto usage = des->usage;
-            if (TensorUtils::getDescribeOrigin(t)->mContent->count() > 1 && usage != Tensor::InsideDescribe::CONSTANT) {
+            if (TensorUtils::getDescribeOrigin(t)->mContent.use_count() > 1 && usage != Tensor::InsideDescribe::CONSTANT) {
                 TensorUtils::getDescribeOrigin(t)->mem = nullptr;
                 auto res = TensorUtils::getDescribeOrigin(t)->getBackend()->onAcquireBuffer(t, Backend::STATIC);
                 if (!res) {
diff --git a/source/core/Pipeline.hpp b/source/core/Pipeline.hpp
index 6fcaf142e..c32701db5 100644
--- a/source/core/Pipeline.hpp
+++ b/source/core/Pipeline.hpp
@@ -62,6 +62,7 @@ class Pipeline : public NonCopyable {
     MNNForwardType getMainForwardType() const {
         return mInfo.first.cache.first->type();
     }
+    typedef std::map<std::pair<Tensor::InsideDescribe::NativeInsideDescribe*, Backend*>, std::pair<std::weak_ptr<Tensor::InsideDescribe::NativeInsideDescribe>, std::shared_ptr<Tensor>>> WrapTensorCache;
 private:
     ErrorCode _allocForTensor(int index, bool allocInput);
     void _copyInputs();
@@ -76,7 +77,7 @@
     // For gpu or other backend
     std::map<Tensor*, std::shared_ptr<Tensor>> mCacheConstTensors;
-    std::map<std::pair<Tensor*, Backend*>, std::shared_ptr<Tensor>> mWrapTensors;
+    WrapTensorCache mWrapTensors;
 #ifndef MNN_BUILD_MINI
     GeometryComputer::Context mContext;
     Runtime::CompilerType mUseGeometry;
diff --git a/source/core/Schedule.cpp b/source/core/Schedule.cpp
index 1c8cd0878..74b949941 100644
--- a/source/core/Schedule.cpp
+++ b/source/core/Schedule.cpp
@@ -81,6 +81,9 @@ bool Schedule::OpResizeCache::match(const std::vector<Tensor*>& inputs) {
 void Schedule::OpResizeCache::open() {
     mCanCache = true;
 }
+void Schedule::OpResizeCache::copyImmutable(const OpResizeCache& cache) {
+    mNeedCompareContent = cache.mNeedCompareContent;
+}
 void Schedule::OpResizeCache::insert(const std::vector<Tensor*>& inputs) {
     if (!mCanCache) {
diff --git a/source/core/Schedule.hpp b/source/core/Schedule.hpp
index 476f37343..e05c3133c 100644
--- a/source/core/Schedule.hpp
+++ b/source/core/Schedule.hpp
@@ -42,6 +42,7 @@ class MNN_PUBLIC Schedule {
         bool needComputeShape = true;
         bool needExecuteConst = false;
         void addContentIndex(int index);
+        void copyImmutable(const OpResizeCache& cache);
     private:
         struct ShapeInfo {
             int order;
diff --git a/source/core/Session.cpp b/source/core/Session.cpp
index 9537bee77..5998c9253 100644
--- a/source/core/Session.cpp
+++ b/source/core/Session.cpp
@@ -427,6 +427,7 @@ Session* Session::clone(RuntimeInfo&& runtime, std::shared_ptr
         if (nullptr != op->outputIndexes()) {
             auto data = op->outputIndexes()->data();
diff --git a/source/core/Tensor.cpp b/source/core/Tensor.cpp
index 5bf50a0b2..4165f1ab7 100644
--- a/source/core/Tensor.cpp
+++ b/source/core/Tensor.cpp
@@ -20,7 +20,7 @@ namespace MNN {
 Tensor::Tensor(int dimSize, DimensionType type) {
     MNN_ASSERT(dimSize <= MNN_MAX_TENSOR_DIM);
     mDescribe = new InsideDescribe;
-    mDescribe->mContent = new InsideDescribe::NativeInsideDescribe;
+    mDescribe->mContent.reset(new InsideDescribe::NativeInsideDescribe);
     auto nativeDescribe = mDescribe->mContent.get();
     mBuffer.dimensions = dimSize;
     mBuffer.type = halide_type_of<float>();
@@ -49,7 +49,7 @@ Tensor::Tensor(const Tensor* tensor, DimensionType type, bool allocMemory) {
     auto buffer = tensor->buffer();
     mDescribe = new InsideDescribe;
-    mDescribe->mContent = new InsideDescribe::NativeInsideDescribe;
+    mDescribe->mContent.reset(new InsideDescribe::NativeInsideDescribe);
     auto nativeDescribe = mDescribe->mContent.get();
     mBuffer.dimensions = buffer.dimensions;
     mBuffer.type = buffer.type;
diff --git a/source/core/TensorUtils.hpp b/source/core/TensorUtils.hpp
index 61d15776a..d8f8498ec 100644
--- a/source/core/TensorUtils.hpp
+++ b/source/core/TensorUtils.hpp
@@ -79,10 +79,10 @@ struct Tensor::InsideDescribe {
         GEOMETRY_STAGE = 1,
         CONVERTED_STAGE = 1 << 1,
         COMPUTE_SHAPE_STAGE = 1 << 2,
-        COMPUTE_CONTENT_STAGE = 1 << 3,
+        CONTENT_NOT_CHANGE = 1 << 3,
     };
     /** extra tensor info container */
-    struct NativeInsideDescribe : public RefCount {
+    struct NativeInsideDescribe {
     public:
         /** dimension format */
         MNN_DATA_FORMAT dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
@@ -115,7 +115,7 @@ struct Tensor::InsideDescribe {
         // For isMutable = false Tensor , determine whether the content can be convert to main backend
         uint32_t stageMask = 0;
     };
-    SharedPtr<NativeInsideDescribe> mContent;
+    std::shared_ptr<NativeInsideDescribe> mContent;
     SharedPtr<Backend::MemObj> mem;
     inline Backend* getBackend() const {
         return backend;
diff --git a/source/geometry/GeometryComputerUtils.cpp b/source/geometry/GeometryComputerUtils.cpp
index a9377df7d..0dc328691 100644
--- a/source/geometry/GeometryComputerUtils.cpp
+++ b/source/geometry/GeometryComputerUtils.cpp
@@ -164,8 +164,8 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
                 auto type = des->memoryType;
                 MNN_ASSERT(type != Tensor::InsideDescribe::MEMORY_OUTSIDE);
                 MNN_ASSERT(type != Tensor::InsideDescribe::MEMORY_HOST);
-                if (TensorUtils::getDescribeOrigin(t)->mContent->count() > 1) {
-                    TensorUtils::getDescribeOrigin(t)->mContent = new Tensor::InsideDescribe::NativeInsideDescribe;
+                if (TensorUtils::getDescribeOrigin(t)->mContent.use_count() > 1) {
+                    TensorUtils::getDescribeOrigin(t)->mContent.reset(new Tensor::InsideDescribe::NativeInsideDescribe);
                     t->buffer().dim = TensorUtils::getDescribe(t)->dims;
                     TensorUtils::getDescribeOrigin(t)->setBackend(nullptr);
                     TensorUtils::getDescribeOrigin(t)->mem = nullptr;
@@ -210,13 +210,18 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
                 TensorUtils::getDescribe(t)->rasterCommand.reset();
                 TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::COMPUTE_SHAPE_STAGE;
                 // The content may be computed by geometry computer, which will not make execution
-                TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE);
+                TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE);
             }
         }
         info.computeCache.needComputeShape = needCompute;
         if (info.type != Schedule::CONSTANT) {
             continue;
         }
+        if (!needCompute) {
+            for (auto t : info.outputs) {
+                TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE;
+            }
+        }
         if (_hasZeroShapeOutput(info)) {
             continue;
         }
@@ -292,7 +297,7 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
                         dirty = true;
                         break;
                     }
-                    if ((des->stageMask & Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE) == 0) {
+                    if ((des->stageMask & Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE) == 0) {
                         dirty = true;
                         break;
                     }
@@ -305,26 +310,12 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
                     return NOT_SUPPORT;
                 }
                 for (auto t : c.outputs) {
-                    TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE);
+                    TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE);
                 }
-            }
-        }
-    }
-    for (int i=0; i
-            if ((!des->isMutable) || des->group) {
-                continue;
+            } else {
+                for (auto t : c.outputs) {
+                    TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE;
                 }
-            des->stageMask |= Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE;
             }
         }
     }
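
Note: the core of the resize-optimization fix above is the new Pipeline::WrapTensorCache. Wrapped constant tensors are now keyed by their NativeInsideDescribe (plus the target Backend*) and paired with a std::weak_ptr to that describe, so _InsertCopy can erase cache entries whose source tensor has already been released and skip re-copying constants whose stageMask carries the renamed CONTENT_NOT_CHANGE bit. What follows is a minimal, self-contained sketch of that erase-expired-then-reset walk; the type and function names are illustrative stand-ins, not MNN's own API.

    #include <map>
    #include <memory>
    #include <utility>

    // Illustrative stand-ins for MNN's NativeInsideDescribe and a backend-side tensor copy.
    struct Describe { bool constant = false; bool contentUnchanged = false; };
    struct WrappedTensor {};

    using WrapCache = std::map<const Describe*,
                               std::pair<std::weak_ptr<Describe>, std::shared_ptr<WrappedTensor>>>;

    // Drop entries whose source describe has been released; otherwise keep the entry
    // and rebuild the wrapped copy unless it is a constant whose content did not change.
    inline void pruneWrapCache(WrapCache& cache) {
        for (auto it = cache.begin(); it != cache.end();) {
            if (it->second.first.expired()) {
                it = cache.erase(it);       // source gone: stale wrap, remove it
                continue;
            }
            const Describe* des = it->first;
            if (!(des->constant && des->contentUnchanged)) {
                it->second.second = std::make_shared<WrappedTensor>(); // force a fresh copy next run
            }
            ++it;
        }
    }

In the patch itself the reset does not reallocate: it clears the wrapped tensor's backend and memory (setBackend(nullptr), mem = nullptr) so the next resize re-copies it, and the cached value holds a std::shared_ptr<Tensor> for the wrap; the overall walk is the same.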