Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MNN:Bugfix] Fix resize-optimization bug; support Llama3-8B #2842

Merged
merged 1 commit into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion llm/include/llm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,18 @@ class Llama2_7b : public Llm {
virtual VARP gen_position_ids(int seq_len) override;
virtual bool is_stop(int token_id) override;
};

// Llama3-8B model variant: reuses the Llama2-7B execution path and only
// overrides the model dimensions, prompt template and stop-token set.
class Llama3_8b : public Llama2_7b {
public:
    Llama3_8b() {
        model_name_ = "Llama3_8b";
        layer_nums_ = 32;
        // KV-cache shape {2, 1, 8, 0, 128}: presumably
        // {K/V pair, batch, kv-heads (GQA), dynamic seq-len, head-dim} — TODO confirm
        key_value_shape_ = {2, 1, 8, 0, 128};
        hidden_size_ = 4096;
    }
private:
    // Wraps the raw query in the Llama3 chat prompt template (see llm.cpp).
    virtual std::vector<int> tokenizer(const std::string& query) override;
    // Returns true for Llama3 end-of-generation token ids.
    virtual bool is_stop(int token_id) override;
};
class Qwen2 : public Llama2_7b {
public:
Qwen2() {
Expand Down
17 changes: 17 additions & 0 deletions llm/src/llm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include <MNN/expr/ExecutorScope.hpp>
#include <MNN/AutoTime.hpp>
#include "cpp/ExprDebug.hpp"
#include "llm.hpp"
#include "tokenizer.hpp"

Expand Down Expand Up @@ -86,6 +87,9 @@ Llm* Llm::createLLM(const std::string& path, std::string model_type, int forward
} else if (model_type.find("yi") != std::string::npos) {
llm = new Yi_6b;
llm->model_name_ = "Yi_6b";
} else if (model_type.find("llama3") != std::string::npos) {
llm = new Llama3_8b;
llm->model_name_ = "Llama3_8b";
}
if (!llm) {
std::cerr << "model type can't judge!" << std::endl;
Expand Down Expand Up @@ -229,6 +233,8 @@ void Llm::load(const std::string& model_dir) {
config.backendConfig = &cpuBackendConfig;
runtime_manager_.reset(Executor::RuntimeManager::createRuntimeManager(config));
runtime_manager_->setHint(MNN::Interpreter::MEM_ALLOCATOR_TYPE, 0);
// runtime_manager_->setMode(MNN::Interpreter::Session_Debug);
// _initTensorStatic();
{
runtime_manager_->setCache(".tempcache");
}
Expand Down Expand Up @@ -801,6 +807,17 @@ std::vector<int> Yi_6b::tokenizer(const std::string& query) {
bool Yi_6b::is_stop(int token_id) {
return token_id == 7 || token_id == 64001;
}
std::vector<int> Llama3_8b::tokenizer(const std::string& query) {
    // Builds the Llama3 chat prompt:
    // <|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n+query+<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n
    static const std::vector<int> kPrefix{128000, 128006, 882, 128007, 271};
    static const std::vector<int> kSuffix{128009, 128006, 78191, 128007, 271};
    auto queryIds = tokenizer_encode(query);
    std::vector<int> ids;
    ids.reserve(kPrefix.size() + queryIds.size() + kSuffix.size());
    ids.insert(ids.end(), kPrefix.begin(), kPrefix.end());
    ids.insert(ids.end(), queryIds.begin(), queryIds.end());
    ids.insert(ids.end(), kSuffix.begin(), kSuffix.end());
    return ids;
}

// Llama3 generation stops on 128009 (<|eot_id|>, appended by the prompt
// template above) or 128001 (presumably <|end_of_text|> — confirm against
// the Llama3 tokenizer config).
bool Llama3_8b::is_stop(int token_id) {
    switch (token_id) {
        case 128001:
        case 128009:
            return true;
        default:
            return false;
    }
}
// Llm end

// Embedding start
Expand Down
32 changes: 21 additions & 11 deletions source/core/Pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,16 +706,26 @@ static void _makeCopyOp(std::shared_ptr<BufferStorage>& copyOp) {
copyOp->storage = builder.ReleaseRaw(copyOp->allocated_size, copyOp->offset);
}
}
static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, std::shared_ptr<Tensor>>& mCacheConstTensors, std::map<std::pair<Tensor*, Backend*>, std::shared_ptr<Tensor>>& shapeFixConstCache, bool ownInput, bool permitCodegen) {
static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, std::shared_ptr<Tensor>>& mCacheConstTensors, Pipeline::WrapTensorCache& shapeFixConstCache, bool ownInput, bool permitCodegen) {
std::shared_ptr<BufferStorage> copyOp;
for (auto& iter : shapeFixConstCache) {
auto des = TensorUtils::getDescribe(iter.second.get());
if (des->usage == Tensor::InsideDescribe::CONSTANT && des->stageMask == 0) {
// If the tensor is not compute in shape-geometry stage, needn't recopy it
for (auto iterP = shapeFixConstCache.begin(); iterP != shapeFixConstCache.end();) {
auto& iter = *iterP;
if (iter.second.first.lock() == nullptr) {
// Has released, remove cache
iterP = shapeFixConstCache.erase(iterP);
continue;
}
TensorUtils::getDescribeOrigin(iter.second.get())->setBackend(nullptr);
TensorUtils::getDescribeOrigin(iter.second.get())->mem = nullptr;
auto des = iter.first.first;
bool needReset = true;
if (des->usage == Tensor::InsideDescribe::CONSTANT && ((des->stageMask & Tensor::InsideDescribe::CONTENT_NOT_CHANGE) != 0)) {
// If the tensor is not compute in shape-geometry stage, needn't recopy it
needReset = false;
}
if (needReset) {
TensorUtils::getDescribeOrigin(iter.second.second.get())->setBackend(nullptr);
TensorUtils::getDescribeOrigin(iter.second.second.get())->mem = nullptr;
}
iterP++;
}
for (auto& info : mInfo.second) {
if (info.type == Schedule::CONSTANT) {
Expand Down Expand Up @@ -778,12 +788,12 @@ static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, st
}
}
{
auto titer = shapeFixConstCache.find(std::make_pair(t, curBackend));
auto titer = shapeFixConstCache.find(std::make_pair(des, curBackend));
if (titer != shapeFixConstCache.end()) {
newTensor = titer->second.get();
newTensor = titer->second.second.get();
} else {
std::shared_ptr<MNN::Tensor> tensor(new Tensor);
shapeFixConstCache.insert(std::make_pair(std::make_pair(t, curBackend), tensor));
shapeFixConstCache.insert(std::make_pair(std::make_pair(des, curBackend), std::make_pair(std::weak_ptr<Tensor::InsideDescribe::NativeInsideDescribe>(TensorUtils::getDescribeOrigin(t)->mContent), tensor)));
newTensor = tensor.get();
}
iter.workInputs[v] = newTensor;
Expand Down Expand Up @@ -1067,7 +1077,7 @@ ErrorCode Pipeline::allocMemory(bool firstMalloc, bool forbidReplace) {
}
auto des = TensorUtils::getDescribe(t);
auto usage = des->usage;
if (TensorUtils::getDescribeOrigin(t)->mContent->count() > 1 && usage != Tensor::InsideDescribe::CONSTANT) {
if (TensorUtils::getDescribeOrigin(t)->mContent.use_count() > 1 && usage != Tensor::InsideDescribe::CONSTANT) {
TensorUtils::getDescribeOrigin(t)->mem = nullptr;
auto res = TensorUtils::getDescribeOrigin(t)->getBackend()->onAcquireBuffer(t, Backend::STATIC);
if (!res) {
Expand Down
3 changes: 2 additions & 1 deletion source/core/Pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class Pipeline : public NonCopyable {
// Forward type of the pipeline's primary (first cached) backend.
MNNForwardType getMainForwardType() const {
    return mInfo.first.cache.first->type();
}
// Cache of wrap/copy tensors keyed by (source tensor's describe, target backend).
// The weak_ptr tracks the source tensor's native describe so entries whose
// source tensor has been released can be detected (lock() == nullptr) and
// dropped; the shared_ptr holds the backend-side copy.
typedef std::map<std::pair<Tensor::InsideDescribe::NativeInsideDescribe*, Backend*>, std::pair<std::weak_ptr<Tensor::InsideDescribe::NativeInsideDescribe>, std::shared_ptr<Tensor>>> WrapTensorCache;
private:
ErrorCode _allocForTensor(int index, bool allocInput);
void _copyInputs();
Expand All @@ -76,7 +77,7 @@ class Pipeline : public NonCopyable {

// For gpu or other backend
std::map<Tensor*, std::shared_ptr<Tensor>> mCacheConstTensors;
std::map<std::pair<Tensor*, Backend*>, std::shared_ptr<Tensor>> mWrapTensors;
WrapTensorCache mWrapTensors;
#ifndef MNN_BUILD_MINI
GeometryComputer::Context mContext;
Runtime::CompilerType mUseGeometry;
Expand Down
3 changes: 3 additions & 0 deletions source/core/Schedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ bool Schedule::OpResizeCache::match(const std::vector<Tensor*>& inputs) {
// Enables resize-result caching for this op.
void Schedule::OpResizeCache::open() {
    mCanCache = true;
}
// Copies only the fields that remain valid when a Session is cloned
// (called from Session::clone); per-session shape records are not copied.
void Schedule::OpResizeCache::copyImmutable(const OpResizeCache& cache) {
    mNeedCompareContent = cache.mNeedCompareContent;
}

void Schedule::OpResizeCache::insert(const std::vector<Tensor*>& inputs) {
if (!mCanCache) {
Expand Down
1 change: 1 addition & 0 deletions source/core/Schedule.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class MNN_PUBLIC Schedule {
bool needComputeShape = true;
bool needExecuteConst = false;
void addContentIndex(int index);
void copyImmutable(const OpResizeCache& cache);
private:
struct ShapeInfo {
int order;
Expand Down
1 change: 1 addition & 0 deletions source/core/Session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ Session* Session::clone(RuntimeInfo&& runtime, std::shared_ptr<Schedule::Schedul
auto& opInfo = oplists[i];
opInfo.op = opCaches[i].op;
opInfo.type = srcOpInfo.type;
opInfo.computeCache.copyImmutable(srcOpInfo.computeCache);
auto op = opInfo.op;
if (nullptr != op->outputIndexes()) {
auto data = op->outputIndexes()->data();
Expand Down
4 changes: 2 additions & 2 deletions source/core/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace MNN {
Tensor::Tensor(int dimSize, DimensionType type) {
MNN_ASSERT(dimSize <= MNN_MAX_TENSOR_DIM);
mDescribe = new InsideDescribe;
mDescribe->mContent = new InsideDescribe::NativeInsideDescribe;
mDescribe->mContent.reset(new InsideDescribe::NativeInsideDescribe);
auto nativeDescribe = mDescribe->mContent.get();
mBuffer.dimensions = dimSize;
mBuffer.type = halide_type_of<float>();
Expand Down Expand Up @@ -49,7 +49,7 @@ Tensor::Tensor(const Tensor* tensor, DimensionType type, bool allocMemory) {

auto buffer = tensor->buffer();
mDescribe = new InsideDescribe;
mDescribe->mContent = new InsideDescribe::NativeInsideDescribe;
mDescribe->mContent.reset(new InsideDescribe::NativeInsideDescribe);
auto nativeDescribe = mDescribe->mContent.get();
mBuffer.dimensions = buffer.dimensions;
mBuffer.type = buffer.type;
Expand Down
6 changes: 3 additions & 3 deletions source/core/TensorUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ struct Tensor::InsideDescribe {
GEOMETRY_STAGE = 1,
CONVERTED_STAGE = 1 << 1,
COMPUTE_SHAPE_STAGE = 1 << 2,
COMPUTE_CONTENT_STAGE = 1 << 3,
CONTENT_NOT_CHANGE = 1 << 3,
};
/** extra tensor info container */
struct NativeInsideDescribe : public RefCount {
struct NativeInsideDescribe {
public:
/** dimension format */
MNN_DATA_FORMAT dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
Expand Down Expand Up @@ -115,7 +115,7 @@ struct Tensor::InsideDescribe {
// For isMutable = false Tensor , determine whether the content can be convert to main backend
uint32_t stageMask = 0;
};
SharedPtr<NativeInsideDescribe> mContent;
std::shared_ptr<NativeInsideDescribe> mContent;
SharedPtr<Backend::MemObj> mem;
inline Backend* getBackend() const {
return backend;
Expand Down
35 changes: 13 additions & 22 deletions source/geometry/GeometryComputerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
auto type = des->memoryType;
MNN_ASSERT(type != Tensor::InsideDescribe::MEMORY_OUTSIDE);
MNN_ASSERT(type != Tensor::InsideDescribe::MEMORY_HOST);
if (TensorUtils::getDescribeOrigin(t)->mContent->count() > 1) {
TensorUtils::getDescribeOrigin(t)->mContent = new Tensor::InsideDescribe::NativeInsideDescribe;
if (TensorUtils::getDescribeOrigin(t)->mContent.use_count() > 1) {
TensorUtils::getDescribeOrigin(t)->mContent.reset(new Tensor::InsideDescribe::NativeInsideDescribe);
t->buffer().dim = TensorUtils::getDescribe(t)->dims;
TensorUtils::getDescribeOrigin(t)->setBackend(nullptr);
TensorUtils::getDescribeOrigin(t)->mem = nullptr;
Expand Down Expand Up @@ -210,13 +210,18 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
TensorUtils::getDescribe(t)->rasterCommand.reset();
TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::COMPUTE_SHAPE_STAGE;
// The content may be computed by geometry computer, which will not make execution
TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE);
TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE);
}
}
info.computeCache.needComputeShape = needCompute;
if (info.type != Schedule::CONSTANT) {
continue;
}
if (!needCompute) {
for (auto t : info.outputs) {
TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE;
}
}
if (_hasZeroShapeOutput(info)) {
continue;
}
Expand Down Expand Up @@ -292,7 +297,7 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
dirty = true;
break;
}
if ((des->stageMask & Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE) == 0) {
if ((des->stageMask & Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE) == 0) {
dirty = true;
break;
}
Expand All @@ -305,26 +310,12 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
return NOT_SUPPORT;
}
for (auto t : c.outputs) {
TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE);
TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE);
}
}
}
}
for (int i=0; i<infos.size(); ++i) {
auto& info = infos[i];
if (info.type != Schedule::CONSTANT) {
continue;
}
auto& cmdBufferVir = info.executeBuffer;
for (auto& cp : cmdBufferVir.command) {
auto& c = *cp;
bool dirty = false;
for (auto t : c.inputs) {
auto des = TensorUtils::getDescribe(t);
if ((!des->isMutable) || des->group) {
continue;
} else {
for (auto t : c.outputs) {
TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE;
}
des->stageMask |= Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE;
}
}
}
Expand Down
Loading