diff --git a/CMakeLists.txt b/CMakeLists.txt index 394e3b63..d5499958 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -205,7 +205,7 @@ if(QNN) set(CMAKE_LD_FLAGS "-shared -s -fPIC -pthread -fvisibility=hidden -flto") add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/src/backends/QNN) - add_executable(qnn_test ${PROJECT_SOURCE_DIR}/demo/qnn/qnn_test.cpp ${DIR_SRC_MEM_MANAGER} ${DIR_SRC_CPU} ${DIR_SRC_EXP} ${DIR_SRC} ) + add_executable(qnn_test ${PROJECT_SOURCE_DIR}/demo/qnn/qnn_test.cpp ${PROJECT_SOURCE_DIR}/demo/qnn/qnn_wrapper.hpp ${DIR_SRC_MEM_MANAGER} ${DIR_SRC_CPU} ${DIR_SRC_EXP} ${DIR_SRC} ) add_executable(silu_test ${PROJECT_SOURCE_DIR}/demo/qnn/silu_test.cpp ${DIR_SRC_MEM_MANAGER} ${DIR_SRC_CPU} ${DIR_SRC_EXP} ${DIR_SRC} ) target_link_libraries(qnn_test MLLM_CPU MLLM_QNN ${CMAKE_DL_LIBS}) target_link_libraries(silu_test MLLM_CPU MLLM_QNN ${CMAKE_DL_LIBS}) diff --git a/demo/qnn/qnn_test.cpp b/demo/qnn/qnn_test.cpp index dcdd6b11..45825e1d 100644 --- a/demo/qnn/qnn_test.cpp +++ b/demo/qnn/qnn_test.cpp @@ -9,7 +9,7 @@ #include "tokenizers/BPE/Bpe.hpp" #include "backends/QNN/QNNBackend.hpp" #include "memory/SystemMemoryManager.hpp" -#include "backends/QNN/op/QNNAdd.hpp" +#include "qnn_wrapper.hpp" using namespace mllm; @@ -30,63 +30,7 @@ int main() { // build graph std::cout << "build graph" << std::endl; - // graph add node - uint32_t dimensions[] = {1, 2, 2, 2}; - qbn->modelAddTensor("x", // Node Name - (Qnn_Tensor_t){ - .version = QNN_TENSOR_VERSION_1, - {.v1 = { - .id = 0, - .name = "x", - .type = QNN_TENSOR_TYPE_APP_WRITE, - .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, - .dataType = QNN_DATATYPE_FLOAT_32, - .quantizeParams = {QNN_DEFINITION_UNDEFINED, - QNN_QUANTIZATION_ENCODING_UNDEFINED, - {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, - .rank = 4, - .dimensions = dimensions, - .memType = QNN_TENSORMEMTYPE_RAW, - {.clientBuf = {.data = nullptr, - .dataSize = 0}}}}}); - - float data[] = {1, 2, 3, 4, 5, 6, 7, 8}; - qbn->modelAddTensor("y", // Node Name - (Qnn_Tensor_t){ - .version = QNN_TENSOR_VERSION_1, - {.v1 = { - .id = 0, - .name = "y", - .type = QNN_TENSOR_TYPE_STATIC, - .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, - .dataType = QNN_DATATYPE_FLOAT_32, - .quantizeParams = {QNN_DEFINITION_UNDEFINED, - QNN_QUANTIZATION_ENCODING_UNDEFINED, - {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, - .rank = 4, - .dimensions = dimensions, - .memType = QNN_TENSORMEMTYPE_RAW, - {.clientBuf = {.data = data, - .dataSize = 32}}}}}); - - vector outputs = { - (Qnn_Tensor_t){ - .version = QNN_TENSOR_VERSION_1, - {.v1 = { - .id = 0, - .name = "add-output", - .type = QNN_TENSOR_TYPE_APP_READ, - .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, - .dataType = QNN_DATATYPE_FLOAT_32, - .quantizeParams = {QNN_DEFINITION_UNDEFINED, - QNN_QUANTIZATION_ENCODING_UNDEFINED, - {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, - .rank = 4, - .dimensions = dimensions, - .memType = QNN_TENSORMEMTYPE_RAW, - {.clientBuf = {.data = nullptr, - .dataSize = 0}}}}}}; - qbn->graphAddNode("qnn-add", "ElementWiseAdd", {"x", "y"}, outputs, "qti.aisw"); + testMatMul(qbn); // graph compile std::cout << "graph compile" << std::endl; qbn->graphFinilize(); diff --git a/demo/qnn/qnn_wrapper.hpp b/demo/qnn/qnn_wrapper.hpp new file mode 100644 index 00000000..25a863f4 --- /dev/null +++ b/demo/qnn/qnn_wrapper.hpp @@ -0,0 +1,76 @@ +#include + +#include "backends/QNN/QNNBackend.hpp" +#include "memory/SystemMemoryManager.hpp" +#include 
"backends/QNN/op/QNNAdd.hpp" + +using namespace mllm; + +void testMatMul(QNNBackend *qbn) { + // graph add node + uint32_t dimensions0[] = {1, 2, 2, 2}; + uint32_t dimensions1[] = {1, 1, 4, 2}; + qbn->modelAddTensor("x", // Node Name + (Qnn_Tensor_t){ + .version = QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = "x", + .type = QNN_TENSOR_TYPE_APP_WRITE, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensions0, + .memType = QNN_TENSORMEMTYPE_RAW, + {.clientBuf = {.data = nullptr, + .dataSize = 0}}}}}); + + float data[] = {1, 2, 3, 4, 5, 6, 7, 8}; + qbn->modelAddTensor("y", // Node Name + (Qnn_Tensor_t){ + .version = QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = "y", + .type = QNN_TENSOR_TYPE_STATIC, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensions1, + .memType = QNN_TENSORMEMTYPE_RAW, + {.clientBuf = {.data = data, + .dataSize = 32}}}}}); + + uint32_t dimensionsOut[] = {1, 2, 2, 4}; + vector outputs = { + (Qnn_Tensor_t){ + .version = QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = "add-output", + .type = QNN_TENSOR_TYPE_APP_READ, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensionsOut, + .memType = QNN_TENSORMEMTYPE_RAW, + {.clientBuf = {.data = nullptr, + .dataSize = 0}}}}}}; + vector paramsMatmul = { + {.paramType = QNN_PARAMTYPE_SCALAR, + .name = "transpose_in0", + {.scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = 0}}}}, + {.paramType = QNN_PARAMTYPE_SCALAR, + .name = "transpose_in1", + {.scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = 1}}}}}; + qbn->graphAddNode("qnn-add", "MatMul", {"x", "y"}, outputs, paramsMatmul, "qti.aisw"); +} \ No newline at end of file diff --git a/src/backends/QNN/QNNBackend.cpp b/src/backends/QNN/QNNBackend.cpp index 0a40e911..68fdc33e 100644 --- a/src/backends/QNN/QNNBackend.cpp +++ b/src/backends/QNN/QNNBackend.cpp @@ -15,6 +15,7 @@ #include "PAL/DynamicLoading.hpp" #include "PAL/GetOpt.hpp" #include "QnnSampleAppUtils.hpp" +#include "QnnTypes.h" #include "QnnWrapperUtils.hpp" #include "DynamicLoadUtil.hpp" #include "Types.hpp" @@ -46,7 +47,7 @@ QNNBackend::QNNBackend(shared_ptr mm) : Backend(mm) { std::string modelPath = "/qnn-projects/QNN-test-libs/example_libs/x86_64-linux-clang/libqnn_model_float.so"; std::string backEndPath = "/qnn-projects/QNN-test-libs/libQnnCpu.so"; - std::string inputListPaths = "/qnn-projects/QNN-test-libs/input_list_float.txt"; + std::string inputListPaths = "/qnn-projects/mllm/bin/input-list.txt"; std::string opPackagePaths = "/qnn-projects/QNN-test-libs/libQnnCpuOpPackageExample.so:QnnOpPackage_interfaceProvider"; // TODO: make these configuable m_debug = true; @@ -182,15 +183,20 @@ qnn_wrapper_api::ModelError_t QNNBackend::graphAddNode(string name, string nodeType, std::vector inputTensorNames, std::vector outputTensors, + std::vector params, string packageName) { 
qnn_wrapper_api::ModelError_t err = qnn_wrapper_api::ModelError_t::MODEL_NO_ERROR; + Qnn_Param_t* paramsPtr = nullptr; + if (params.size() > 0) { + paramsPtr = params.data(); + } VALIDATE(qnnModel.addNode( QNN_OPCONFIG_VERSION_1, // Op_Config_t Version name.c_str(), // Node Name packageName.c_str(), // Package Name nodeType.c_str(), // Qnn Node Type - nullptr, // Node Params - 0, // Num Node Params + paramsPtr, // Node Params + params.size(), // Num Node Params inputTensorNames.data(), // Input Tensor Names inputTensorNames.size(), // Num Input Tensor Names outputTensors.data(), // Output Tensors diff --git a/src/backends/QNN/QNNBackend.hpp b/src/backends/QNN/QNNBackend.hpp index 7f1fafa0..57ea3577 100644 --- a/src/backends/QNN/QNNBackend.hpp +++ b/src/backends/QNN/QNNBackend.hpp @@ -4,6 +4,7 @@ #include "Backend.hpp" #include "Op.hpp" #include "OpDefined.hpp" +#include "QnnTypes.h" #include "Types.hpp" #include "MemoryManager.hpp" #include "NetParameter.hpp" @@ -20,19 +21,17 @@ using std::shared_ptr; using namespace qnn; using namespace qnn::tools; - namespace mllm { enum class StatusCode { - SUCCESS, - FAILURE, - FAILURE_INPUT_LIST_EXHAUSTED, - FAILURE_SYSTEM_ERROR, - FAILURE_SYSTEM_COMMUNICATION_ERROR, - QNN_FEATURE_UNSUPPORTED + SUCCESS, + FAILURE, + FAILURE_INPUT_LIST_EXHAUSTED, + FAILURE_SYSTEM_ERROR, + FAILURE_SYSTEM_COMMUNICATION_ERROR, + QNN_FEATURE_UNSUPPORTED }; - class Op; class Tensor; @@ -83,6 +82,7 @@ class QNNBackend : public Backend { qnn_wrapper_api::ModelError_t graphAddNode(string name, string nodeType, std::vector inputTensorNames, std::vector outputTensors, + std::vector params, string packageName); qnn_wrapper_api::ModelError_t graphFinilize(); qnn_wrapper_api::ModelError_t modelAddTensor(const char *nodeName, Qnn_Tensor_t tensor); @@ -97,7 +97,7 @@ class QNNBackend : public Backend { // @brief Print a message to STDERR then exit with a non-zero void reportError(const std::string &err); - + StatusCode initialize(); StatusCode initializeBackend(); @@ -132,7 +132,6 @@ class QNNBackend : public Backend { StatusCode verifyFailReturnStatus(Qnn_ErrorHandle_t errCode); - StatusCode extractBackendProfilingInfo(Qnn_ProfileHandle_t profileHandle); StatusCode extractProfilingSubEvents(QnnProfile_EventId_t profileEventId); @@ -155,7 +154,7 @@ class QNNBackend : public Backend { std::vector m_opPackagePaths; std::string m_outputPath; QnnBackend_Config_t **m_backendConfig = nullptr; - Qnn_ContextHandle_t m_context = nullptr; + Qnn_ContextHandle_t m_context = nullptr; QnnContext_Config_t **m_contextConfig = nullptr; bool m_debug; iotensor::OutputDataType m_outputDataType; @@ -174,15 +173,14 @@ class QNNBackend : public Backend { iotensor::IOTensor m_ioTensor; bool m_isBackendInitialized; bool m_isContextCreated; - Qnn_ProfileHandle_t m_profileBackendHandle = nullptr; + Qnn_ProfileHandle_t m_profileBackendHandle = nullptr; qnn_wrapper_api::GraphConfigInfo_t **m_graphConfigsInfo = nullptr; uint32_t m_graphConfigsInfoCount; - Qnn_LogHandle_t m_logHandle = nullptr; + Qnn_LogHandle_t m_logHandle = nullptr; Qnn_BackendHandle_t m_backendHandle = nullptr; - Qnn_DeviceHandle_t m_deviceHandle = nullptr; + Qnn_DeviceHandle_t m_deviceHandle = nullptr; }; - } // namespace mllm #endif // MLLM_QNNBACKEND_H \ No newline at end of file diff --git a/src/backends/QNN/op/QNNAdd.cpp b/src/backends/QNN/op/QNNAdd.cpp index e7935de1..d8170c40 100644 --- a/src/backends/QNN/op/QNNAdd.cpp +++ b/src/backends/QNN/op/QNNAdd.cpp @@ -25,8 +25,6 @@ ErrorCode QNNAdd::reshape(vector> inputs, vector> inputs, vector> 
outputs) { // graph add node - // TODO: check if name_ is set in Op return graphAddNode(name(), "Add", inputs, outputs); - return NO_ERROR; } } // namespace mllm \ No newline at end of file diff --git a/src/backends/QNN/op/QNNCausalMask.cpp b/src/backends/QNN/op/QNNCausalMask.cpp new file mode 100644 index 00000000..133f84a8 --- /dev/null +++ b/src/backends/QNN/op/QNNCausalMask.cpp @@ -0,0 +1,19 @@ + +#include "QNNCausalMask.hpp" +#include "Types.hpp" +#include "QNNCommonOp.hpp" + +namespace mllm { +QNNCausalMask::QNNCausalMask(Backend *bn, string opName) : + QNNCommonOp(bn, opName) { +} + +ErrorCode QNNCausalMask::reshape(vector> inputs, vector> outputs) { + return NO_ERROR; +} + +ErrorCode QNNCausalMask::setUp(vector> inputs, vector> outputs) { + return graphAddNode(name(), "Add", inputs, outputs); +} +} // namespace mllm + diff --git a/src/backends/QNN/op/QNNCausalMask.hpp b/src/backends/QNN/op/QNNCausalMask.hpp new file mode 100644 index 00000000..7ae59c1b --- /dev/null +++ b/src/backends/QNN/op/QNNCausalMask.hpp @@ -0,0 +1,24 @@ + +#ifndef MLLM_QNNCAUSALMASK_H +#define MLLM_QNNCAUSALMASK_H + +#include "QNNCommonOp.hpp" +namespace mllm { +class QNNCausalMask : public QNNCommonOp { +public: + QNNCausalMask(Backend *bn, string opName); + virtual ~QNNCausalMask() = default; + virtual ErrorCode reshape(vector> inputs, vector> outputs) override; + virtual ErrorCode setUp(vector> inputs, vector> outputs) override; +}; + +class QNNCausalMaskCreator : public QNNBackend::Creator { +public: + virtual Op *create(OpParam op_param, Backend *bn, string name) const { + return new QNNCausalMask(bn, name); + } +}; + +} // namespace mllm + +#endif diff --git a/src/backends/QNN/op/QNNCommonOp.cpp b/src/backends/QNN/op/QNNCommonOp.cpp index a0beb3d1..5bd4db0f 100644 --- a/src/backends/QNN/op/QNNCommonOp.cpp +++ b/src/backends/QNN/op/QNNCommonOp.cpp @@ -1,4 +1,6 @@ #include "QNNCommonOp.hpp" +#include "OpDefined.hpp" +#include "QnnTypes.h" #include "QnnWrapperUtils.hpp" #include "Types.hpp" @@ -30,34 +32,43 @@ ErrorCode QNNCommonOp::load(AbstructLoader &loader) { return NO_ERROR; } -ErrorCode QNNCommonOp::graphAddNode(string name, string nodeType, vector> inputs, vector> outputs, string packageName) { +ErrorCode QNNCommonOp::graphAddNode(string name, string nodeType, vector> inputs, vector> outputs, vector params, string packageName) { vector inputTensorNames; for (auto &input : inputs) { inputTensorNames.push_back(input->name().c_str()); } + vector outputTensors; - // TODO: convert tensors to Qnn_Tensor_t like below - uint32_t dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Relu_0[] = {1, 149, 149, 32}; - for (auto &t : outputs) { - outputTensors.push_back({ - .version = QNN_TENSOR_VERSION_1, - .v1 = { - .id = 0, - .name = t->name().c_str(), - .type = QNN_TENSOR_TYPE_APP_WRITE, - .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, - .dataType = QNN_DATATYPE_FLOAT_32, - .quantizeParams = {QNN_DEFINITION_UNDEFINED, - QNN_QUANTIZATION_ENCODING_UNDEFINED, - {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, - .rank = 4, - .dimensions = {}, - .memType = QNN_TENSORMEMTYPE_RAW, - .clientBuf = {.data = nullptr, .dataSize = 0}} - }); + for (auto &output : outputs) { + uint32_t dimensions[4]; + for (int i = 0; i < output->shape().size(); i++) { + dimensions[i] = output->shape()[i]; + } + outputTensors.push_back({QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = output->name().c_str(), + .type = QNN_TENSOR_TYPE_APP_READ, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = 
QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensions, + .memType = QNN_TENSORMEMTYPE_RAW, + {.clientBuf = {.data = nullptr, + .dataSize = 0}}}}}); + } + + if (qnn_wrapper_api::ModelError_t::MODEL_NO_ERROR != qnnBackend_->graphAddNode(name, nodeType, inputTensorNames, outputTensors, params, packageName)) { + return ErrorCode::INVALID_VALUE; } + return NO_ERROR; +} - if (qnn_wrapper_api::ModelError_t::MODEL_NO_ERROR != qnnBackend_->graphAddNode(name, nodeType, inputTensorNames, outputTensors, packageName)) { +ErrorCode QNNCommonOp::graphAddNode(string name, string nodeType, vector inputTensorNames, vector outputs, vector params, string packageName) { + if (qnn_wrapper_api::ModelError_t::MODEL_NO_ERROR != qnnBackend_->graphAddNode(name, nodeType, inputTensorNames, outputs, params, packageName)) { return ErrorCode::INVALID_VALUE; } return NO_ERROR; diff --git a/src/backends/QNN/op/QNNCommonOp.hpp b/src/backends/QNN/op/QNNCommonOp.hpp index e9306770..5037122f 100644 --- a/src/backends/QNN/op/QNNCommonOp.hpp +++ b/src/backends/QNN/op/QNNCommonOp.hpp @@ -3,6 +3,7 @@ #include "Op.hpp" #include "QNNBackend.hpp" +#include "QnnTypes.h" #include "Types.hpp" namespace mllm { @@ -18,7 +19,8 @@ class QNNCommonOp : public Op { protected: QNNBackend *qnnBackend_; - ErrorCode graphAddNode(string name, string nodeType, vector> inputs, vector> outputs, string packageName = "qti.aisw"); + ErrorCode graphAddNode(string name, string nodeType, vector> inputs, vector> outputs, vector params = {}, string packageName = "qti.aisw"); + ErrorCode graphAddNode(string name, string nodeType, vector inputs, vector outputs, vector params = {}, string packageName = "qti.aisw"); }; } // namespace mllm diff --git a/src/backends/QNN/op/QNNLinear.cpp b/src/backends/QNN/op/QNNLinear.cpp new file mode 100644 index 00000000..27caff85 --- /dev/null +++ b/src/backends/QNN/op/QNNLinear.cpp @@ -0,0 +1,165 @@ + +#include "QNNLinear.hpp" +#include "QnnTypes.h" +#include "Types.hpp" +#include "QNNCommonOp.hpp" +#include + +namespace mllm { +QNNLinear::QNNLinear(Backend *bn, string opName, int in_features, int out_features, bool bias) : + QNNCommonOp(bn, opName) { +} + +ErrorCode QNNLinear::reshape(vector> inputs, vector> outputs) { + CHECK_EQ(inputs.size(), 1); + CHECK_EQ(outputs.size(), 1); + // N | C | H | W + // ----------------------------------------------- + // 1 |out_channel | in_channel | 1 + // |out_features| in_features | + // ----------------------------------------------- + // batch |in_channel | seq_len | 1 + // |in_features | inputs[0]->sequence() | + // ----------------------------------------------- + // batch |out_channel | seq_len | 1 + // |out_features| inputs[0]->sequence() | + CHECK_EQ(inputs[0]->head(), 1); + CHECK_EQ(in_features_, inputs[0]->dimension()); + outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), inputs[0]->sequence(), out_features_); + return Op::reshape(inputs, outputs); +} + +ErrorCode QNNLinear::setUp(vector> inputs, vector> outputs) { + vector paramsMatmul = { + {.paramType = QNN_PARAMTYPE_SCALAR, + .name = "transpose_in0", + {.scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = 0}}}}, + {.paramType = QNN_PARAMTYPE_SCALAR, + .name = "transpose_in1", + {.scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = 0}}}}}; + // add weight tensor to qnn + uint32_t dimensionsWeight[4]; + for (int i = 0; i < 4; 
i++) { + dimensionsWeight[i] = weight_.shape()[i]; + } + qnnBackend_->modelAddTensor(weight_.name().c_str(), (Qnn_Tensor_t){ + .version = QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = weight_.name().c_str(), + .type = QNN_TENSOR_TYPE_STATIC, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensionsWeight, + .memType = QNN_TENSORMEMTYPE_RAW, + {.clientBuf = {.data = weight_.hostPtr(), + .dataSize = (uint32_t)weight_.cntSize()}}}}}); + // add input tensor to qnn + uint32_t dimensionsInput[4]; + for (int i = 0; i < 4; i++) { + dimensionsInput[i] = inputs[0]->shape()[i]; + } + qnnBackend_->modelAddTensor(inputs[0]->name().c_str(), (Qnn_Tensor_t){ + .version = QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = inputs[0]->name().c_str(), + .type = QNN_TENSOR_TYPE_APP_READ, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensionsInput, + .memType = QNN_TENSORMEMTYPE_RAW, + {.clientBuf = {.data = nullptr, + .dataSize = 0}}}}}); + + if (!support_bias_) { // if don't support bias, just matmul and write to outputs[0] + inputs.push_back(std::make_shared(weight_)); + return graphAddNode(name(), "MatMul", inputs, outputs, paramsMatmul); + } + + uint32_t dimensionsBias[4]; + for (int i = 0; i < 4; i++) { + dimensionsBias[i] = bias_.shape()[i]; + } + // add bias tensor to qnn + qnnBackend_->modelAddTensor(bias_.name().c_str(), (Qnn_Tensor_t){ + .version = QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = bias_.name().c_str(), + .type = QNN_TENSOR_TYPE_STATIC, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensionsBias, + .memType = QNN_TENSORMEMTYPE_RAW, + {.clientBuf = {.data = bias_.hostPtr(), + .dataSize = (uint32_t)bias_.cntSize()}}}}}); + // add intermediate output of matmul + vector intermediateOutput = { + {.version = QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = (name() + ".intermediate").c_str(), + .type = QNN_TENSOR_TYPE_NATIVE, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensionsBias, + .memType = QNN_TENSORMEMTYPE_RAW, + {.clientBuf = {.data = nullptr, + .dataSize = 0}}}}}}; + + graphAddNode(name(), "MatMul", {inputs[0]->name().c_str(), weight_.name().c_str()}, intermediateOutput, paramsMatmul); + + vector biasOutput = {{QNN_TENSOR_VERSION_1, + {.v1 = { + .id = 0, + .name = outputs[0]->name().c_str(), + .type = QNN_TENSOR_TYPE_APP_READ, + .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, + .dataType = QNN_DATATYPE_FLOAT_32, + .quantizeParams = {QNN_DEFINITION_UNDEFINED, + QNN_QUANTIZATION_ENCODING_UNDEFINED, + {.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}}}, + .rank = 4, + .dimensions = dimensionsBias, + .memType = QNN_TENSORMEMTYPE_RAW, + 
{.clientBuf = {.data = nullptr, + .dataSize = 0}}}}}}; + return graphAddNode(name(), "ElementWiseAdd", {(name() + ".intermediate").c_str(), bias_.name().c_str()}, + biasOutput, paramsMatmul); +} + +ErrorCode QNNLinear::load(AbstructLoader &loader) { + // std::cout << name() << " CPULinear load" << std::endl; + weight_.setName(name() + ".weight"); + weight_.reshape(1, 1, out_features_, in_features_); + weight_.setDtype(loader.getDataType(weight_.name())); + weight_.alloc(); + loader.load(&weight_); + + if (support_bias_) { + bias_.setName(name() + ".bias"); + bias_.reshape(1, 1, 1, out_features_); + bias_.setDtype(loader.getDataType(bias_.name())); + bias_.alloc(); + loader.load(&bias_); + } + return Op::load(loader); +} +} // namespace mllm diff --git a/src/backends/QNN/op/QNNLinear.hpp b/src/backends/QNN/op/QNNLinear.hpp new file mode 100644 index 00000000..12b914b8 --- /dev/null +++ b/src/backends/QNN/op/QNNLinear.hpp @@ -0,0 +1,36 @@ + +#ifndef MLLM_QNNLINEAR_H +#define MLLM_QNNLINEAR_H + +#include "QNNCommonOp.hpp" +namespace mllm { +class QNNLinear : public QNNCommonOp { +public: + QNNLinear(Backend *bn, string opName, int in_features, int out_features, bool bias); + virtual ~QNNLinear() = default; + virtual ErrorCode reshape(vector> inputs, vector> outputs) override; + virtual ErrorCode setUp(vector> inputs, vector> outputs) override; + virtual ErrorCode load(AbstructLoader &loader) override; + virtual ErrorCode free(vector> inputs, vector> outputs) override{}; + +private: + int in_features_; + int out_features_; + bool support_bias_; + Tensor weight_; + Tensor bias_; +}; + +class QNNLinearCreator : public QNNBackend::Creator { +public: + virtual Op *create(OpParam op_param, Backend *bn, string name) const { + int in_features = op_param["in_features"]; + int out_features = op_param["out_features"]; + int bias = op_param["bias"]; + return new QNNLinear(bn, name, in_features, out_features, (bool)bias); + } +}; + +} // namespace mllm + +#endif diff --git a/src/backends/QNN/op/QNNMatmul.cpp b/src/backends/QNN/op/QNNMatmul.cpp new file mode 100644 index 00000000..12057ef9 --- /dev/null +++ b/src/backends/QNN/op/QNNMatmul.cpp @@ -0,0 +1,68 @@ + +#include "QNNMatmul.hpp" +#include "Types.hpp" +#include "QNNCommonOp.hpp" + +namespace mllm { +QNNMatmul::QNNMatmul(Backend *bn, string opName) : + QNNCommonOp(bn, opName) { +} + +ErrorCode QNNMatmul::reshape(vector> inputs, vector> outputs) { + CHECK_EQ(inputs.size(), 2); + CHECK_EQ(outputs.size(), 1); + CHECK_EQ(inputs[0]->head(), inputs[1]->head()); + // CHECK_EQ(inputs[0]->head(), 1); + CHECK_EQ(inputs[0]->batch(), inputs[1]->batch()); + if (!transpose0_ && !transpose1_) { + /* + N | C | H | W + ----------------------------------------------- + batch |out_channel | in_channel | 1 + ----------------------------------------------- + batch |in_channel | seq_len | 1 + ----------------------------------------------- + batch |out_channel | seq_len | 1 + */ + CHECK_EQ(inputs[0]->dimension(), inputs[1]->sequence()); + outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), inputs[0]->sequence(), inputs[1]->dimension()); + } else if (transpose1_) { + /* + N | C | H | W + ----------------------------------------------- + batch |in_channel | out_channel | 1 + ----------------------------------------------- + batch |in_channel | seq_len | 1 + ----------------------------------------------- + batch |out_channel | seq_len | 1 + */ + CHECK_EQ(inputs[0]->dimension(), inputs[1]->dimension()); + outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), 
inputs[0]->sequence(), inputs[1]->sequence()); + } else { + /* + N | C | H | W + ----------------------------------------------- + batch |out_channel | in_channel | 1 + ----------------------------------------------- + batch |seq_len | in_channel | 1 + ----------------------------------------------- + batch |out_channel | seq_len | 1 + */ + CHECK_EQ(inputs[0]->sequence(), inputs[1]->sequence()); + outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), inputs[0]->dimension(), inputs[1]->dimension()); + } + // outputs[0]->setDtype(activationDtype()); + return Op::reshape(inputs, outputs); +} + +ErrorCode QNNMatmul::setUp(vector> inputs, vector> outputs) { + vector paramsMatmul = { + {.paramType = QNN_PARAMTYPE_SCALAR, + .name = "transpose_in0", + {.scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = transpose0_}}}}, + {.paramType = QNN_PARAMTYPE_SCALAR, + .name = "transpose_in1", + {.scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = transpose1_}}}}}; + return graphAddNode(name(), "Reshape", inputs, outputs, paramsMatmul); +} +} // namespace mllm diff --git a/src/backends/QNN/op/QNNMatmul.hpp b/src/backends/QNN/op/QNNMatmul.hpp new file mode 100644 index 00000000..da3035d7 --- /dev/null +++ b/src/backends/QNN/op/QNNMatmul.hpp @@ -0,0 +1,28 @@ + +#ifndef MLLM_QNNMATMUL_H +#define MLLM_QNNMATMUL_H + +#include "QNNCommonOp.hpp" +namespace mllm { +class QNNMatmul : public QNNCommonOp { +public: + QNNMatmul(Backend *bn, string opName); + virtual ~QNNMatmul() = default; + virtual ErrorCode reshape(vector> inputs, vector> outputs) override; + virtual ErrorCode setUp(vector> inputs, vector> outputs) override; + +private: + bool transpose0_; + bool transpose1_; +}; + +class QNNMatmulCreator : public QNNBackend::Creator { +public: + virtual Op *create(OpParam op_param, Backend *bn, string name) const { + return new QNNMatmul(bn, name); + } +}; + +} // namespace mllm + +#endif diff --git a/src/backends/QNN/op/QNNRope.cpp b/src/backends/QNN/op/QNNRope.cpp new file mode 100644 index 00000000..de4fc709 --- /dev/null +++ b/src/backends/QNN/op/QNNRope.cpp @@ -0,0 +1,19 @@ + +#include "QNNRope.hpp" +#include "Types.hpp" +#include "QNNCommonOp.hpp" + +namespace mllm { +QNNRope::QNNRope(Backend *bn, string opName) : + QNNCommonOp(bn, opName) { +} + +ErrorCode QNNRope::reshape(vector> inputs, vector> outputs) { + return NO_ERROR; +} + +ErrorCode QNNRope::setUp(vector> inputs, vector> outputs) { + return graphAddNode(name(), "Add", inputs, outputs); +} +} // namespace mllm + diff --git a/src/backends/QNN/op/QNNRope.hpp b/src/backends/QNN/op/QNNRope.hpp new file mode 100644 index 00000000..e78427a5 --- /dev/null +++ b/src/backends/QNN/op/QNNRope.hpp @@ -0,0 +1,24 @@ + +#ifndef MLLM_QNNROPE_H +#define MLLM_QNNROPE_H + +#include "QNNCommonOp.hpp" +namespace mllm { +class QNNRope : public QNNCommonOp { +public: + QNNRope(Backend *bn, string opName); + virtual ~QNNRope() = default; + virtual ErrorCode reshape(vector> inputs, vector> outputs) override; + virtual ErrorCode setUp(vector> inputs, vector> outputs) override; +}; + +class QNNRopeCreator : public QNNBackend::Creator { +public: + virtual Op *create(OpParam op_param, Backend *bn, string name) const { + return new QNNRope(bn, name); + } +}; + +} // namespace mllm + +#endif diff --git a/src/backends/QNN/op/QNNScale.cpp b/src/backends/QNN/op/QNNScale.cpp new file mode 100644 index 00000000..c8a03368 --- /dev/null +++ b/src/backends/QNN/op/QNNScale.cpp @@ -0,0 +1,19 @@ + +#include "QNNScale.hpp" +#include "Types.hpp" +#include 
"QNNCommonOp.hpp" + +namespace mllm { +QNNScale::QNNScale(Backend *bn, string opName) : + QNNCommonOp(bn, opName) { +} + +ErrorCode QNNScale::reshape(vector> inputs, vector> outputs) { + return NO_ERROR; +} + +ErrorCode QNNScale::setUp(vector> inputs, vector> outputs) { + return graphAddNode(name(), "Add", inputs, outputs); +} +} // namespace mllm + diff --git a/src/backends/QNN/op/QNNScale.hpp b/src/backends/QNN/op/QNNScale.hpp new file mode 100644 index 00000000..c8fa92ef --- /dev/null +++ b/src/backends/QNN/op/QNNScale.hpp @@ -0,0 +1,24 @@ + +#ifndef MLLM_QNNSCALE_H +#define MLLM_QNNSCALE_H + +#include "QNNCommonOp.hpp" +namespace mllm { +class QNNScale : public QNNCommonOp { +public: + QNNScale(Backend *bn, string opName); + virtual ~QNNScale() = default; + virtual ErrorCode reshape(vector> inputs, vector> outputs) override; + virtual ErrorCode setUp(vector> inputs, vector> outputs) override; +}; + +class QNNScaleCreator : public QNNBackend::Creator { +public: + virtual Op *create(OpParam op_param, Backend *bn, string name) const { + return new QNNScale(bn, name); + } +}; + +} // namespace mllm + +#endif diff --git a/src/backends/QNN/op/QNNSoftmax.cpp b/src/backends/QNN/op/QNNSoftmax.cpp new file mode 100644 index 00000000..29ef8234 --- /dev/null +++ b/src/backends/QNN/op/QNNSoftmax.cpp @@ -0,0 +1,19 @@ + +#include "QNNSoftmax.hpp" +#include "Types.hpp" +#include "QNNCommonOp.hpp" + +namespace mllm { +QNNSoftmax::QNNSoftmax(Backend *bn, string opName) : + QNNCommonOp(bn, opName) { +} + +ErrorCode QNNSoftmax::reshape(vector> inputs, vector> outputs) { + return NO_ERROR; +} + +ErrorCode QNNSoftmax::setUp(vector> inputs, vector> outputs) { + return graphAddNode(name(), "Add", inputs, outputs); +} +} // namespace mllm + diff --git a/src/backends/QNN/op/QNNSoftmax.hpp b/src/backends/QNN/op/QNNSoftmax.hpp new file mode 100644 index 00000000..00518af4 --- /dev/null +++ b/src/backends/QNN/op/QNNSoftmax.hpp @@ -0,0 +1,24 @@ + +#ifndef MLLM_QNNSOFTMAX_H +#define MLLM_QNNSOFTMAX_H + +#include "QNNCommonOp.hpp" +namespace mllm { +class QNNSoftmax : public QNNCommonOp { +public: + QNNSoftmax(Backend *bn, string opName); + virtual ~QNNSoftmax() = default; + virtual ErrorCode reshape(vector> inputs, vector> outputs) override; + virtual ErrorCode setUp(vector> inputs, vector> outputs) override; +}; + +class QNNSoftmaxCreator : public QNNBackend::Creator { +public: + virtual Op *create(OpParam op_param, Backend *bn, string name) const { + return new QNNSoftmax(bn, name); + } +}; + +} // namespace mllm + +#endif diff --git a/src/backends/QNN/op/QNNView.cpp b/src/backends/QNN/op/QNNView.cpp new file mode 100644 index 00000000..c30f7a8c --- /dev/null +++ b/src/backends/QNN/op/QNNView.cpp @@ -0,0 +1,19 @@ + +#include "QNNView.hpp" +#include "Types.hpp" +#include "QNNCommonOp.hpp" + +namespace mllm { +QNNView::QNNView(Backend *bn, string opName) : + QNNCommonOp(bn, opName) { +} + +ErrorCode QNNView::reshape(vector> inputs, vector> outputs) { + return NO_ERROR; +} + +ErrorCode QNNView::setUp(vector> inputs, vector> outputs) { + return graphAddNode(name(), "Add", inputs, outputs); +} +} // namespace mllm + diff --git a/src/backends/QNN/op/QNNView.hpp b/src/backends/QNN/op/QNNView.hpp new file mode 100644 index 00000000..4e77c0ed --- /dev/null +++ b/src/backends/QNN/op/QNNView.hpp @@ -0,0 +1,24 @@ + +#ifndef MLLM_QNNVIEW_H +#define MLLM_QNNVIEW_H + +#include "QNNCommonOp.hpp" +namespace mllm { +class QNNView : public QNNCommonOp { +public: + QNNView(Backend *bn, string opName); + virtual ~QNNView() = default; 
+    virtual ErrorCode reshape(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;
+    virtual ErrorCode setUp(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;
+};
+
+class QNNViewCreator : public QNNBackend::Creator {
+public:
+    virtual Op *create(OpParam op_param, Backend *bn, string name) const {
+        return new QNNView(bn, name);
+    }
+};
+
+} // namespace mllm
+
+#endif
diff --git a/src/backends/QNN/op/new_op.py b/src/backends/QNN/op/new_op.py
new file mode 100644
index 00000000..5de6650f
--- /dev/null
+++ b/src/backends/QNN/op/new_op.py
@@ -0,0 +1,78 @@
+import os
+import sys
+
+
+code_hpp = """
+#ifndef MLLM_QNNADD_H
+#define MLLM_QNNADD_H
+
+#include "QNNCommonOp.hpp"
+namespace mllm {
+class QNNAdd : public QNNCommonOp {
+public:
+    QNNAdd(Backend *bn, string opName);
+    virtual ~QNNAdd() = default;
+    virtual ErrorCode reshape(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;
+    virtual ErrorCode setUp(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;
+};
+
+class QNNAddCreator : public QNNBackend::Creator {
+public:
+    virtual Op *create(OpParam op_param, Backend *bn, string name) const {
+        return new QNNAdd(bn, name);
+    }
+};
+
+} // namespace mllm
+
+#endif
+"""
+
+code_cpp = """
+#include "QNNAdd.hpp"
+#include "Types.hpp"
+#include "QNNCommonOp.hpp"
+
+namespace mllm {
+QNNAdd::QNNAdd(Backend *bn, string opName) :
+    QNNCommonOp(bn, opName) {
+}
+
+ErrorCode QNNAdd::reshape(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
+    return NO_ERROR;
+}
+
+ErrorCode QNNAdd::setUp(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
+    return graphAddNode(name(), "Add", inputs, outputs);
+}
+} // namespace mllm
+
+"""
+
+if __name__ == "__main__":
+    args = sys.argv
+    if len(args) != 2:
+        print(
+            "Usage: python new_op.py [op_name]\n e.g. python new_op.py QNNXXX"
+        )
+        exit(1)
+
+    op_name = args[1]
+    dirname = op_name[:3]
+    op_name_upper = op_name.upper()
+    if dirname == "QNN":
+        new_code_hpp = code_hpp.replace("QNNAdd", op_name)
+        new_code_hpp = new_code_hpp.replace("QNNADD", op_name_upper)
+        file_hpp = os.getcwd() + "/" + op_name + ".hpp"
+        file = open(file_hpp, "w")
+        file.write(new_code_hpp)
+        file.close()
+        new_code_cpp = code_cpp.replace("QNNAdd", op_name)
+        new_code_cpp = new_code_cpp.replace("QNNADD", op_name_upper)
+        file_cpp = os.getcwd() + "/" + op_name + ".cpp"
+        file = open(file_cpp, "w")
+        file.write(new_code_cpp)
+        file.close()
+    else:
+        print("Only support QNNXXX now!")
+        exit(1)