alibaba · wangzhaode · Jul 4, 2024 · Jul 4, 2024
diff --git a/.github/workflows/pymnn_windows.yml b/.github/workflows/pymnn_windows.yml
@@ -30,7 +30,7 @@ jobs:
             python-version: '3.9'
       - name: prepare
         run: |
-            pip3 install numpy opencv-python torch
+            pip3 install numpy==1.25 opencv-python torch
       - name: build
         run: |
             cd pymnn/pip_package

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -636,11 +636,6 @@ IF(MNN_BUILD_CODEGEN)
     include(${CMAKE_CURRENT_LIST_DIR}/codegen/CMakeLists.txt)
 ENDIF()
 
-IF(MNN_BUILD_LLM)
-    # add_definitions(-DMNN_BUILD_LLM)
-    include(${CMAKE_CURRENT_LIST_DIR}/transformers/llm/engine/CMakeLists.txt)
-ENDIF()
-
 # NPU
 IF(MNN_NPU)
     add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/hiai/)
@@ -735,6 +730,14 @@ IF(MNN_BUILD_OPENCV AND NOT MNN_SEP_BUILD)
   target_sources(MNN PRIVATE $<TARGET_OBJECTS:MNNOpenCV>)
 ENDIF()
 
+IF(MNN_BUILD_LLM)
+    # Pull in the LLM engine build rules; unless MNN_SEP_BUILD is set, add the llm target's object files to MNN.
+    include(${CMAKE_CURRENT_LIST_DIR}/transformers/llm/engine/CMakeLists.txt)
+    IF(NOT MNN_SEP_BUILD)
+      target_sources(MNN PRIVATE $<TARGET_OBJECTS:llm>)
+    ENDIF()
+ENDIF()
+
 if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
 # Using -pthread, needed by thread-safe implemention of glibc, is better than only using -lpthread
 # https://stackoverflow.com/questions/23250863/difference-between-pthread-and-lpthread-while-compiling

diff --git a/docs/Makefile b/docs/Makefile
diff --git a/docs/inference/module.md b/docs/inference/module.md
@@ -225,7 +225,7 @@ MNN::TensorCallBackWithInfo callBack = [&](const std::vector<MNN::Tensor*>& nten
     return true;
 };
 
-// 设置回调函数，需要是创建该 Module 时的 executor ，非多实例情况下用全局 executor 即可：
+// 设置回调函数，需要是创建该 Module 时的 executor ，非多实例情况下用全局 executor 即可：
 Express::Executor::getGlobalExecutor()->setCallBack(std::move(beforeCallBack), std::move(callBack));
 
 // forward would trigger callback

diff --git a/docs/tools/convert.md b/docs/tools/convert.md
@@ -48,7 +48,7 @@ Usage:
       --weightQuantAsymmetric   与weightQuantBits结合使用，决定是否用非对称量化，默认为`true`
 
       --compressionParamsFile arg
-                                使用MNN模型压缩工具箱生成的模型压缩信息文件
+                                使用MNN模型压缩工具箱生成的模型压缩信息文件或根据用户提供的量化参数来生成对应的量化模型，量化参数文件可参考tools/converter/user_provide_quant_params.json
 
       --saveStaticModel         固定输入形状，保存静态模型， default: false
 

diff --git a/docs/transformers/diffusion.md b/docs/transformers/diffusion.md
@@ -1,3 +1,45 @@
 # 扩散模型
 
-TODO
+## 模型支持与下载
+
+- runwayml/stable-diffusion-v1-5：
+  <https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main>
+- IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1：
+  <https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1/tree/main>
+
+## 模型转换
+### 将Huggingface的Stable Diffusion模型 转为onnx模型
+python export/onnx_export.py \
+    --model_path hf_sd_load_path \
+    --output_path onnx_save_path
+
+### 将onnx模型转为mnn模型
+新建diffusion mnn模型文件夹，将转好的mnn文件放在该文件夹下。
+./MNNConvert -f ONNX --modelFile onnx_save_path/text_encoder/model.onnx --MNNModel mnn_save_path/text_encoder.mnn --weightQuantBits 8 --bizCode biz
+./MNNConvert -f ONNX --modelFile onnx_save_path/unet/model.onnx --MNNModel mnn_save_path/unet.mnn --transformerFuse --weightQuantBits 8 --bizCode biz
+./MNNConvert -f ONNX --modelFile onnx_save_path/vae_decoder/model.onnx --keepInputFormat --MNNModel mnn_save_path/vae_decoder.mnn --weightQuantBits 8 --bizCode biz
+
+## 编译Diffusion Demo
+### Linux/MAC/Windows上
+cmake .. -DMNN_BUILD_DIFFUSION=ON -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON -DMNN_OPENCL=ON -DMNN_SEP_BUILD=OFF -DMNN_SUPPORT_TRANSFORMER_FUSE=ON
+
+### Android上
+cd project/android/build
+../build_64.sh -DMNN_BUILD_DIFFUSION=ON -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON -DMNN_OPENCL=ON -DMNN_SEP_BUILD=OFF -DMNN_SUPPORT_TRANSFORMER_FUSE=ON
+
+## 运行Diffusion Demo
+./diffusion_demo <resource_path> <model_type> <output_image_name> <input_text>
+其中，resource_path 就是mnn模型文件的路径，除了mnn文件，还需要
+（1）将MNN目录transformers/diffusion/scheduler/alphas.txt文件拷贝到该文件夹下。
+（2）针对stable-diffusion-v1-5模型需要将huggingface模型tokenizer目录下merges.txt和vocab.json拷贝到该文件夹中。针对Taiyi-Stable-Diffusion模型需要将huggingface模型tokenizer目录下vocab.txt拷贝到该文件夹中。
+
+model_type是目前支持的两种diffusion模型的类别。如果是stable-diffusion-v1-5模型设为0，如果是Taiyi-Stable-Diffusion模型设为1。
+
+output_image_name是生成图片的名字，默认图片位置在当前运行目录下。
+
+input_text是文生图的prompt，如果是stable-diffusion-v1-5模型建议英文prompt，如果是Taiyi-Stable-Diffusion建议中文prompt。
+
+运行指令例如: 
+./diffusion_demo mnn_save_path 0 demo.jpg "a cute cat"
+./diffusion_demo mnn_save_path 1 demo.jpg "一只可爱的猫"
+
diff --git a/include/MNN/MNNDefine.h b/include/MNN/MNNDefine.h
@@ -69,6 +69,6 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \
 #define STR(x) STR_IMP(x)
 #define MNN_VERSION_MAJOR 2
 #define MNN_VERSION_MINOR 9
-#define MNN_VERSION_PATCH 1
+#define MNN_VERSION_PATCH 2
 #define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH)
 #endif /* MNNDefine_h */
diff --git a/project/android/updateTest.sh b/project/android/updateTest.sh
@@ -4,6 +4,7 @@ DIR=MNN
 make -j16
 adb push ./libllm.so /data/local/tmp/MNN/libllm.so
 adb push ./llm_demo /data/local/tmp/MNN/llm_demo
+adb push ./diffusion_demo /data/local/tmp/MNN/diffusion_demo
 adb push ./libMNN.so /data/local/tmp/$DIR/libMNN.so
 adb push ./libMNN_CL.so /data/local/tmp/$DIR/libMNN_CL.so
 adb push ./libMNN_Vulkan.so /data/local/tmp/$DIR/libMNN_Vulkan.so

diff --git a/pymnn/examples/MNNLlm/llm_example.py b/pymnn/examples/MNNLlm/llm_example.py
@@ -0,0 +1,19 @@
+import MNN.llm as llm
+import sys
+
+if len(sys.argv) < 2:
+    print('usage: python llm_example.py <path_to_model_config>')
+    sys.exit(1)  # sys.exit is always available; the bare exit() helper is injected by the site module
+
+config_path = sys.argv[1]
+# build the LLM object from the config path given on the command line
+qwen = llm.create(config_path)
+# load the model before issuing any request
+qwen.load()
+
+# ask for a reply to a text prompt; the second argument is the `stream` flag
+out = qwen.response('你好', True)
+print(out)
+
+out_ids = qwen.generate([151644, 872, 198, 108386, 151645, 198, 151644, 77091])
+print(out_ids)
diff --git a/pymnn/pip_package/MNN/llm/__init__.py b/pymnn/pip_package/MNN/llm/__init__.py
@@ -0,0 +1,76 @@
+import _mnncengine.llm as _F
+
+class LLM(_F.LLM):
+    def load(self, model_dir):
+        '''
+        load the model; forwards model_dir to the base-class load
+
+        Parameters
+        ----------
+        model_dir : model path (split) or model name (single)
+
+        Returns
+        -------
+        None
+
+        Example:
+        -------
+        >>> qwen.load('../qwen-1.8b-int4/config.json')
+        '''
+        super(LLM, self).load(model_dir)  # bare `super.load` is an attribute lookup on the type and raises AttributeError
+
+    def generate(self, input_ids):
+        '''
+        generate output token ids from input_ids via the base-class generate
+
+        Parameters
+        ----------
+        input_ids : input token ids, list of int
+
+        Returns
+        -------
+        output_ids : output token ids, list of int
+
+        Example:
+        -------
+        >>> input_ids = [151644, 872, 198, 108386, 151645, 198, 151644, 77091]
+        >>> output_ids = qwen.generate(input_ids)
+        '''
+        return super(LLM, self).generate(input_ids)  # explicit two-argument form works on Python 2 and 3
+
+    def response(self, prompt, stream = False):
+        '''
+        produce a text response for prompt via the base-class response
+
+        Parameters
+        ----------
+        prompt : input prompt
+        stream : generate string stream, default is False
+
+        Returns
+        -------
+        res : output string
+
+        Example:
+        -------
+        >>> res = qwen.response('Hello', True)
+        '''
+        return super(LLM, self).response(prompt, stream)  # explicit two-argument form works on Python 2 and 3
+
+def create(config_path):
+    '''
+    create an LLM instance from a `config.json`; delegates to `_F.create`
+
+    Parameters
+    ----------
+    config_path : config path or model path
+
+    Returns
+    -------
+    llm : LLM instance, exactly as returned by `_F.create(config_path)`
+
+    Example:
+    -------
+    >>> qwen = llm.create('./qwen-1.8b-int4/config.json')
+    '''
+    return _F.create(config_path)
diff --git a/pymnn/pip_package/build_deps.py b/pymnn/pip_package/build_deps.py
@@ -29,6 +29,8 @@
 USE_RENDER   = False
 USE_SSE      = True
 USE_OPENMP   = False
+USE_LLM      = False
+USE_ARM82    = False
 
 if len(sys.argv) > 1 and sys.argv[1] != None:
     if "trt" in sys.argv[1]:
@@ -51,6 +53,10 @@
         USE_SSE = False
     if "openmp" in sys.argv[1]:
         USE_OPENMP = True
+    if "llm" in sys.argv[1]:
+        USE_LLM = True
+    if "arm82" in sys.argv[1]:
+        USE_ARM82 = True
 
 print ("USE_INTERNAL:", USE_INTERNAL)
 print ("USE_TRT:", USE_TRT)
@@ -62,6 +68,8 @@
 print ("USE_RENDER:", USE_RENDER)
 print ("USE_SSE:", USE_SSE)
 print ("USE_OPENMP:", USE_OPENMP)
+print ("USE_LLM:", USE_LLM)
+print ("USE_ARM82:", USE_ARM82)
 
 def build_deps():
     """ build depency """
@@ -79,6 +87,10 @@ def build_deps():
         extra_opts += ' -DMNN_VULKAN=ON -DMNN_VULKAN_IMAGE=OFF'
     if USE_OPENCL:
         extra_opts += ' -DMNN_OPENCL=ON'
+    if USE_LLM:
+        extra_opts += ' -DMNN_BUILD_LLM=ON -DMNN_LOW_MEMORY=ON -DMNN_SUPPORT_TRANSFORMER_FUSE=ON'
+    if USE_ARM82:
+        extra_opts += ' -DMNN_ARM82=ON'
     extra_opts += ' -DMNN_USE_THREAD_POOL=OFF -DMNN_OPENMP=ON' if USE_OPENMP else ' -DMNN_USE_THREAD_POOL=ON -DMNN_OPENMP=OFF'
 
     if IS_WINDOWS:

diff --git a/pymnn/pip_package/pyproject.toml b/pymnn/pip_package/pyproject.toml
@@ -16,7 +16,7 @@ test-skip = [
 ]
 test-requires = [
     "opencv-python==4.6.0.66",
-    "numpy",
+    "numpy==1.13.3",
     "torch"
 ]
 test-command = [

diff --git a/pymnn/pip_package/setup.py b/pymnn/pip_package/setup.py
@@ -214,6 +214,9 @@ def configure_extension_build():
         engine_include_dirs += [os.path.join(root_dir, "3rd_party", "rapidjson")]
     # cv include
     engine_include_dirs += [os.path.join(root_dir, "tools", "cv", "include")]
+    # llm include
+    engine_include_dirs += [os.path.join(root_dir, "transformers", "llm", "engine", "include")]
+    engine_include_dirs += [os.path.join(root_dir, "3rd_party")]
     engine_include_dirs += [np.get_include()]
 
     lib_files = []
@@ -247,6 +250,12 @@ def configure_extension_build():
     # add libTorch dependency
     torch_lib = None
     cmakecache = os.path.join(root_dir, BUILD_DIR, 'CMakeCache.txt')
+    # llm
+    for line in open(cmakecache, 'rt').readlines():
+        if 'MNN_BUILD_LLM' in line:
+            if 'ON' in line:
+                extra_compile_args += ['-DPYMNN_LLM_API']
+    # torch lib
     for line in open(cmakecache, 'rt').readlines():
         if 'TORCH_LIBRARY' in line:
             torch_lib = os.path.dirname(line[line.find('=')+1:])

diff --git a/pymnn/src/MNN.cc b/pymnn/src/MNN.cc
@@ -66,6 +66,10 @@ using RegularizationMethod = ParameterOptimizer::RegularizationMethod;
 #endif
 #endif
 
+#ifdef PYMNN_LLM_API
+#include "llm.h"
+#endif
+
 #ifdef PYMNN_INTERNAL_SERVING
 #include <MNN/AutoTime.hpp>
 #include "internal/monitor_service.h"
@@ -1610,7 +1614,7 @@ static PyObject* PyMNNTensor_fromNumpy(PyMNNTensor *self, PyObject *args) {
             return NULL;
         }
         DType dtype = htype2dtype(self->tensor->getType());
-        int npy_type = PyArray_TYPE(data);
+        int npy_type = PyArray_TYPE((const PyArrayObject*)data);
         int itemsize = getitemsize(dtype, npy_type);
         PyArrayObject *data_cont= PyArray_GETCONTIGUOUS((PyArrayObject*)data);
         auto tmpBuffer = PyArray_DATA(data_cont);
@@ -1946,7 +1950,7 @@ static PyObject* PyMNNCVImageProcess_convert(PyMNNCVImageProcess *self, PyObject
 #ifdef PYMNN_NUMPY_USABLE
     else if(gNumpyValid && PyArray_Check(source)) {
         // Array Data
-        int npy_type = PyArray_TYPE(source);
+        int npy_type = PyArray_TYPE((const PyArrayObject*)source);
         if(npy_type != NPY_UINT8) {
             PyErr_SetString(PyExc_Exception,
                         "PyMNNCVImageProcess_convert: only numpy.uint8 is supported for numpy");
@@ -2710,6 +2714,20 @@ PyMODINIT_FUNC MOD_INIT_FUNC(void) {
     }
 #endif
 #endif
+#ifdef PYMNN_LLM_API
+    // llm submodule
+    auto llm_module = def_submodule(m, "llm");
+    if (PyType_Ready(&PyMNNLLM) < 0) {
+        PyErr_SetString(PyExc_Exception, "initMNN.llm: PyType_Ready PyMNNLLM failed");
+        ERROR_RETURN
+    }
+    PyModule_AddObject(llm_module, "LLM", (PyObject *)PyType_FindTLSType(&PyMNNLLM));
+    // add methods of llm
+    constexpr int llm_method_num = sizeof(PyMNNLLM_static_methods) / sizeof(PyMethodDef);
+    for (int i = 0; i < llm_method_num; i++) {
+        def_method(llm_module, &PyMNNLLM_static_methods[i]);
+    }
+#endif
 
 #if PY_MAJOR_VERSION >= 3
     return m;