
Commit

Merge branch 'develop-QNN' into develop-QNN-zh
liang1232018 authored Dec 27, 2023
2 parents c026676 + 2710a3b commit fb035a7
Showing 8 changed files with 1,269 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -206,6 +206,8 @@ if(QNN)

add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/src/backends/QNN)
add_executable(qnn_test ${PROJECT_SOURCE_DIR}/demo/qnn/qnn_test.cpp ${PROJECT_SOURCE_DIR}/demo/qnn/qnn_wrapper.hpp ${DIR_SRC_MEM_MANAGER} ${DIR_SRC_CPU} ${DIR_SRC_EXP} ${DIR_SRC} )
add_executable(silu_test ${PROJECT_SOURCE_DIR}/demo/qnn/silu_test.cpp ${DIR_SRC_MEM_MANAGER} ${DIR_SRC_CPU} ${DIR_SRC_EXP} ${DIR_SRC} )
target_link_libraries(qnn_test MLLM_CPU MLLM_QNN ${CMAKE_DL_LIBS})
target_link_libraries(silu_test MLLM_CPU MLLM_QNN ${CMAKE_DL_LIBS})

endif()
359 changes: 359 additions & 0 deletions LLaMAOpPackageHtp/LLaMAPackage/config/LLaMAOpPackageHtp.xml
@@ -0,0 +1,359 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2020 Qualcomm Technologies, Inc.
All Rights Reserved.
Confidential and Proprietary - Qualcomm Technologies, Inc.
-->
<OpDefCollection
PackageName="LLaMAPackage"
Domain="LLaMA"
Version="1.0"
>
<OpDefList>
<!--Example Op Package which shows how a package can be defined using supplemental info-->
<OpDef>
<Name>SiLU</Name>
<Description>
<Content>
Applies the Sigmoid Linear Unit (SiLU) function, element-wise. The SiLU function is also known as the swish function.
</Content>
</Description>

<Reference Source="Torch"
Url="https://pytorch.org/docs/stable/generated/torch.nn.SiLU.html"/>

<Input>
<Name>in[0]</Name>
<Description>
<Content>input activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[N, C, H, W]</Text>
</Shape>
</Input>

<Output>
<Name>out[0]</Name>
<Description>
<Content>output activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[N, C, H, W]</Text>
</Shape>
</Output>

<!--This Op is implemented on these Backends-->
<SupportedBackend>HTP</SupportedBackend>
</OpDef>
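<!--
Reference sketch, not part of the package definition: element-wise, silu(x) = x * sigmoid(x)
= x / (1 + exp(-x)). A minimal float C++ illustration (names here are illustrative only; the
HTP kernel operates on the backend-specific datatype declared in the supplemental section):

    #include <cmath>
    #include <cstddef>

    // silu(x) = x * sigmoid(x), applied independently to every element.
    static void silu_reference(const float* in, float* out, std::size_t n) {
        for (std::size_t i = 0; i < n; ++i) {
            const float x = in[i];
            out[i] = x / (1.0f + std::exp(-x));
        }
    }
-->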

<!--Attention-->
<OpDef>
<Name>Attention</Name>
<Description>
<Content>
Allows the model to jointly attend to information from different representation subspaces as described in the paper: Attention Is All You Need.
</Content>
</Description>
<Reference Source="Torch"
Url="https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html#multiheadattention"/>
<Input>
<Name>in[0]</Name>
<Description>
<Content>input activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[BATCH, HEAD, SEQ, EMB]</Text>
</Shape>
</Input>

<Input>
<Name>in[1]</Name>
<Description>
<Content>attention mask</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[BATCH, SEQ]</Text>
</Shape>
</Input>

<Input>
<Name>in[2]</Name>
<Description>
<Content>Q</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[HEAD, EMB, EMB]</Text>
</Shape>
</Input>

<Input>
<Name>in[3]</Name>
<Description>
<Content>K</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[HEAD, EMB, EMB]</Text>
</Shape>
</Input>

<Input>
<Name>in[4]</Name>
<Description>
<Content>V</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[HEAD, EMB, EMB]</Text>
</Shape>
</Input>


<Output>
<Name>out[0]</Name>
<Description>
<Content>The output activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[BATCH, HEAD, SEQ, EMB]</Text>
</Shape>
</Output>

<!--This Op is implemented on these Backends-->
<SupportedBackend>HTP</SupportedBackend>
</OpDef>
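<!--
Reference sketch, illustrative only: per batch and head, the op projects the input with the
Q/K/V weight matrices and applies scaled dot-product attention with an additive mask,
out = softmax(Q K^T / sqrt(EMB) + mask) V. A minimal float C++ version assuming row-major
[SEQ, EMB] activations and [EMB, EMB] weights; the additive mask layout is an assumption,
and the quantized HTP kernel additionally handles the fixed-point scales:

    #include <cmath>
    #include <vector>

    // c[s][e] = sum_k a[s][k] * w[k][e]; a: [S, E], w: [E, E], row-major.
    static std::vector<float> matmul(const std::vector<float>& a,
                                     const std::vector<float>& w, int S, int E) {
        std::vector<float> c(S * E, 0.0f);
        for (int s = 0; s < S; ++s)
            for (int k = 0; k < E; ++k)
                for (int e = 0; e < E; ++e)
                    c[s * E + e] += a[s * E + k] * w[k * E + e];
        return c;
    }

    // One head: out = softmax(Q K^T / sqrt(E) + mask) V, with Q = x Wq, K = x Wk, V = x Wv.
    static std::vector<float> attention_reference(
            const std::vector<float>& x,    // [S, E] input activation
            const std::vector<float>& wq, const std::vector<float>& wk,
            const std::vector<float>& wv,   // each [E, E]
            const std::vector<float>& mask, // [S], additive (0 or a large negative value)
            int S, int E) {
        const auto q = matmul(x, wq, S, E);
        const auto k = matmul(x, wk, S, E);
        const auto v = matmul(x, wv, S, E);
        std::vector<float> out(S * E, 0.0f);
        const float scale = 1.0f / std::sqrt(static_cast<float>(E));
        for (int i = 0; i < S; ++i) {
            std::vector<float> score(S);
            float max_score = -1e30f;
            for (int j = 0; j < S; ++j) {
                float dot = 0.0f;
                for (int e = 0; e < E; ++e) dot += q[i * E + e] * k[j * E + e];
                score[j] = dot * scale + mask[j];
                if (score[j] > max_score) max_score = score[j];
            }
            float denom = 0.0f;
            for (int j = 0; j < S; ++j) {
                score[j] = std::exp(score[j] - max_score);
                denom += score[j];
            }
            for (int j = 0; j < S; ++j) {
                const float p = score[j] / denom;
                for (int e = 0; e < E; ++e) out[i * E + e] += p * v[j * E + e];
            }
        }
        return out;
    }
-->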

<OpDef>
<Name>RMSNorm</Name>
<Description>
<Content>
LLaMA RMSNorm
</Content>
</Description>

<Input>
<Name>in[0]</Name>
<Description>
<Content>input activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[N, C, H, W]</Text>
</Shape>
</Input>

<Input>
<Name>weights</Name>
<Description>
<Content>RMSNorm weights</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>1D</Rank>
<Text>[EMB]</Text>
</Shape>
</Input>

<Output>
<Name>out[0]</Name>
<Description>
<Content>output activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[N, C, H, W]</Text>
</Shape>
</Output>

<!--This Op is implemented on these Backends-->
<SupportedBackend>HTP</SupportedBackend>
</OpDef>
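<!--
Reference sketch, illustrative only: LLaMA RMSNorm divides each [EMB]-length vector by the
root mean square of its elements and scales by the learned weights,
y[i] = x[i] / sqrt(mean(x^2) + eps) * w[i]. Minimal float C++ version; eps is an assumed
small constant:

    #include <cmath>
    #include <cstddef>

    // Normalize one embedding vector of length `emb`.
    static void rmsnorm_reference(const float* x, const float* w, float* y,
                                  std::size_t emb, float eps = 1e-6f) {
        float sum_sq = 0.0f;
        for (std::size_t i = 0; i < emb; ++i) sum_sq += x[i] * x[i];
        const float inv_rms = 1.0f / std::sqrt(sum_sq / static_cast<float>(emb) + eps);
        for (std::size_t i = 0; i < emb; ++i) y[i] = x[i] * inv_rms * w[i];
    }
-->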

<OpDef>
<Name>RoPE</Name>
<Description>
<Content>
LLaMA RoPE
</Content>
</Description>

<Input>
<Name>in[0]</Name>
<Description>
<Content>input activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[N, C, H, W]</Text>
</Shape>
</Input>

<Input>
<Name>weights</Name>
<Description>
<Content>RoPE weights</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>1D</Rank>
<Text>[32768]</Text>
</Shape>
</Input>

<Output>
<Name>out[0]</Name>
<Description>
<Content>output activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[N, C, H, W]</Text>
</Shape>
</Output>

<!--This Op is implemented on these Backends-->
<SupportedBackend>HTP</SupportedBackend>
</OpDef>
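<!--
Reference sketch, illustrative only: RoPE rotates consecutive dimension pairs (2i, 2i+1) of
each embedding vector by a position-dependent angle theta_i = pos / 10000^(2i/EMB). The 1-D
weights tensor of size 32768 is presumably a precomputed sin/cos table; the angles are
recomputed here for clarity, which is an assumption about how that table is filled:

    #include <cmath>
    #include <cstddef>

    // Apply rotary position embedding to one [EMB] vector at sequence position `pos`.
    static void rope_reference(const float* x, float* y, std::size_t emb, std::size_t pos) {
        for (std::size_t i = 0; i + 1 < emb; i += 2) {
            const float theta = static_cast<float>(pos) *
                std::pow(10000.0f, -static_cast<float>(i) / static_cast<float>(emb));
            const float c = std::cos(theta);
            const float s = std::sin(theta);
            y[i]     = x[i] * c - x[i + 1] * s;
            y[i + 1] = x[i] * s + x[i + 1] * c;
        }
    }
-->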
</OpDefList>

<SupplementalOpDefList Backend="HTP">
<SupportedOps>
<OpName>SiLU</OpName>
<OpName>Attention</OpName>
<OpName>RMSNorm</OpName>
<OpName>RoPE</OpName>
</SupportedOps>

<!--SiLU-->
<SupplementalOpDef>
<Name>SiLU</Name>

<Input>
<Name>in[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>


<Output>
<Name>out[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Output>
</SupplementalOpDef>

<!--Attention-->
<SupplementalOpDef>
<Name>Attention</Name>

<Input>
<Name>in[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>
<Input>
<Name>in[1]</Name>
<Datatype>QNN_DATATYPE_UINT_32</Datatype>
</Input>
<Input>
<Name>in[2]</Name>
<Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
</Input>
<Input>
<Name>in[3]</Name>
<Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
</Input>
<Input>
<Name>in[4]</Name>
<Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
</Input>

<Output>
<Name>out[0]</Name>
<Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
<Datatype>QNN_DATATYPE_UFIXED_POINT_16</Datatype>
</Output>
</SupplementalOpDef>


<!--RMSNorm-->
<SupplementalOpDef>
<Name>RMSNorm</Name>

<Input>
<Name>in[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>
<Input>
<Name>weights</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>

<Output>
<Name>out[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Output>
</SupplementalOpDef>

<!--RoPE-->
<SupplementalOpDef>
<Name>RoPE</Name>

<Input>
<Name>in[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>
<Input>
<Name>weights</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>

<Output>
<Name>out[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Output>
</SupplementalOpDef>


</SupplementalOpDefList>

</OpDefCollection>
