
Commit

Merge branch 'develop-QNN' into develop-QNN-zh
liang1232018 authored Dec 27, 2023
2 parents c026676 + 2710a3b commit fb035a7
Showing 8 changed files with 1,269 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -206,6 +206,8 @@ if(QNN)

add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/src/backends/QNN)
add_executable(qnn_test ${PROJECT_SOURCE_DIR}/demo/qnn/qnn_test.cpp ${PROJECT_SOURCE_DIR}/demo/qnn/qnn_wrapper.hpp ${DIR_SRC_MEM_MANAGER} ${DIR_SRC_CPU} ${DIR_SRC_EXP} ${DIR_SRC} )
add_executable(silu_test ${PROJECT_SOURCE_DIR}/demo/qnn/silu_test.cpp ${DIR_SRC_MEM_MANAGER} ${DIR_SRC_CPU} ${DIR_SRC_EXP} ${DIR_SRC} )
target_link_libraries(qnn_test MLLM_CPU MLLM_QNN ${CMAKE_DL_LIBS})
target_link_libraries(silu_test MLLM_CPU MLLM_QNN ${CMAKE_DL_LIBS})

endif()
359 changes: 359 additions & 0 deletions LLaMAOpPackageHtp/LLaMAPackage/config/LLaMAOpPackageHtp.xml
@@ -0,0 +1,359 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2020 Qualcomm Technologies, Inc.
All Rights Reserved.
Confidential and Proprietary - Qualcomm Technologies, Inc.
-->
<OpDefCollection
PackageName="LLaMAPackage"
Domain="LLaMA"
Version="1.0"
>
<OpDefList>
<!--Example Op Package which shows how a package can be defined using supplemental info-->
<OpDef>
<Name>SiLU</Name>
<Description>
<Content>
Applies the Sigmoid Linear Unit (SiLU) function, element-wise. The SiLU function is also known as the swish function.
</Content>
</Description>

<Reference Source="Torch"
Url="https://pytorch.org/docs/stable/generated/torch.nn.SiLU.html"/>

<Input>
<Name>in[0]</Name>
<Description>
<Content>input activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[N, C, H, W]</Text>
</Shape>
</Input>

<Output>
<Name>out[0]</Name>
<Description>
<Content>output activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[N, C, H, W]</Text>
</Shape>
</Output>

<!--This Op is implemented on these Backends-->
<SupportedBackend>HTP</SupportedBackend>
</OpDef>
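<!--
Reference sketch, not part of the package definition: element-wise, silu(x) = x * sigmoid(x)
= x / (1 + exp(-x)). A minimal float C++ illustration (names here are illustrative only; the
HTP kernel operates on the backend-specific datatype declared in the supplemental section):

    #include <cmath>
    #include <cstddef>

    // silu(x) = x * sigmoid(x), applied independently to every element.
    static void silu_reference(const float* in, float* out, std::size_t n) {
        for (std::size_t i = 0; i < n; ++i) {
            const float x = in[i];
            out[i] = x / (1.0f + std::exp(-x));
        }
    }
-->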

<!--Attention-->
<OpDef>
<Name>Attention</Name>
<Description>
<Content>
Allows the model to jointly attend to information from different representation subspaces as described in the paper: Attention Is All You Need.
</Content>
</Description>
<Reference Source="Torch"
Url="https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html#multiheadattention"/>
<Input>
<Name>in[0]</Name>
<Description>
<Content>input activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[BATCH, HEAD, SEQ, EMB]</Text>
</Shape>
</Input>

<Input>
<Name>in[1]</Name>
<Description>
<Content>attention mask</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[BATCH, SEQ]</Text>
</Shape>
</Input>

<Input>
<Name>in[2]</Name>
<Description>
<Content>Q</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[HEAD, EMB, EMB]</Text>
</Shape>
</Input>

<Input>
<Name>in[3]</Name>
<Description>
<Content>K</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[HEAD, EMB, EMB]</Text>
</Shape>
</Input>

<Input>
<Name>in[4]</Name>
<Description>
<Content>V</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[HEAD, EMB, EMB]</Text>
</Shape>
</Input>


<Output>
<Name>out[0]</Name>
<Description>
<Content>The output activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[BATCH, HEAD, SEQ, EMB]</Text>
</Shape>
</Output>

<!--This Op is implemented on these Backends-->
<SupportedBackend>HTP</SupportedBackend>
</OpDef>
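<!--
Reference sketch, illustrative only: per batch and head, the op projects the input with the
Q/K/V weight matrices and applies scaled dot-product attention with an additive mask,
out = softmax(Q K^T / sqrt(EMB) + mask) V. A minimal float C++ version assuming row-major
[SEQ, EMB] activations and [EMB, EMB] weights; the additive mask layout is an assumption,
and the quantized HTP kernel additionally handles the fixed-point scales:

    #include <cmath>
    #include <vector>

    // c[s][e] = sum_k a[s][k] * w[k][e]; a: [S, E], w: [E, E], row-major.
    static std::vector<float> matmul(const std::vector<float>& a,
                                     const std::vector<float>& w, int S, int E) {
        std::vector<float> c(S * E, 0.0f);
        for (int s = 0; s < S; ++s)
            for (int k = 0; k < E; ++k)
                for (int e = 0; e < E; ++e)
                    c[s * E + e] += a[s * E + k] * w[k * E + e];
        return c;
    }

    // One head: out = softmax(Q K^T / sqrt(E) + mask) V, with Q = x Wq, K = x Wk, V = x Wv.
    static std::vector<float> attention_reference(
            const std::vector<float>& x,    // [S, E] input activation
            const std::vector<float>& wq, const std::vector<float>& wk,
            const std::vector<float>& wv,   // each [E, E]
            const std::vector<float>& mask, // [S], additive (0 or a large negative value)
            int S, int E) {
        const auto q = matmul(x, wq, S, E);
        const auto k = matmul(x, wk, S, E);
        const auto v = matmul(x, wv, S, E);
        std::vector<float> out(S * E, 0.0f);
        const float scale = 1.0f / std::sqrt(static_cast<float>(E));
        for (int i = 0; i < S; ++i) {
            std::vector<float> score(S);
            float max_score = -1e30f;
            for (int j = 0; j < S; ++j) {
                float dot = 0.0f;
                for (int e = 0; e < E; ++e) dot += q[i * E + e] * k[j * E + e];
                score[j] = dot * scale + mask[j];
                if (score[j] > max_score) max_score = score[j];
            }
            float denom = 0.0f;
            for (int j = 0; j < S; ++j) {
                score[j] = std::exp(score[j] - max_score);
                denom += score[j];
            }
            for (int j = 0; j < S; ++j) {
                const float p = score[j] / denom;
                for (int e = 0; e < E; ++e) out[i * E + e] += p * v[j * E + e];
            }
        }
        return out;
    }
-->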

<OpDef>
<Name>RMSNorm</Name>
<Description>
<Content>
LLaMA RMSNorm
</Content>
</Description>

<Input>
<Name>in[0]</Name>
<Description>
<Content>input activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[N, C, H, W]</Text>
</Shape>
</Input>

<Input>
<Name>weights</Name>
<Description>
<Content>RMSNorm weights</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>1D</Rank>
<Text>[EMB]</Text>
</Shape>
</Input>

<Output>
<Name>out[0]</Name>
<Description>
<Content>output activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[N, C, H, W]</Text>
</Shape>
</Output>

<!--This Op is implemented on these Backends-->
<SupportedBackend>HTP</SupportedBackend>
</OpDef>
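<!--
Reference sketch, illustrative only: LLaMA RMSNorm divides each [EMB]-length vector by the
root mean square of its elements and scales by the learned weights,
y[i] = x[i] / sqrt(mean(x^2) + eps) * w[i]. Minimal float C++ version; eps is an assumed
small constant:

    #include <cmath>
    #include <cstddef>

    // Normalize one embedding vector of length `emb`.
    static void rmsnorm_reference(const float* x, const float* w, float* y,
                                  std::size_t emb, float eps = 1e-6f) {
        float sum_sq = 0.0f;
        for (std::size_t i = 0; i < emb; ++i) sum_sq += x[i] * x[i];
        const float inv_rms = 1.0f / std::sqrt(sum_sq / static_cast<float>(emb) + eps);
        for (std::size_t i = 0; i < emb; ++i) y[i] = x[i] * inv_rms * w[i];
    }
-->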

<OpDef>
<Name>RoPE</Name>
<Description>
<Content>
LLaMA RoPE
</Content>
</Description>

<Input>
<Name>in[0]</Name>
<Description>
<Content>input activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Layout>NHWC</Layout>
<Text>[N, C, H, W]</Text>
</Shape>
</Input>

<Input>
<Name>weights</Name>
<Description>
<Content>RoPE weights</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>1D</Rank>
<Text>[32768]</Text>
</Shape>
</Input>

<Output>
<Name>out[0]</Name>
<Description>
<Content>output activation</Content>
</Description>
<Mandatory>true</Mandatory>
<Datatype>BACKEND_SPECIFIC</Datatype>
<Shape>
<Rank>4D</Rank>
<Text>[N, C, H, W]</Text>
</Shape>
</Output>

<!--This Op is implemented on these Backends-->
<SupportedBackend>HTP</SupportedBackend>
</OpDef>
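<!--
Reference sketch, illustrative only: RoPE rotates consecutive dimension pairs (2i, 2i+1) of
each embedding vector by a position-dependent angle theta_i = pos / 10000^(2i/EMB). The 1-D
weights tensor of size 32768 is presumably a precomputed sin/cos table; the angles are
recomputed here for clarity, which is an assumption about how that table is filled:

    #include <cmath>
    #include <cstddef>

    // Apply rotary position embedding to one [EMB] vector at sequence position `pos`.
    static void rope_reference(const float* x, float* y, std::size_t emb, std::size_t pos) {
        for (std::size_t i = 0; i + 1 < emb; i += 2) {
            const float theta = static_cast<float>(pos) *
                std::pow(10000.0f, -static_cast<float>(i) / static_cast<float>(emb));
            const float c = std::cos(theta);
            const float s = std::sin(theta);
            y[i]     = x[i] * c - x[i + 1] * s;
            y[i + 1] = x[i] * s + x[i + 1] * c;
        }
    }
-->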
</OpDefList>

<SupplementalOpDefList Backend="HTP">
<SupportedOps>
<OpName>SiLU</OpName>
<OpName>Attention</OpName>
<OpName>RMSNorm</OpName>
<OpName>RoPE</OpName>
</SupportedOps>

<!--SiLU-->
<SupplementalOpDef>
<Name>SiLU</Name>

<Input>
<Name>in[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>


<Output>
<Name>out[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Output>
</SupplementalOpDef>

<!--Attention-->
<SupplementalOpDef>
<Name>Attention</Name>

<Input>
<Name>in[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>
<Input>
<Name>in[1]</Name>
<Datatype>QNN_DATATYPE_UINT_32</Datatype>
</Input>
<Input>
<Name>in[2]</Name>
<Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
</Input>
<Input>
<Name>in[3]</Name>
<Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
</Input>
<Input>
<Name>in[4]</Name>
<Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
</Input>

<Output>
<Name>out[0]</Name>
<Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
<Datatype>QNN_DATATYPE_UFIXED_POINT_16</Datatype>
</Output>
</SupplementalOpDef>


<!--RMSNorm-->
<SupplementalOpDef>
<Name>RMSNorm</Name>

<Input>
<Name>in[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>
<Input>
<Name>weights</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>

<Output>
<Name>out[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Output>
</SupplementalOpDef>

<!--RoPE-->
<SupplementalOpDef>
<Name>RoPE</Name>

<Input>
<Name>in[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>
<Input>
<Name>weights</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Input>

<Output>
<Name>out[0]</Name>
<Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
</Output>
</SupplementalOpDef>


</SupplementalOpDefList>

</OpDefCollection>
