Commit: Merge branch 'develop-QNN' into develop-QNN-zh

Showing 8 changed files with 1,269 additions and 0 deletions.
LLaMAOpPackageHtp/LLaMAPackage/config/LLaMAOpPackageHtp.xml
359 additions, 0 deletions
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2020 Qualcomm Technologies, Inc.
All Rights Reserved.
Confidential and Proprietary - Qualcomm Technologies, Inc.
-->
<OpDefCollection
  PackageName="LLaMAPackage"
  Domain="LLaMA"
  Version="1.0"
>
  <OpDefList>
    <!--Example Op Package which shows how a package can be defined using supplemental info-->
    <OpDef>
      <Name>SiLU</Name>
      <Description>
        <Content>
          Applies the Sigmoid Linear Unit (SiLU) function, element-wise. The SiLU function is also known as the swish function.
        </Content>
      </Description>

      <Reference Source="Torch"
                 Url="https://pytorch.org/docs/stable/generated/torch.nn.SiLU.html"/>

      <Input>
        <Name>in[0]</Name>
        <Description>
          <Content>input activation</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>4D</Rank>
          <Layout>NHWC</Layout>
          <Text>[N, C, H, W]</Text>
        </Shape>
      </Input>

      <Output>
        <Name>out[0]</Name>
        <Description>
          <Content>output activation</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>4D</Rank>
          <Text>[N, C, H, W]</Text>
        </Shape>
      </Output>

      <!--This Op is implemented on these Backends-->
      <SupportedBackend>HTP</SupportedBackend>
    </OpDef>

    <!--Attention-->
    <OpDef>
      <Name>Attention</Name>
      <Description>
        <Content>
          Allows the model to jointly attend to information from different representation subspaces as described in the paper: Attention Is All You Need.
        </Content>
      </Description>
      <Reference Source="Torch"
                 Url="https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html#multiheadattention"/>
      <Input>
        <Name>in[0]</Name>
        <Description>
          <Content>input activation</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>4D</Rank>
          <Layout>NHWC</Layout>
          <Text>[BATCH, HEAD, SEQ, EMB]</Text>
        </Shape>
      </Input>

      <Input>
        <Name>in[1]</Name>
        <Description>
          <Content>attention mask</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>2D</Rank>
          <Text>[BATCH, SEQ]</Text>
        </Shape>
      </Input>

      <Input>
        <Name>in[2]</Name>
        <Description>
          <Content>Q</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>3D</Rank>
          <Text>[HEAD, EMB, EMB]</Text>
        </Shape>
      </Input>

      <Input>
        <Name>in[3]</Name>
        <Description>
          <Content>K</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>3D</Rank>
          <Text>[HEAD, EMB, EMB]</Text>
        </Shape>
      </Input>

      <Input>
        <Name>in[4]</Name>
        <Description>
          <Content>V</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>3D</Rank>
          <Text>[HEAD, EMB, EMB]</Text>
        </Shape>
      </Input>

      <Output>
        <Name>out[0]</Name>
        <Description>
          <Content>output activation</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>4D</Rank>
          <Layout>NHWC</Layout>
          <Text>[BATCH, HEAD, SEQ, EMB]</Text>
        </Shape>
      </Output>

      <!--This Op is implemented on these Backends-->
      <SupportedBackend>HTP</SupportedBackend>
    </OpDef>

    <OpDef>
      <Name>RMSNorm</Name>
      <Description>
        <Content>
          LLaMA RMSNorm
        </Content>
      </Description>
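      <!--RMSNorm normalizes by the root mean square over the embedding axis and
          then scales: out = in / sqrt(mean(in^2) + eps) * weights, with eps a
          small constant (not specified in this file)-->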

      <Input>
        <Name>in[0]</Name>
        <Description>
          <Content>input activation</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>4D</Rank>
          <Layout>NHWC</Layout>
          <Text>[N, C, H, W]</Text>
        </Shape>
      </Input>

      <Input>
        <Name>weights</Name>
        <Description>
          <Content>RMSNorm weights</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>1D</Rank>
          <Text>[EMB]</Text>
        </Shape>
      </Input>

      <Output>
        <Name>out[0]</Name>
        <Description>
          <Content>output activation</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>4D</Rank>
          <Text>[N, C, H, W]</Text>
        </Shape>
      </Output>

      <!--This Op is implemented on these Backends-->
      <SupportedBackend>HTP</SupportedBackend>
    </OpDef>

    <OpDef>
      <Name>RoPE</Name>
      <Description>
        <Content>
          LLaMA RoPE
        </Content>
      </Description>
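      <!--RoPE (rotary position embedding) rotates consecutive feature pairs by
          position-dependent angles; the 1D weights input presumably holds a
          precomputed sin/cos table-->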

      <Input>
        <Name>in[0]</Name>
        <Description>
          <Content>input activation</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>4D</Rank>
          <Layout>NHWC</Layout>
          <Text>[N, C, H, W]</Text>
        </Shape>
      </Input>

      <Input>
        <Name>weights</Name>
        <Description>
          <Content>RoPE weights</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>1D</Rank>
          <Text>[32768]</Text>
        </Shape>
      </Input>

      <Output>
        <Name>out[0]</Name>
        <Description>
          <Content>output activation</Content>
        </Description>
        <Mandatory>true</Mandatory>
        <Datatype>BACKEND_SPECIFIC</Datatype>
        <Shape>
          <Rank>4D</Rank>
          <Text>[N, C, H, W]</Text>
        </Shape>
      </Output>

      <!--This Op is implemented on these Backends-->
      <SupportedBackend>HTP</SupportedBackend>
    </OpDef>
  </OpDefList>

  <SupplementalOpDefList Backend="HTP">
    <SupportedOps>
      <OpName>SiLU</OpName>
      <OpName>Attention</OpName>
      <OpName>RMSNorm</OpName>
      <OpName>RoPE</OpName>
    </SupportedOps>
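    <!--The supplemental definitions below pin the BACKEND_SPECIFIC datatypes
        declared above to concrete QNN datatypes for the HTP backend-->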

    <!--SiLU-->
    <SupplementalOpDef>
      <Name>SiLU</Name>

      <Input>
        <Name>in[0]</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Input>

      <Output>
        <Name>out[0]</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Output>
    </SupplementalOpDef>

    <!--Attention-->
    <SupplementalOpDef>
      <Name>Attention</Name>

      <Input>
        <Name>in[0]</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Input>
      <Input>
        <Name>in[1]</Name>
        <Datatype>QNN_DATATYPE_UINT_32</Datatype>
      </Input>
      <Input>
        <Name>in[2]</Name>
        <Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
      </Input>
      <Input>
        <Name>in[3]</Name>
        <Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
      </Input>
      <Input>
        <Name>in[4]</Name>
        <Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
      </Input>

      <Output>
        <Name>out[0]</Name>
        <Datatype>QNN_DATATYPE_UFIXED_POINT_8</Datatype>
        <Datatype>QNN_DATATYPE_UFIXED_POINT_16</Datatype>
      </Output>
    </SupplementalOpDef>

    <!--RMSNorm-->
    <SupplementalOpDef>
      <Name>RMSNorm</Name>

      <Input>
        <Name>in[0]</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Input>
      <Input>
        <Name>weights</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Input>

      <Output>
        <Name>out[0]</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Output>
    </SupplementalOpDef>

    <!--RoPE-->
    <SupplementalOpDef>
      <Name>RoPE</Name>

      <Input>
        <Name>in[0]</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Input>
      <Input>
        <Name>weights</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Input>

      <Output>
        <Name>out[0]</Name>
        <Datatype>QNN_DATATYPE_FLOAT_16</Datatype>
      </Output>
    </SupplementalOpDef>
  </SupplementalOpDefList>

</OpDefCollection>
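
For reference, a minimal NumPy sketch of what the four ops above presumably compute, based on their descriptions, declared shapes, and the cited Torch references. The function names, the epsilon value, and the on-the-fly RoPE angles are illustrative assumptions, not part of the package.

import numpy as np

def silu(x):
    # SiLU / swish: x * sigmoid(x), element-wise.
    return x / (1.0 + np.exp(-x))

def rms_norm(x, weight, eps=1e-6):
    # Root-mean-square normalization over the last (EMB) axis, then scale.
    # eps is an assumption; the XML does not specify it.
    rms = np.sqrt(np.mean(x * x, axis=-1, keepdims=True) + eps)
    return x / rms * weight

def rope(x, pos, base=10000.0):
    # Rotate consecutive feature pairs by position-dependent angles.
    # The package's 1D "weights" input presumably holds a precomputed
    # sin/cos table; here the angles are derived on the fly instead.
    d = x.shape[-1]
    inv_freq = 1.0 / (base ** (np.arange(0, d, 2) / d))
    ang = pos[:, None] * inv_freq[None, :]            # [SEQ, EMB/2]
    sin, cos = np.sin(ang), np.cos(ang)
    x1, x2 = x[..., 0::2], x[..., 1::2]
    out = np.empty_like(x)
    out[..., 0::2] = x1 * cos - x2 * sin
    out[..., 1::2] = x1 * sin + x2 * cos
    return out

def attention(x, mask, wq, wk, wv):
    # Per-head Q/K/V projections followed by scaled dot-product attention,
    # matching the declared shapes: x [BATCH, HEAD, SEQ, EMB],
    # wq/wk/wv [HEAD, EMB, EMB], mask [BATCH, SEQ] added to the scores.
    q = np.einsum('bhse,hef->bhsf', x, wq)
    k = np.einsum('bhse,hef->bhsf', x, wk)
    v = np.einsum('bhse,hef->bhsf', x, wv)
    scores = q @ k.transpose(0, 1, 3, 2) / np.sqrt(x.shape[-1])
    scores = scores + mask[:, None, None, :]          # broadcast over HEAD, SEQ
    p = np.exp(scores - scores.max(axis=-1, keepdims=True))
    p = p / p.sum(axis=-1, keepdims=True)             # softmax over keys
    return p @ v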