diff --git a/.github/workflows/sphinx-build-deploy.yml b/.github/workflows/sphinx-build-deploy.yml new file mode 100644 index 00000000..db32ef52 --- /dev/null +++ b/.github/workflows/sphinx-build-deploy.yml @@ -0,0 +1,36 @@ +name: Deploy Sphinx Documentation + +on: + push: + branches: + - sphinx_docs # Trigger the workflow on pushes to the sphinx_docs branch + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + ref: sphinx_docs # Ensure the sphinx_docs branch is checked out + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.x + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Build Sphinx documentation + run: | + sphinx-build -b html docs/source docs/build/html + + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/build/html diff --git a/.gitignore b/.gitignore index fdbbfad2..c8f99a56 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,9 @@ **/__pycache__ venv/ +.idea +arch-docs/ +doc.md +img.png +docs/build +docs/autodocs +docs/source/apidocs diff --git a/README.md b/README.md index 3334a0ea..ef79c8eb 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ the clean and optimized source code for those examples in [`paper/`](paper). ## Getting started -Have a look at the [SLOTHY tutorial](tutorial/README.md) for a hands-on and example-based introduction to SLOTHY. +Have a look at the [SLOTHY tutorial](docs/source/tutorial/README.md) for a hands-on and example-based introduction to SLOTHY. 
## Real world uses @@ -119,7 +119,7 @@ INFO:aarch64_simple0_a55.slothy:Minimum number of stalls: 18 ### Examples -The [SLOTHY Tutorial](tutorial/README.md) and the [examples](examples/naive) directory contain numerous exemplary +The [SLOTHY Tutorial](docs/source/tutorial/README.md) and the [examples](examples/naive) directory contain numerous exemplary assembly snippets. To try them, use `python3 example.py --examples={YOUR_EXAMPLE}`. See `python3 example.py --help` for the list of all available examples. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/index.md b/docs/index.md index 7c3d8df5..e9534185 100644 --- a/docs/index.md +++ b/docs/index.md @@ -15,7 +15,7 @@ instructions, and scheduling/allocation optimizations are tightly controlled thr constraints, the developer keeps close control over the final assembly, while being freed from tedious micro-optimizations. 
-See also [FAQ](faq.md) +See also [FAQ](source/faq.md) #### Architecture/Microarchitecture support diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..747ffb7b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/_static/css/style.css b/docs/source/_static/css/style.css new file mode 100644 index 00000000..7a2af633 --- /dev/null +++ b/docs/source/_static/css/style.css @@ -0,0 +1,11 @@ +.wy-nav-content { + max-width: 65% !important; +} + +/* +Fix for horizontal stacking weirdness in the RTD theme with Python properties: +https://github.com/readthedocs/sphinx_rtd_theme/issues/1301 +*/ +.py.property { + display: block !important; +} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..1a2006de --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,59 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import os, sys + +project = 'SLOTHY' +copyright = '2025, Hanno Becker, Amin Abdulrahman, Matthias Kannwischer, Justus Bergermann' +author = 'Hanno Becker, Amin Abdulrahman, Matthias Kannwischer, Justus Bergermann' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +sys.path.insert(0, os.path.abspath("../../slothy")) + +suppress_warnings = [ + 'misc.highlighting_failure', + 'myst.xref_missing', + 'autodoc2.dup_item' +] + +extensions = [ + 'sphinx.ext.imgmath', + 'sphinx_rtd_theme', + 'myst_parser', + 'autodoc2' +] + +templates_path = ['_templates'] +exclude_patterns = [] + +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} +master_doc = 'index' + + +autodoc2_packages = [ + "../../slothy", +] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ['_static'] +html_logo = "../slothy_logo.png" +html_theme_options = { 'logo_only': True, } +html_css_files = [ + 'css/style.css', +] + + diff --git a/docs/faq.md b/docs/source/faq.md similarity index 81% rename from docs/faq.md rename to docs/source/faq.md index d8bc21f9..16aaddeb 100644 --- a/docs/faq.md +++ b/docs/source/faq.md @@ -1,12 +1,6 @@ ---- -layout: default ---- +# Frequently asked questions -## Frequently asked questions - -[back](index.md) - -#### Is SLOTHY a peephole optimizer? +## Is SLOTHY a peephole optimizer? No. 
SLOTHY is a _fixed-instruction_ super-optimizer: It keeps instructions and optimizes register allocation, instruction scheduling, and software pipelining. It is the developer's or another tool's @@ -18,32 +12,32 @@ You may want to use SLOTHY on performance-critical workloads for which precise c is beneficial (e.g. because other code-generation techniques do not find ideal instruction sequences) or needed (e.g. because some instructions or instruction patterns have to be avoided for security). --> -#### Is SLOTHY better than {name your favourite superoptimizer}? +## Is SLOTHY better than {name your favourite superoptimizer}? Most likely, they serve different purposes. SLOTHY aims to do one thing well: Optimization _after_ instruction selection. It is thus independent of and potentially combinable with superoptimizers operating at earlier stages of the code-generation process, such as [souper](https://github.com/google/souper) and [CryptOpt](https://github.com/0xADE1A1DE/CryptOpt). -#### Does SLOTHY support x86? +## Does SLOTHY support x86? The core of SLOTHY is architecture- and microarchitecture-agnostic and can accommodate x86. As it stands, however, there is no model of the x86 architecture. Feel free to build one! -#### Does SLOTHY support RISC-V? +## Does SLOTHY support RISC-V? As for x86. -#### Is SLOTHY formally verified? +## Is SLOTHY formally verified? No. Arguably, that wouldn't be a good use of time. The more relevant question is the following: -#### Is SLOTHY-generated code formally verified to be equivalent to the input code? +## Is SLOTHY-generated code formally verified to be equivalent to the input code? Not yet. SLOTHY runs a self-check confirming that input and output have isomorphic data flow graphs, but pitfalls remain, such as bad user configurations allowing SLOTHY to clobber a register that's not meant to be reserved. More work is needed for formal verification of the equivalence of input and output. -#### Why is my question not here? 
+## Why is my question not here? Ping us! ([GitHub](https://github.com/slothy-optimizer/slothy/issues), or see [paper](https://eprint.iacr.org/2022/1303.pdf) for contact information). \ No newline at end of file diff --git a/docs/source/general.md b/docs/source/general.md new file mode 100644 index 00000000..f47421f9 --- /dev/null +++ b/docs/source/general.md @@ -0,0 +1,122 @@ +# About SLOTHY + +**SLOTHY** - **S**uper **L**azy **O**ptimization of **T**ricky **H**andwritten assembl**Y** - is an assembly-level superoptimizer +for: +1. Instruction scheduling +2. Register allocation +3. Software pipelining (= periodic loop interleaving) + +SLOTHY is generic in the target architecture and microarchitecture. This repository provides instantiations for: +- Armv8.1-M+Helium: Cortex-M55, Cortex-M85 +- AArch64: Cortex-A55, and experimentally Cortex-A72, Cortex-X/Neoverse-V, Apple M1 (Firestorm, Icestorm) + +SLOTHY is discussed in [Fast and Clean: Auditable high-performance assembly via constraint solving](https://eprint.iacr.org/2022/1303). + +## Goal + +SLOTHY enables a development workflow where developers write 'clean' assembly by hand, emphasizing the logic of the computation, while SLOTHY automates microarchitecture-specific micro-optimizations. This accelerates development, keeps manually written code artifacts maintainable, and allows to split efforts for formal verification into the separate verification of the clean code and the micro-optimizations. + +## How it works + +SLOTHY is essentially a constraint solver frontend: It converts the input source into a data flow graph and +builds a constraint model capturing valid instruction schedulings, register renamings, and periodic loop +interleavings. The model is passed to an external constraint solver and, upon success, +a satisfying assignment converted back into the final code. Currently, SLOTHY uses +[Google OR-Tools](https://developers.google.com/optimization) as its constraint solver backend. 
+ +## Performance + +As a rough rule of thumb, SLOTHY typically optimizes workloads of <50 instructions in seconds to minutes, workloads +up to 150 instructions in minutes to hours, while for larger kernels some heuristics are necessary. + +## Applications + +SLOTHY has been used to provide the fastest known implementations of various cryptographic and DSP primitives: +For example, the [SLOTHY paper](https://eprint.iacr.org/2022/1303) discusses the NTTs underlying ML-KEM and ML-DSA for +Cortex-{A55, A72, M55, M85}, the FFT for Cortex-{M55,M85}, and the X25519 scalar multiplication for Cortex-A55. You find +the clean and optimized source code for those examples in [`paper/`](https://github.com/slothy-optimizer/slothy/tree/main/paper). + +# Getting started + +Have a look at the [SLOTHY tutorial](tutorial/README.md) for a hands-on and example-based introduction to SLOTHY. + +# Real world uses + +* [AWS libcrypto (AWS-LC)](https://github.com/aws/aws-lc): SLOTHY-optimized X25519 code based on our un-interleaved form of the [original code by Emil + Lenngren](https://github.com/Emill/X25519-AArch64) has been [formally verified and + included](https://github.com/awslabs/s2n-bignum/pull/108) in + [s2n-bignum](https://github.com/awslabs/s2n-bignum/) (the bignum component of AWS-LC) and [merged](https://github.com/aws/aws-lc/pull/1469) into + AWS-LC. This was the topic of a [Real World Crypto 2024 + talk](https://iacr.org/submit/files/slides/2024/rwc/rwc2024/38/slides.pdf). + +* [s2n-bignum](https://github.com/awslabs/s2n-bignum/) routinely employs SLOTHY for finding +further highly optimized ECC implementations (e.g., [P256](https://github.com/awslabs/s2n-bignum/pull/118), +[P384](https://github.com/awslabs/s2n-bignum/pull/122), [P521](https://github.com/awslabs/s2n-bignum/pull/130)) and +verifies them through automated equivalence-checking in [HOL-Light](https://hol-light.github.io/). 
+ +* [Arm EndpointAI](https://github.com/ARM-software/EndpointAI): SLOTHY-optimized code has been deployed to the CMSIS DSP Library for the radix-4 CFFT routines as part + of the Arm EndpointAI project in [this + commit](https://github.com/ARM-software/EndpointAI/commit/817bb57d8a4a604538a04627851f5e9adb5f08fc). + +# Installation + +## Requirements + +SLOTHY has been successfully used on + +- Ubuntu-21.10 and up (64-bit), +- macOS Monterey 12.6 and up. + +SLOTHY requires Python >= 3.10. See [requirements.txt](https://github.com/slothy-optimizer/slothy/blob/main/requirements.txt) for package requirements, and install via `pip +install -r requirements.txt`. + +**Note:** `requirements.txt` pins versions for reproducibility. If you already have newer versions of some dependencies +installed and don't want them downgraded, consider using a virtual environment: + +``` +python3 -m venv venv +./venv/bin/python3 -m pip install -r requirements.txt +``` + +Then, enter the virtual environment via `source venv/bin/activate` prior to running SLOTHY. + +## Docker + +A dockerfile for an Ubuntu-22.04 based Docker image with all dependencies of SLOTHY and the PQMX+PQAX test +environments setup can be found in [paper/artifact/slothy.dockerfile](https://github.com/slothy-optimizer/slothy/blob/main/paper/artifact/slothy.Dockerfile). See +[paper/artifact/README.md](https://github.com/slothy-optimizer/slothy/blob/main/paper/artifact/README.md) for instructions. + +## Quick check + +To check that your setup is complete, try the following from the base directory: + +``` +% python3 example.py --examples aarch64_simple0_a55 +``` + +You should see something like the following: + +``` +* Example: aarch64_simple0_a55... +INFO:aarch64_simple0_a55:Instructions in body: 20 +INFO:aarch64_simple0_a55.slothy:Perform internal binary search for minimal number of stalls... +INFO:aarch64_simple0_a55.slothy:Attempt optimization with max 32 stalls... 
+INFO:aarch64_simple0_a55.slothy:Objective: minimize number of stalls +INFO:aarch64_simple0_a55.slothy:Invoking external constraint solver (OR-Tools CP-SAT v9.7.2996) ... +INFO:aarch64_simple0_a55.slothy:[0.0721s]: Found 1 solutions so far... objective 19.0, bound 8.0 (minimize number of stalls) +INFO:aarch64_simple0_a55.slothy:[0.0765s]: Found 2 solutions so far... objective 18.0, bound 12.0 (minimize number of stalls) +INFO:aarch64_simple0_a55.slothy:OPTIMAL, wall time: 0.155224 s +INFO:aarch64_simple0_a55.slothy:Booleans in result: 509 +INFO:aarch64_simple0_a55.slothy.selfcheck:OK! +INFO:aarch64_simple0_a55.slothy:Minimum number of stalls: 18 +``` + +## Examples + +The [SLOTHY Tutorial](tutorial/README.md) and the [examples](https://github.com/slothy-optimizer/slothy/tree/main/examples/naive) directory contain numerous exemplary +assembly snippets. To try them, use `python3 example.py --examples={YOUR_EXAMPLE}`. See `python3 example.py --help` for +the list of all available examples. + +The use of SLOTHY from the command line is illustrated in [scripts/](https://github.com/slothy-optimizer/slothy/tree/main/scripts) supporting the real-world optimizations +for the NTT, FFT and X25519 discussed in [Fast and Clean: Auditable high-performance assembly via constraint +solving](https://eprint.iacr.org/2022/1303). diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..e298ce9a --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,34 @@ +.. Slothy documentation master file, created by + sphinx-quickstart on Wed Dec 11 11:55:58 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Slothy documentation +==================== + +.. include:: ./general.md + :parser: myst_parser.sphinx_ + +.. toctree:: + :maxdepth: 1 + :hidden: + + ./general.md + +.. toctree:: + :maxdepth: 1 + :hidden: + + tutorial/README.md + +.. toctree:: + :hidden: + + ./faq.md + +.. 
toctree:: + :maxdepth: 2 + :hidden: + + apidocs/index + diff --git a/tutorial/README.md b/docs/source/tutorial/README.md similarity index 88% rename from tutorial/README.md rename to docs/source/tutorial/README.md index 3f7c0ac9..753fbede 100644 --- a/tutorial/README.md +++ b/docs/source/tutorial/README.md @@ -1,7 +1,7 @@ # SLOTHY Tutorial This tutorial introduces you to using the SLOTHY superoptimizer for optimizing assembly programs for a specific microarchitecture. -It goes beyond what is written in the [README](../README.md) or the [SLOTHY +It goes beyond what is written in the [README](../../../README.md) or the [SLOTHY paper](https://eprint.iacr.org/2022/1303.pdf) in that it gives more examples on how we, the developers of SLOTHY, typically use SLOTHY to optimize cryptographic code. At the end of the tutorial, you should be familiar with the workflow of using SLOTHY as well as a number of common ways to debug or improve your results. @@ -51,16 +51,16 @@ formally verified using the [HOL-Light](https://github.com/jrh13/hol-light) proo ## Table of contents -1) [Installation](#1-installation). This is limited to the fastest way of installing SLOTHY using pip. For more complete instructions, see the [README](../README.md). -2) [Getting started](#2-getting-started) -3) [Using SLOTHY for your own code](#3-writing-your-own-calling-code) -4) [Using SLOTHY's Software Pipelining](#4-software-pipelining) -5) [Checking the quality of SLOTHY optimizations](#5-checking-the-quality-of-slothy-optimizations) -6) [Optimizing a full Neon NTT](#6-optimizing-a-full-neon-ntt) -7) [Optimizing larger pieces of code](#7-optimizing-larger-pieces-of-code) -8) [Adding a new microarchitecture](#8-adding-a-new-microarchitecture) +1) [Installation](#installation). This is limited to the fastest way of installing SLOTHY using pip. For more complete instructions, see the [README](../../../README.md). 
+2) [Getting started](#getting-started) +3) [Using SLOTHY for your own code](#writing-your-own-calling-code) +4) [Using SLOTHY's Software Pipelining](#software-pipelining) +5) [Checking the quality of SLOTHY optimizations](#checking-the-quality-of-slothy-optimizations) +6) [Optimizing a full Neon NTT](#optimizing-a-full-neon-ntt) +7) [Optimizing larger pieces of code](#optimizing-larger-pieces-of-code) +8) [Adding a new microarchitecture](#adding-a-new-microarchitecture) -The SLOTHY calling code used for the parts 3-7 is located in `tutorial-{3a,3b,4,5,6,7}.py`. +The SLOTHY calling code used for the parts 3-7 is located in [tutorial-files/tutorial-{3a,3b,4,5,6,7}.py](https://github.com/slothy-optimizer/slothy/tree/main/tutorial_files). ## 1. Installation @@ -91,12 +91,12 @@ We will look into more examples shortly and discuss input, output, and available The simplest way to get started using SLOTHY is by trying out some of the examples that come with SLOTHY. Once you work on your own code, you will likely be using the `slothy-cli` command or calling the SLOTHY module from your own Python script for invoking SLOTHY allowing you to control all the different options SLOTHY has. -However, for now we will be using the [example.py](../example.py) script and containing a number of examples including the ones we have optimized in the SLOTHY paper. +However, for now we will be using the [example.py](https://github.com/slothy-optimizer/slothy/blob/main/example.py) script, which contains a number of examples including the ones we have optimized in the SLOTHY paper. You can run `python3 example.py --help` to see all examples available. Let's look at a very simple example from the previous section called `aarch64_simple0`. 
-You can find the corresponding code in [examples/naive/aarch64/aarch64_simple0.s](../examples/naive/aarch64/aarch64_simple0.s): -```nasm +You can find the corresponding code in [examples/naive/aarch64/aarch64_simple0.s](https://github.com/slothy-optimizer/slothy/blob/main/examples/naive/aarch64/aarch64_simple0.s): +```asm ldr q0, [x1, #0] ldr q1, [x2, #0] @@ -133,10 +133,10 @@ Note, however, that SLOTHY has been used to also obtain significant speed-ups fo SLOTHY comes with models for various Arm architectures, including the power-efficient, in-order [Cortex-A55](https://developer.arm.com/Processors/Cortex-A55), so we can now optimize this piece of code for that -microarchitecture. [example.py](../example.py) contains the needed SLOTHY incarnations for convenience, so we can simply run `python3 +microarchitecture. [example.py](https://github.com/slothy-optimizer/slothy/blob/main/example.py) contains the needed SLOTHY incarnations for convenience, so we can simply run `python3 example.py --examples aarch64_simple0_a55` which will optimize for the Cortex-A55 microarchitecture. You can check -[example.py](../example.py) for the details. This will optimize the piece of code above and write the output code to -[examples/opt/aarch64/aarch64_simple0_opt_a55.s](../examples/opt/aarch64/aarch64_simple0_opt_a55.s). +[example.py](https://github.com/slothy-optimizer/slothy/blob/main/example.py) for the details. This will optimize the piece of code above and write the output code to +[examples/opt/aarch64/aarch64_simple0_opt_a55.s](https://github.com/slothy-optimizer/slothy/blob/main/examples/opt/aarch64/aarch64_simple0_opt_a55.s). SLOTHY should print something similar to this: ``` INFO:aarch64_simple0_a55:Instructions in body: 20 @@ -158,8 +158,8 @@ The best solution it can find has 16 stalls -- which is guaranteed to be the min In the last step, SLOTHY will transform the found traversal of the DFG into actual assembly and write it to the file. 
To make sure everything worked out as expected, it will perform a selfcheck which consists of transforming the output assembly into a DFG again and testing that the resulting graph is isomorphic to the input DFG. -We can now take a look at the output assembly in [examples/opt/aarch64/aarch64_simple0_opt_a55.s](../examples/opt/aarch64/aarch64_simple0_opt_a55.s): -```nasm +We can now take a look at the output assembly in [examples/opt/aarch64/aarch64_simple0_opt_a55.s](https://github.com/slothy-optimizer/slothy/blob/main/examples/opt/aarch64/aarch64_simple0_opt_a55.s): +```asm ldr q8, [x1, #0] // *................... // gap // .................... // gap // .................... @@ -296,7 +296,7 @@ microarchitecture model that come with SLOTHY. The calls to SLOTHY should be self-explanatory: - `load_source_from_file` loads an assembly file to be optimized. - - `slothy.config` can be used to configure SLOTHY. For the documentation of the configuration options, see the comments in [config.py](../slothy/core/config.py). + - `slothy.config` can be used to configure SLOTHY. For the documentation of the configuration options, see the comments in [config.py](https://github.com/slothy-optimizer/slothy/blob/main/slothy/core/config.py). - `optimize` performs the actual optimizations by calling the external constraint solver. - `write_source_to_file` writes back the optimized assembly to a file. @@ -312,62 +312,62 @@ find the minimum number of stalls for which a solution exists. Even with this small Neon example, you can see that understanding the input code is much easier than the output code. 
In fact, the input code can be further clarified through the use of macros and register aliases, leading to the following 'clean' version from -[examples/naive/aarch64/aarch64_simple0_macros.s](../examples/naive/aarch64/aarch64_simple0_macros.s) which makes it +[examples/naive/aarch64/aarch64_simple0_macros.s](https://github.com/slothy-optimizer/slothy/blob/main/examples/naive/aarch64/aarch64_simple0_macros.s) which makes it apparent that our example is just a pair of NTT butterflies using Barrett multiplication. Note that the `.req` and `.macro` directives used here are commonly supported [assembly directives](https://www.sourceware.org/binutils/docs/as/ARM-Directives.html). -```nasm -qdata0 .req q8 -qdata1 .req q9 -qdata2 .req q10 -qdata3 .req q11 - -qtwiddle .req q0 - -data0 .req v8 -data1 .req v9 -data2 .req v10 -data3 .req v11 - -twiddle .req v0 -modulus .req v1 - -tmp .req v12 - -data_ptr .req x0 -twiddle_ptr .req x1 - -.macro barmul out, in, twiddle, modulus - mul \out.8h, \in.8h, \twiddle.h[0] - sqrdmulh \in.8h, \in.8h, \twiddle.h[1] - mls \out.8h, \in.8h, \modulus.h[0] -.endm - -.macro butterfly data0, data1, tmp, twiddle, modulus - barmul \tmp, \data1, \twiddle, \modulus - sub \data1.8h, \data0.8h, \tmp.8h - add \data0.8h, \data0.8h, \tmp.8h -.endm - -start: - - ldr qtwiddle, [twiddle_ptr, #0] - - ldr qdata0, [data_ptr, #0*16] - ldr qdata1, [data_ptr, #1*16] - ldr qdata2, [data_ptr, #2*16] - ldr qdata3, [data_ptr, #3*16] - - butterfly data0, data1, tmp, twiddle, modulus - butterfly data2, data3, tmp, twiddle, modulus - - str qdata0, [data_ptr], #4*16 - str qdata1, [data_ptr, #-3*16] - str qdata2, [data_ptr, #-2*16] - str qdata3, [data_ptr, #-1*16] - -end: +```asm + qdata0 .req q8 + qdata1 .req q9 + qdata2 .req q10 + qdata3 .req q11 + + qtwiddle .req q0 + + data0 .req v8 + data1 .req v9 + data2 .req v10 + data3 .req v11 + + twiddle .req v0 + modulus .req v1 + + tmp .req v12 + + data_ptr .req x0 + twiddle_ptr .req x1 + + .macro barmul out, in, twiddle, 
modulus + mul \out.8h, \in.8h, \twiddle.h[0] + sqrdmulh \in.8h, \in.8h, \twiddle.h[1] + mls \out.8h, \in.8h, \modulus.h[0] + .endm + + .macro butterfly data0, data1, tmp, twiddle, modulus + barmul \tmp, \data1, \twiddle, \modulus + sub \data1.8h, \data0.8h, \tmp.8h + add \data0.8h, \data0.8h, \tmp.8h + .endm + + start: + + ldr qtwiddle, [twiddle_ptr, #0] + + ldr qdata0, [data_ptr, #0*16] + ldr qdata1, [data_ptr, #1*16] + ldr qdata2, [data_ptr, #2*16] + ldr qdata3, [data_ptr, #3*16] + + butterfly data0, data1, tmp, twiddle, modulus + butterfly data2, data3, tmp, twiddle, modulus + + str qdata0, [data_ptr], #4*16 + str qdata1, [data_ptr, #-3*16] + str qdata2, [data_ptr, #-2*16] + str qdata3, [data_ptr, #-1*16] + + end: ``` SLOTHY will then internally expand all macros and the resulting DFG will be exactly the same as before. @@ -403,7 +403,7 @@ postamble, respectively. Let's look at an example demonstrating how SLOTHY can perform software pipelining for you. Consider the simple case of performing the code from the previous example within a loop with a fixed number of iterations (>=2). This is exactly what the `aarch64_simple0_loop` example in SLOTHY does: -```nasm +```asm ... // .req and .macro as above count .req x2 @@ -452,7 +452,7 @@ automatically detect that the loop ends at `cbnz count, start`. Finally, `optimi it would by default -- you normally want this set, but we unset it here to simplify the output. This is what it will look like: -```nasm +```asm // ... count .req x2 ldr qtwiddle, [twiddle_ptr, #0] @@ -598,7 +598,7 @@ slothy.write_source_to_file("./aarch64_simple0_loop_mca_a55.s") This will call LLVM MCA on both the original code and the optimized code and append the LLVM MCA statistics as a comment to the output. 
Somewhere in the code you will see: -```nasm +```asm // LLVM MCA STATISTICS (ORIGINAL) BEGIN // // Iterations: 100 @@ -630,7 +630,7 @@ This suggests that our optimizations were actually useful: With respect to LLVM- cycle count per iteration was reduced from 31 cycles to 21 cycles. But LLVM MCA gives you more: It outputs a timeline view showing how each instruction travels through the pipeline: -```nasm +```asm // Timeline view (ORIGINAL): // 0123456789 0123456789 0123456789 0123456789 01234 // Index 0123456789 0123456789 0123456789 0123456789 0123456789 @@ -769,48 +769,49 @@ as part of the [pqax](https://github.com/slothy-optimizer/pqax) benchmarking fra input and automatically generates a program running and benchmarking prefixes of the input, and combining them into a performance diagram similar to the one generated by LLVM-MCA. Here's the output in our case: -```nasm -===== Stepwise profiling ======= -[ 0]: ldr q0, [x1, #0] ......*..................................... -[ 1]: ldr q8, [x0, #0*16] .......*.................................... -[ 2]: ldr q9, [x0, #1*16] .........*.................................. -[ 3]: ldr q10, [x0, #2*16] ...........*................................ -[ 4]: ldr q11, [x0, #3*16] .............*.............................. -[ 5]: mul v12.8h, v9.8h, v0.h[0] ...............*............................ -[ 6]: sqrdmulh v9.8h, v9.8h, v0.h[1] ................*........................... -[ 7]: mls v12.8h, v9.8h, v1.h[0] .................*.......................... -[ 8]: sub v9.8h, v8.8h, v12.8h .....................*...................... -[ 9]: add v8.8h, v8.8h, v12.8h ........................*................... -[ 10]: mul v12.8h, v11.8h, v0.h[0] .........................*.................. -[ 11]: sqrdmulh v11.8h, v11.8h, v0.h[1] ..........................*................. -[ 12]: mls v12.8h, v11.8h, v1.h[0] ...........................*................ -[ 13]: sub v11.8h, v10.8h, v12.8h ...............................*............ 
-[ 14]: add v10.8h, v10.8h, v12.8h ..................................*......... -[ 15]: str q8, [x0], #4*16 ...................................*........ -[ 16]: str q9, [x0, #-3*16] .....................................*...... -[ 17]: str q10, [x0, #-2*16] .....................................*...... -[ 18]: str q11, [x0, #-1*16] ........................................*... - -===== Stepwise profiling (OPTIMIZED) ======= -[ 0]: ldr q18, [x0, #16] // .*........................ -[ 1]: sqrdmulh v8.8H, v6.8H, v2.H[1] // ..*....................... -[ 2]: mul v23.8H, v6.8H, v2.H[0] // ...*...................... -[ 3]: ldr q31, [x0, #32] // ....*..................... -[ 4]: mul v3.8H, v18.8H, v2.H[0] // ......*................... -[ 5]: mls v23.8H, v8.8H, v1.H[0] // .......*.................. -[ 6]: sqrdmulh v9.8H, v18.8H, v2.H[1] // ........*................. -[ 7]: ldr q15, [x0, #0] // .........*................ -[ 8]: sub v11.8H, v31.8H, v23.8H // ...........*.............. -[ 9]: mls v3.8H, v9.8H, v1.H[0] // ............*............. -[ 10]: add v16.8H, v31.8H, v23.8H // .............*............ -[ 11]: str q11, [x0, #48] // ..............*........... -[ 12]: ldr q2, [x1, #0] // ...............*.......... -[ 13]: add v13.8H, v15.8H, v3.8H // .................*........ -[ 14]: str q16, [x0, #32] // ..................*....... -[ 15]: sub v7.8H, v15.8H, v3.8H // ...................*...... -[ 16]: str q13, [x0], #4*16 // ....................*..... -[ 17]: ldr q6, [x0, #48] // .....................*.... -[ 18]: str q7, [x0, #-48] // .......................*.. +```asm + + ===== Stepwise profiling ======= + [ 0]: ldr q0, [x1, #0] ......*..................................... + [ 1]: ldr q8, [x0, #0*16] .......*.................................... + [ 2]: ldr q9, [x0, #1*16] .........*.................................. + [ 3]: ldr q10, [x0, #2*16] ...........*................................ + [ 4]: ldr q11, [x0, #3*16] .............*.............................. 
+ [ 5]: mul v12.8h, v9.8h, v0.h[0] ...............*............................ + [ 6]: sqrdmulh v9.8h, v9.8h, v0.h[1] ................*........................... + [ 7]: mls v12.8h, v9.8h, v1.h[0] .................*.......................... + [ 8]: sub v9.8h, v8.8h, v12.8h .....................*...................... + [ 9]: add v8.8h, v8.8h, v12.8h ........................*................... + [ 10]: mul v12.8h, v11.8h, v0.h[0] .........................*.................. + [ 11]: sqrdmulh v11.8h, v11.8h, v0.h[1] ..........................*................. + [ 12]: mls v12.8h, v11.8h, v1.h[0] ...........................*................ + [ 13]: sub v11.8h, v10.8h, v12.8h ...............................*............ + [ 14]: add v10.8h, v10.8h, v12.8h ..................................*......... + [ 15]: str q8, [x0], #4*16 ...................................*........ + [ 16]: str q9, [x0, #-3*16] .....................................*...... + [ 17]: str q10, [x0, #-2*16] .....................................*...... + [ 18]: str q11, [x0, #-1*16] ........................................*... + + ===== Stepwise profiling (OPTIMIZED) ======= + [ 0]: ldr q18, [x0, #16] // .*........................ + [ 1]: sqrdmulh v8.8H, v6.8H, v2.H[1] // ..*....................... + [ 2]: mul v23.8H, v6.8H, v2.H[0] // ...*...................... + [ 3]: ldr q31, [x0, #32] // ....*..................... + [ 4]: mul v3.8H, v18.8H, v2.H[0] // ......*................... + [ 5]: mls v23.8H, v8.8H, v1.H[0] // .......*.................. + [ 6]: sqrdmulh v9.8H, v18.8H, v2.H[1] // ........*................. + [ 7]: ldr q15, [x0, #0] // .........*................ + [ 8]: sub v11.8H, v31.8H, v23.8H // ...........*.............. + [ 9]: mls v3.8H, v9.8H, v1.H[0] // ............*............. + [ 10]: add v16.8H, v31.8H, v23.8H // .............*............ + [ 11]: str q11, [x0, #48] // ..............*........... + [ 12]: ldr q2, [x1, #0] // ...............*.......... 
+ [ 13]: add v13.8H, v15.8H, v3.8H // .................*........ + [ 14]: str q16, [x0, #32] // ..................*....... + [ 15]: sub v7.8H, v15.8H, v3.8H // ...................*...... + [ 16]: str q13, [x0], #4*16 // ....................*..... + [ 17]: ldr q6, [x0, #48] // .....................*.... + [ 18]: str q7, [x0, #-48] // .......................*.. ``` We can see that SLOTHY's predictions were exactly right, and that LLVM-MCA's model is off in a few places. @@ -823,7 +824,7 @@ The examples previously considered were all toy examples, so you may wonder how Let's look at a real-world example: The Kyber number-theoretic transform -- a core arithmetic function of the Kyber key-encapsulation mechanism making up a large chunk of the total run-time. The target platform is again the Arm Cortex-A55 and the code primarily consists of Neon vector instructions. -We'll consider a straightforward implementation available here: [ntt_kyber_123_4567.s](../examples/naive/aarch64/ntt_kyber_123_4567.s). +We'll consider a straightforward implementation available here: [ntt_kyber_123_4567.s](https://github.com/slothy-optimizer/slothy/blob/main/examples/naive/aarch64/ntt_kyber_123_4567.s). If you have ever written an NTT, it should be fairly easy to understand what the code is doing. The code consists of 2 main loops implementing layers 1+2+3 and 4+5+6+7 of the NTT. The actual operations are wrapped in macros implementing butterflies on single vector registers. @@ -844,7 +845,7 @@ slothy.write_source_to_file("opt/ntt_kyber_123_4567_opt_a55.s") ``` We simply optimize both loops separately. -You will notice some additional flags we have set. To read the documentation of those, please have a look at [config.py](../slothy/core/config.py). +You will notice some additional flags we have set. To read the documentation of those, please have a look at [config.py](https://github.com/slothy-optimizer/slothy/blob/main/slothy/core/config.py). 
We have set an additional flag: `inputs_are_outputs = True`. This tells SLOTHY that the registers that are used as inputs to the loop (e.g., the pointer to the polynomial input) are also outputs of the entire loop; otherwise, SLOTHY could overwrite them in the postamble once they are no longer needed. You most likely want `inputs_are_outputs=True` @@ -856,7 +857,7 @@ registers. If you are familiar with inline assembly, SLOTHY's `reserved_regs` ar When running this example, you will notice that it has a significantly longer runtime. On my Intel i7-1360P it takes approximately 15 minutes to optimize both loops. -You may instead look at an optimized version of the same code [examples/opt/aarch64/ntt_kyber_123_4567_opt_a55.s](../examples/opt/aarch64/ntt_kyber_123_4567_opt_a55.s). +You may instead look at an optimized version of the same code [examples/opt/aarch64/ntt_kyber_123_4567_opt_a55.s](https://github.com/slothy-optimizer/slothy/blob/main/examples/opt/aarch64/ntt_kyber_123_4567_opt_a55.s). You notice that both loops have many early instructions, and coming up with this code by hand would be tedious, time-consuming and error-prone. @@ -867,7 +868,7 @@ When using a more powerful machine and allowing optimization times of hours, one We've successfully used (vanilla) SLOTHY for optimized code snippets of up to 180 instructions. However, for larger code at a certain point the constraint solving becomes prohibitively expensive and we need to use a different strategy. 
-One such example is the X25519 implementation we looked at in the [SLOTHY paper](https://eprint.iacr.org/2022/1303) available in [X25519-AArch64-simple.s](../examples/naive/aarch64/X25519-AArch64-simple.s) +One such example is the X25519 implementation we looked at in the [SLOTHY paper](https://eprint.iacr.org/2022/1303) available in [X25519-AArch64-simple.s](https://github.com/slothy-optimizer/slothy/blob/main/examples/naive/aarch64/X25519-AArch64-simple.s) It is a hybrid vector-scalar implementation based on an [implementation](https://github.com/Emill/X25519-AArch64) by Lenngren. Its core loop consists of 958 instructions which well exceeds what SLOTHY can currently optimize in a single pass. @@ -920,15 +921,15 @@ We can configure SLOTHY to only consider register allocation by setting the `all In this way, the constraints remain manageable, and SLOTHY finds a register allocation within a few minutes. Running this example takes around 15 minutes. -You can instead look at the output available in [opt/X25519-AArch64-simple_opt.s](opt/X25519-AArch64-simple_opt.s) +You can instead look at the output available in [opt/X25519-AArch64-simple_opt.s](https://github.com/slothy-optimizer/slothy/tree/main/tutorial_files/opt/X25519-AArch64-simple_opt.s) The output will look similar to the previous examples and contains significantly less pipeline stalls than the input. -For achieving the best performance, we require a few more calls to SLOTHY. You can find the script we used [here](../paper/scripts/slothy_x25519.sh) - it runs around 1.5 hours. +For achieving the best performance, we require a few more calls to SLOTHY. You can find the script we used [here](https://github.com/slothy-optimizer/slothy/blob/main/paper/scripts/slothy_x25519.sh) - it runs around 1.5 hours. ## 8. Adding a new microarchitecture You may wonder how to extend SLOTHY to include a new microarchitecture. 
For example, you may want to optimize code for a newer iteration of the Arm Cortex-A55, e.g., the Arm Cortex-A510. -To understand what is needed for that, let's look at the microarchitectural model for the Cortex-A55 available in [slothy/targets/aarch64/cortex_a55.py](../slothy/targets/aarch64/cortex_a55.py). +To understand what is needed for that, let's look at the microarchitectural model for the Cortex-A55 available in [slothy/targets/aarch64/cortex_a55.py](https://github.com/slothy-optimizer/slothy/blob/main/slothy/targets/aarch64/cortex_a55.py). Skipping some boilerplate code, you will see the following structure: ```python @@ -984,7 +985,7 @@ Going through the snippet, we can see the core components: - Finally, we need to implement the functions `get_latency`, `get_units`, `get_inverse_throughput` returning the latency, occupied execution units, and throughputs. The input to these functions is a class from the architectural model representing the instruction in question. For example, the class `vmull` in - [aarch64_neon.py](../slothy/targets/aarch64/aarch64_neon.py) corresponds to the `umull` instruction. We commonly + [aarch64_neon.py](https://github.com/slothy-optimizer/slothy/blob/main/slothy/targets/aarch64/aarch64_neon.py) corresponds to the `umull` instruction. We commonly implement this using dictionaries above. 
For example, for the (128-bit/qform) `vmull` instruction, we can find in the [Arm Cortex-A55 Software Optimization diff --git a/tutorial/opt/X25519-AArch64-simple_opt.s b/docs/source/tutorial/opt/X25519-AArch64-simple_opt.s similarity index 100% rename from tutorial/opt/X25519-AArch64-simple_opt.s rename to docs/source/tutorial/opt/X25519-AArch64-simple_opt.s diff --git a/tutorial/opt/aarch64_simple0_loop_opt_a55.s b/docs/source/tutorial/opt/aarch64_simple0_loop_opt_a55.s similarity index 100% rename from tutorial/opt/aarch64_simple0_loop_opt_a55.s rename to docs/source/tutorial/opt/aarch64_simple0_loop_opt_a55.s diff --git a/tutorial/opt/aarch64_simple0_loop_opt_mca_a55.s b/docs/source/tutorial/opt/aarch64_simple0_loop_opt_mca_a55.s similarity index 100% rename from tutorial/opt/aarch64_simple0_loop_opt_mca_a55.s rename to docs/source/tutorial/opt/aarch64_simple0_loop_opt_mca_a55.s diff --git a/tutorial/opt/aarch64_simple0_macros_opt_a55.s b/docs/source/tutorial/opt/aarch64_simple0_macros_opt_a55.s similarity index 100% rename from tutorial/opt/aarch64_simple0_macros_opt_a55.s rename to docs/source/tutorial/opt/aarch64_simple0_macros_opt_a55.s diff --git a/tutorial/opt/aarch64_simple0_opt_a55.s b/docs/source/tutorial/opt/aarch64_simple0_opt_a55.s similarity index 100% rename from tutorial/opt/aarch64_simple0_opt_a55.s rename to docs/source/tutorial/opt/aarch64_simple0_opt_a55.s diff --git a/tutorial/opt/ntt_kyber_123_4567_opt_a55.s b/docs/source/tutorial/opt/ntt_kyber_123_4567_opt_a55.s similarity index 100% rename from tutorial/opt/ntt_kyber_123_4567_opt_a55.s rename to docs/source/tutorial/opt/ntt_kyber_123_4567_opt_a55.s diff --git a/requirements.txt b/requirements.txt index 08b8ea89..cb58fd01 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,11 @@ numpy==1.26.4 ortools==9.7.2996 pandas==2.1.1 sympy==1.12 +sphinx==8.1.3 +sphinx_rtd_theme==3.0.2 +sphinx_autoapi +sphinx_mdinclude +myst_parser +sphinx-autobuild +sphinx-autodoc2 unicorn==2.1.1 diff 
--git a/slothy/core/config.py b/slothy/core/config.py index f3ac1323..39f5fc13 100644 --- a/slothy/core/config.py +++ b/slothy/core/config.py @@ -37,32 +37,37 @@ from slothy.helper import LockAttributes, NestedPrint class InvalidConfig(Exception): - """Exception raised when an invalid SLOTHY configuration is detected""" + """Exception raised when an invalid SLOTHY configuration is detected + """ class Config(NestedPrint, LockAttributes): """Configuration for Slothy. This configuration object is used both for one-shot optimizations using - SlothyBase, as well as stateful multi-pass optimizations using Slothy.""" + SlothyBase, as well as stateful multi-pass optimizations using Slothy. + """ @property def arch(self): """The module defining the underlying architecture used by Slothy. - TODO: Add details on what exactly is assumed about this module.""" + TODO: Add details on what exactly is assumed about this module. + """ return self._arch @property def target(self): """The module defining the target microarchitecture used by Slothy. - TODO: Add details on what exactly is assumed about this module.""" + TODO: Add details on what exactly is assumed about this module. + """ return self._target @property def outputs(self): """List defining of architectural or symbolic registers that should - be considered as outputs of the input snippet.""" + be considered as outputs of the input snippet. + """ return self._outputs @property @@ -74,14 +79,19 @@ def reserved_regs(self): In the lingo of inline assembly, this can be seen as the complement of the clobber list. - NOTE: Reserved registers are, by default, considered "locked": They - will not be _introduced_ during renaming, but existing uses will not - be touched. If you want to remove existing uses of reserved registers - through renaming, you should disable `reserved_regs_are_locked`. + .. note:: - WARNING: When this is set, it _overwrites_ the default reserved registers for - the target architecture. 
If you still want the default reserved - registers to remain reserved, you have to explicitly list them!""" + Reserved registers are, by default, considered "locked": They + will not be _introduced_ during renaming, but existing uses will not + be touched. If you want to remove existing uses of reserved registers + through renaming, you should disable `reserved_regs_are_locked`. + + .. warning:: + + When this is set, it _overwrites_ the default reserved registers for + the target architecture. If you still want the default reserved + registers to remain reserved, you have to explicitly list them! + """ if self._reserved_regs is not None: return self._reserved_regs return self._arch.RegisterType.default_reserved() @@ -97,12 +107,14 @@ def reserved_regs_are_locked(self): Disable this configuration option to allow (in fact, force) renaming of existing uses of reserved registers. This can be useful when trying - to eliminate uses of particular registers from some piece of assembly.""" + to eliminate uses of particular registers from some piece of assembly. + """ return self._reserved_regs_are_locked @property def selftest(self): - """Indicates whether SLOTHY performs an empirical equivalence-test on the + """ + Indicates whether SLOTHY performs an empirical equivalence-test on the optimization results. When this is set, and if the target architecture and host platform support it, @@ -111,55 +123,68 @@ def selftest(self): The primary purpose of this checker is to detect issue that would presently be overlooked by the selfcheck: - - The selfcheck is currently blind to address offset fixup. If something goes + + * The selfcheck is currently blind to address offset fixup. If something goes wrong, the input and output will not be functionally equivalent, but we would only notice once we actually compile and run the code. The selftest will likely catch issues. 
- - When using software pipelining, the selfcheck reduces to a straightline check + + * When using software pipelining, the selfcheck reduces to a straightline check for a bounded unrolling of the loop. An unbounded selfcheck is currently not - implemented. - With the selftest, you still need to fix a loop bound, but at least you can + implemented. With the selftest, you still need to fix a loop bound, but at least you can equivalence-check the loop-form (including the compare+branch instructions at the loop boundary) rather than the unrolled code. - DEPENDENCY: To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc` - in your PATH. Those are part of a standard LLVM setup. + .. important:: + + To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc` + in your PATH. Those are part of a standard LLVM setup. + + .. note:: - NOTE: This is so far implemented as a repeated randomized test -- nothing clever. + This is so far implemented as a repeated randomized test -- nothing clever. """ return self._selftest @property def selftest_iterations(self): - """If selftest is set, indicates the number of random selftest to conduct""" + """If selftest is set, indicates the number of random selftest to conduct + """ return self._selftest_iterations @property def selftest_address_registers(self): """Dictionary of (reg, sz) items indicating which registers are assumed to be - pointers to memory, and if so, of what size.""" + pointers to memory, and if so, of what size. + """ return self._selftest_address_registers @property def selftest_default_memory_size(self): """Default buffer size to use for registers which are automatically inferred to be - used as pointers and for which no memory size has been configured via `address_registers`.""" + used as pointers and for which no memory size has been configured via `address_registers`. 
+ """ return self._selftest_default_memory_size @property def selfcheck(self): + """Indicates whether SLOTHY performs a self-check on the optimization result. The selfcheck confirms that the scheduling permutation found by SLOTHY yields an isomorphism between the data flow graphs of the original and optimized code. - WARNING: Do not unset this option unless you know what you are doing. + .. warning:: + + Do not unset this option unless you know what you are doing. It is vital in catching bugs in the model generation early. - WARNING: The selfcheck is not a formal verification of SLOTHY's output! + .. warning:: + + The selfcheck is not a formal verification of SLOTHY's output! There are at least two classes of bugs uncaught by the selfcheck: - - User configuration issues: The selfcheck validates SLOTHY's optimization + * User configuration issues: The selfcheck validates SLOTHY's optimization in the context of the provided configuration. Validation of the configuration is the user's responsibility. Two common pitfalls include missing reserved registers (allowing SLOTHY to clobber more registers than intended), or @@ -169,20 +194,23 @@ def selfcheck(self): This is the most common source of issues for code passing the selfcheck but remaining functionally incorrect. - - Bugs in address offset fixup: SLOTHY's modelling of post-load/store address + * Bugs in address offset fixup: SLOTHY's modelling of post-load/store address increments is deliberately inaccurate to allow for reordering of such instructions leveraging commutativity relations such as - ``` - LDR X,[A],#imm; STR Y,[A] === STR Y,[A, #imm]; LDR X,[A],#imm - ``` + .. code-block:: asm - (See also section "Address offset rewrites" in the SLOTHY paper). + LDR X,[A],#imm; STR Y,[A] === STR Y,[A, #imm]; LDR X,[A],#imm - Bugs in SLOTHY's address fixup logic would not be caught by the selfcheck. 
- If your code doesn't work and you are sure to have configured SLOTHY correctly, - you may therefore want to double-check that address offsets have been adjusted - correctly by SLOTHY. + + .. hint:: + + See also section "Address offset rewrites" in the SLOTHY paper + + Bugs in SLOTHY's address fixup logic would not be caught by the selfcheck. + If your code doesn't work and you are sure to have configured SLOTHY correctly, + you may therefore want to double-check that address offsets have been adjusted + correctly by SLOTHY. """ return self._selfcheck @@ -191,7 +219,8 @@ def selfcheck_failure_logfile(self): """The filename for the log of a failing selfcheck. This is printed in the terminal as well, but difficult to analyze for its - sheer size.""" + sheer size. + """ return self._selfcheck_failure_logfile @property @@ -201,32 +230,37 @@ def unsafe_address_offset_fixup(self): Address offset fixup is a feature which leverages commutativity relations such as - ``` - ldr X, [A], #immA; - str Y, [A, #immB] - == - str Y, [A, #(immB+immA)] - ldr X, [A], #immA - ``` + .. code-block:: asm + + ldr X, [A], #immA; + str Y, [A, #immB] + == + str Y, [A, #(immB+immA)] + ldr X, [A], #immA to achieve greater instruction scheduling flexibility in SLOTHY. - SAFETY: - When you enable this feature, you MUST ensure that registers which are - used for addresses are not used in any other instruction than load and - stores. OTHERWISE, THE USE OF THIS FEATURE IS UNSOUND (you may see ldr/ - str instructions with increment reordered with instructions depending - on the address register). + .. important:: + + When you enable this feature, you MUST ensure that registers which are + used for addresses are not used in any other instruction than load and + stores. OTHERWISE, THE USE OF THIS FEATURE IS UNSOUND (you may see ldr/ + str instructions with increment reordered with instructions depending + on the address register). By default, this is enabled for backwards compatibility. 
- LIMITATION: For historical reason, this feature cannot be disabled for - the Armv8.1-M architecture model. A refactoring of that model is needed - to make address offset fixup configurable. + .. note:: - Note: The user-imposed safety constraint is not a necessity -- in principle, - SLOTHY could detect when it is safe to reorder ldr/str instructions with increment. - It just hasn't been implemented yet. + For historical reason, this feature cannot be disabled for + the Armv8.1-M architecture model. A refactoring of that model is needed + to make address offset fixup configurable. + + .. note:: + + The user-imposed safety constraint is not a necessity -- in principle, + SLOTHY could detect when it is safe to reorder ldr/str instructions with increment. + It just hasn't been implemented yet. """ return self._unsafe_address_offset_fixup @@ -243,7 +277,9 @@ def allow_useless_instructions(self): a sign of a buggy configuration, which would likely lead to intended output registers being clobbered by later instructions. - WARNING: Don't disable this option unless you know what you are doing! + .. warning:: + + Don't disable this option unless you know what you are doing! Disabling this option makes it much easier to overlook configuration issues in SLOTHY and can lead to hard-to-debug optimization failures. """ @@ -273,18 +309,21 @@ def keep_tags(self): Tags include pre/core/post or ordering annotations that usually become meaningless post-optimization. However, for preprocessing runs that do not reorder code, it makes - sense to keep them.""" + sense to keep them. + """ return self._keep_tags @property def inherit_macro_comments(self): """Indicates whether comments at macro invocations should be inherited to instructions - in the macro body.""" + in the macro body. 
+ """ return self._inherit_macro_comments @property def ignore_tags(self): - """Indicates whether tags in the input source should be ignored.""" + """Indicates whether tags in the input source should be ignored. + """ return self._ignore_tags @property @@ -295,11 +334,13 @@ def register_aliases(self): in by hand. This is always joined with a list of default aliases (such as lr mapping to r14) - specified in the target architecture.""" + specified in the target architecture. + """ return { **self._register_aliases, **self._arch.RegisterType.default_aliases() } def add_aliases(self, new_aliases): - """Add further register aliases to the configuration""" + """Add further register aliases to the configuration + """ self._register_aliases = { **self._register_aliases, **new_aliases } @property @@ -327,13 +368,14 @@ def rename_inputs(self): inputs are not renamed, while symbolic inputs are dynamically renamed. Examples: - - Generally, unless you are prepared to modify surrounding code, you should + + * Generally, unless you are prepared to modify surrounding code, you should have "arch" : "static", which will not rename inputs which already have architectural register names. - - Config.rename_inputs = { "other" : "any" } + * Config.rename_inputs = { "other" : "any" } This would rename _all_ inputs, regardless of whether they're symbolic or not. Thus, you'd likely need to modify surrounding code. - - Config.rename_inputs = { "in" : "r0", "arch" : "static", "symbolic" : "any" } + * Config.rename_inputs = { "in" : "r0", "arch" : "static", "symbolic" : "any" } This would rename the symbolic input GPR 'in' to 'r0', keep all other inputs which already have an architectural name, while dynamically assigning suitable registers for symbolic inputs. @@ -367,14 +409,16 @@ def inputs_are_outputs(self): _Moreover_, such simultaneous input-outputs are forced to reside in the same architectural register at the beginning and end of the snippet. 
- This should usually be set when optimizing loops.""" + This should usually be set when optimizing loops. + """ return self._inputs_are_outputs @property def locked_registers(self): """List of architectural registers that should not be renamed when they are used as output registers. Reserved registers are treated as locked if - the option `reserved_regs_are_locked` is set.""" + the option `reserved_regs_are_locked` is set. + """ if self.reserved_regs_are_locked: return set(self.reserved_regs).union(self._locked_registers) else: @@ -384,26 +428,30 @@ def locked_registers(self): def sw_pipelining(self): """Subconfiguration for software pipelining. Enabled/Disabled via the sub-field sw_pipelining.enabled. See Config.SoftwarePipelining - for more information.""" + for more information. + """ return self._sw_pipelining @property def constraints(self): """Subconfiguration for constraints to be considered by SLOTHY, e.g. whether latencies or functional units are modelled. - See Config.Constraints for more information.""" + See Config.Constraints for more information. + """ return self._constraints @property def hints(self): """Subconfiguration for hints to be considered by SLOTHY. - See Config.Hints for more information.""" + See Config.Hints for more information. + """ return self._hints @property def max_solutions(self): """The maximum number of solution found by the underlying constraint - solver before it stops the search.""" + solver before it stops the search. + """ return self._max_solutions @property @@ -703,10 +751,12 @@ def split_heuristic_preprocess_naive_interleaving_strategy(self): """Strategy for naive interleaving preprocessing step Supported values are: - - "depth": Always pick the instruction with the lower possible - depth in the DFG first. - - "alternate": Try to evenly alternate between instructions tagged with - "interleaving_class=0/1". + + * "depth": Always pick the instruction with the lower possible + depth in the DFG first. 
+ * "alternate": Try to evenly alternate between instructions tagged with + "interleaving_class=0/1". + """ return self._split_heuristic_preprocess_naive_interleaving_strategy @@ -1039,16 +1089,18 @@ def max_displacement(self): """The maximum relative displacement of an instruction. Examples: - - If set to 1, instructions can be reordered freely. - - If set to 0, no reordering will happen. - - If set to 0.5, an instruction will not move by more than N/2 + + * If set to 1, instructions can be reordered freely. + * If set to 0, no reordering will happen. + * If set to 0.5, an instruction will not move by more than N/2 places between original and re-scheduled source code. This is an experimental feature for the purpose of speeding up otherwise intractable optimization tasks. - LIMITATION: This only takes effect in straightline optimization - (no software pipelining). + .. warning:: + + This only takes effect in straightline optimization (no software pipelining). """ return self._max_displacement @@ -1157,8 +1209,7 @@ def rename_hint_orig_rename(self): @property def ext_bsearch_remember_successes(self): - """When using an external binary search, hint previous successful - optimiation. + """When using an external binary search, hint previous successful optimization. See also Config.variable_size.""" return self._ext_bsearch_remember_successes diff --git a/slothy/core/core.py b/slothy/core/core.py index 2b153c67..71d5efef 100644 --- a/slothy/core/core.py +++ b/slothy/core/core.py @@ -1438,12 +1438,11 @@ def success(self): def __init__(self, Arch, Target, *, logger=None, config=None): """Create a stateless SLOTHY instance - args: - Arch: A model of the underlying architecture. - Target: A model of the underlying microarchitecture. - logger: The logger to be used. - If omitted, a child of the root logger will be used. - config: The configuration to use. + :param Arch: A model of the underlying architecture. + :param Target: A model of the underlying microarchitecture. 
+ :param logger: The logger to be used. + If omitted, a child of the root logger will be used. + :param config: The configuration to use. If omitted, the default configuration will be used. """ super().__init__() @@ -3130,14 +3129,12 @@ def _add_constraints_misc(self): def get_inst_pairs(self, cond_fst=None, cond_snd=None, cond=None): """Yields all instruction pairs satisfying the provided predicate. - This can be useful for the specification of additional - microarchitecture-specific constraints. - - Args: - cond: Predicate on pairs of ComputationNode's. True by default. + :param cond_fst: + :param cond_snd: + :param cond: Predicate on pairs of ComputationNode's. True by default. + :return: Generator of all instruction pairs satisfying the predicate. + """ - Returns: - Generator of all instruction pairs satisfying the predicate.""" if cond_fst is None: cond_fst = lambda x: True if cond_snd is None: @@ -3302,9 +3299,9 @@ def restrict_slots_for_instructions_by_class(self, cls_lst, slots): """Restrict issue slots for all instructions belonging to the provided list of instruction classes. - Args: - cls_lst: A list of instruction classes - slots: A list of issue slots represented as integers.""" + :param cls_lst: A list of instruction classes + :param slots: A list of issue slots represented as integers. + """ self.restrict_slots_for_instructions( self.filter_instructions_by_class(cls_lst), slots ) @@ -3312,9 +3309,9 @@ def restrict_slots_for_instructions_by_property(self, filter_func, slots): """Restrict issue slots for all instructions passing the given filter function. - Args: - cls_lst: A predicate on instructions - slots: A list of issue slots represented as integers.""" + :param filter_func: A predicate on instructions + :param slots: A list of issue slots represented as integers. 
+ """ self.restrict_slots_for_instructions( self.filter_instructions_by_property(filter_func), slots ) diff --git a/slothy/core/dataflow.py b/slothy/core/dataflow.py index 24bd4d87..71ffdcf8 100644 --- a/slothy/core/dataflow.py +++ b/slothy/core/dataflow.py @@ -32,7 +32,8 @@ class SlothyUselessInstructionException(Exception): """An instruction was found whose outputs are neither used by a subsequent instruction nor declared as global outputs of the code under consideration. The instruction is therefore useless according to the architecture model given to SLOTHY. Consider removing the instruction - or refining the architecture model.""" + or refining the architecture model. + """ class RegisterSource: """Representation of the output of an instruction @@ -160,17 +161,16 @@ class ComputationNode: def __init__(self, *, node_id, inst, orig_pos=None, src_in=None, src_in_out=None): """A node in a data flow graph - Args: - id: A unique identifier for the node - inst: The instruction which the node represents - Must be an instance of Instruction - src_in: A list of RegisterSource instances representing - the inputs to the instruction. Inputs which are - also written to should not be listed here, but in - the separate src_in_out argument. - src_in_out: A list of RegisterSource instances representing - the inputs to the instruction which are also - written to. + + :param node_id: A unique identifier for the node + :param inst: The instruction which the node represents + Must be an instance of Instruction + :param src_in: A list of RegisterSource instances representing + the inputs to the instruction. Inputs which are + also written to should not be listed here, but in + the separate src_in_out argument. + :param src_in_out: A list of RegisterSource instances representing the inputs to the instruction which are + also written to. 
""" def isinstancelist(l, c): @@ -294,11 +294,12 @@ def arch(self): @property def typing_hints(self): """A dictionary of 'typing hints' explicitly assigning to symbolic register names - a register type. + a register type. This can be necessary to disambiguate the type of symbolic registers. For example, the Helium vector extension has various instructions which - accept either vector or GPR arguments.""" + accept either vector or GPR arguments. + """ typing_hints = { name : ty for ty in self.arch.RegisterType \ for name in self.arch.RegisterType.list_registers(ty, with_variants=True) } return { **self._typing_hints, **typing_hints } @@ -309,12 +310,14 @@ def outputs(self): @property def inputs_are_outputs(self): """Every input is automatically treated as an output. - This is typically set for loop kernels.""" + This is typically set for loop kernels. + """ return self._inputs_are_outputs @property def allow_useless_instructions(self): """Indicates whether data flow creation should raise SlothyUselessInstructionException - when a useless instruction is detected.""" + when a useless instruction is detected. + """ return self._allow_useless_instructions @typing_hints.setter @@ -333,9 +336,8 @@ def allow_useless_instructions(self,val): def __init__(self, slothy_config=None, **kwargs): """Create a DataFlowGraph config from a Slothy config - Args: - slothy_config: The Slothy configuration to reference. - kwargs: An optional list of modifications of the Slothy config + :param slothy_config: The Slothy configuration to reference. + :param kwargs: An optional list of modifications of the Slothy config """ self._arch = None self._typing_hints = None @@ -376,14 +378,16 @@ def nodes_all(self): (a) make the graph self-contained in the sense that there are no external inputs to the graph, (b) makes it easier to track where outputs are written, (c) automatically creates input nodes for - outputs which are never written to.""" + outputs which are never written to. 
+ """ return self._nodes_all @property def nodes(self): """The list of all ComputationNodes corresonding to instructions in the original source code. Compared to DataFlowGraph.nodes_all, this - omits "virtual" computation nodes.""" + omits "virtual" computation nodes. + """ return list(filter(lambda x: x.is_not_virtual, self.nodes_all)) @property @@ -443,14 +447,16 @@ def nodes_by_id(self): @property def nodes_low(self): """For a source with an even number of instructions, the lower half of the - data flow graph, excluding virtual instructions.""" + data flow graph, excluding virtual instructions. + """ num_nodes = len(self.nodes) assert num_nodes % 2 == 0 return self.nodes[:num_nodes//2] @property def nodes_high(self): """For a source with an even number of instructions, the upper half of the - data flow graph, excluding virtual instructions.""" + data flow graph, excluding virtual instructions. + """ num_nodes = len(self.nodes) assert num_nodes % 2 == 0 return self.nodes[num_nodes//2:] @@ -470,7 +476,8 @@ def edges(self): The ID of a virtual input/output node is "input/output_{name}" (as a string), while the ID of computation nodes corresponding to instructions in the input - source is their original position, as an integer.""" + source is their original position, as an integer. + """ def _iter_edges_with_label(): for t in self.nodes_all: for out_idx, deps in enumerate(t.dst_out): @@ -485,7 +492,8 @@ def depth(self): """The depth of the data flow graph. Equivalently, the maximum length of a dependency chain in the assembly source - represented by the graph.""" + represented by the graph. + """ if self.nodes is None or len(self.nodes) == 0: return 1 return max(t.depth for t in self.nodes) @@ -628,15 +636,14 @@ def address_offset_cb(t, log=None): def __init__(self, src, logger, config, parsing_cb=True): """Compute a data flow graph from a source code snippet. - Args: - arch: The underlying architecture. - src: The source code to be converted into a data flow graph. 
- logger: The logger to be used. - typing_hints: String-indexed dictionary mapping symbolic register names - to types. Types are members of the RegisterType enum from the - arch module. - outputs: The symbolic or architectural registers that the code produces. - Dictionary indexed by the RegisterType enum from the the arch module. + :param arch: The underlying architecture. + :param src: The source code to be converted into a data flow graph. + :param logger: The logger to be used. + :param typing_hints: String-indexed dictionary mapping symbolic register names + to types. Types are members of the RegisterType enum from the + arch module. + :param outputs: The symbolic or architectural registers that the code produces. + Dictionary indexed by the RegisterType enum from the arch module. """ self.logger = logger @@ -655,7 +662,8 @@ def __init__(self, src, logger, config, parsing_cb=True): def _selfcheck_outputs(self): """Checks whether there are instructions whose output(s) are never used, but also - not declared as outputs.""" + not declared as outputs. + """ def flatten(llst): return [x for y in llst for x in y] @@ -698,7 +706,8 @@ def iter_dependencies(self): Each returned element has the form (consumer, producer, ty, idx), representing a dependency from output producer to the idx-th input (if ty=="in") or input/output (if ty=="inout") of consumer. The producer field is an instance of RegisterSource and contains the output index - and source instruction as producer.idx and producer.src, respectively.""" + and source instruction as producer.idx and producer.src, respectively. 
+ """ for consumer in self.nodes_all: for idx, producer in enumerate(consumer.src_in): yield (consumer, producer, "in", idx) @@ -911,12 +920,12 @@ def _process_spill_instruction(self, reg, loc, ty): self.spilled_reg_state[loc] = self._find_source_single(ty, reg) def _add_node(self, s): - """Add a node to the data flow graph + """ + Add a node to the dataflow graph - Args: - s: Instruction to be added to the graph. This may be a single - instruction of a list of candidate instructions, in case the - parsing wasn't unambiguous. + :param s: Instruction to be added to the graph. This may be a single + instruction of a list of candidate instructions, in case the + parsing wasn't unambiguous. """ if not isinstance(s, VirtualInstruction): diff --git a/slothy/core/heuristics.py b/slothy/core/heuristics.py index 900b78e4..d2b3e2bd 100644 --- a/slothy/core/heuristics.py +++ b/slothy/core/heuristics.py @@ -48,7 +48,8 @@ class Heuristics(): The one-shot SLOTHY approach tends to become computationally infeasible above 200 assembly instructions. To optimize kernels beyond that threshold, this class provides heuristics to split the optimization problem into several - smaller-sizes problems amenable to one-shot SLOTHY.""" + smaller-sizes problems amenable to one-shot SLOTHY. + """ @staticmethod def _optimize_binsearch_core(source, logger, conf, **kwargs): @@ -110,12 +111,12 @@ def try_with_stalls(stalls, timeout=None): def optimize_binsearch(source, logger, conf, **kwargs): """Optimize for minimum number of stalls, and potentially a secondary objective. - Args: - source: The source code to be optimized. Must be a list of - SourceLine instances. - logger: The logger to be used - conf: The configuration to apply. This fixed for all one-shot SLOTHY - runs invoked by this call, except for the variation of the stall count. + :param source: The source code to be optimized. Must be a list of + SourceLine instances. 
+ :param logger: The logger to be used + :param conf: The configuration to apply. This is fixed for all one-shot SLOTHY + runs invoked by this call, except for the variation of the stall count. + :returns: The Result object for the succeeding optimization with the smallest number of stalls. The `variable_size` configuration option determines whether the minimiation of stalls happens internally or externally. Internal minimization means that the @@ -123,10 +124,6 @@ def optimize_binsearch(source, logger, conf, **kwargs): objective to the underlying solver. External minimization means that the number of stalls is statically fixed per one-shot SLOTHY optimization, and that an external binary search is used to minimize it. - - Returns: - The Result object for the succceeding optimization with the smallest - number of stalls. """ flexible = not conf.constraints.functional_only @@ -162,18 +159,17 @@ def optimize_binsearch_external(source, logger, conf, flexible=True, **kwargs): has a secondary objective, it then re-optimizes the result for that secondary objective, fixing the minimal number of stalls. - Args: - source: The source code to be optimized. Must be a list of SourceLine instances. - logger: The logger to be used. - conf: The configuration to apply. This is fixed for all one-shot SLOTHY - runs invoked by this call, except for variation of stall count. - flexible: Indicates whether the number of stalls should be minimized - through a binary search, or whether a single one-shot SLOTHY optimization - for a fixed number of stalls (encoded in the configuration) should be - conducted. - - Returns: - A Result object representing the final optimization result. + + :param source: The source code to be optimized. Must be a list of SourceLine instances. + :param logger: The logger to be used. + :param conf: The configuration to apply. This is fixed for all one-shot SLOTHY + runs invoked by this call, except for variation of stall count. 
+ :param flexible: Indicates whether the number of stalls should be minimized + through a binary search, or whether a single one-shot SLOTHY optimization + for a fixed number of stalls (encoded in the configuration) should be + conducted. + + :return: A Result object representing the final optimization result. """ if not flexible: @@ -212,18 +208,17 @@ def optimize_binsearch_internal(source, logger, conf, flexible=True, **kwargs): If the provided configuration has a secondary objective, it then re-optimizes the result for that secondary objective, fixing the minimal number of stalls. - Args: - source: The source code to be optimized. Must be a list of SourceLine instances. - logger: The logger to be used. - conf: The configuration to apply. This is fixed for all one-shot SLOTHY - runs invoked by this call, except for variation of stall count. - flexible: Indicates whether the number of stalls should be minimized - through a binary search, or whether a single one-shot SLOTHY optimization - for a fixed number of stalls (encoded in the configuration) should be - conducted. - - Returns: - A Result object representing the final optimization result. + + :param source: The source code to be optimized. Must be a list of SourceLine instances. + :param logger: The logger to be used. + :param conf: The configuration to apply. This is fixed for all one-shot SLOTHY + runs invoked by this call, except for variation of stall count. + :param flexible: Indicates whether the number of stalls should be minimized + through a binary search, or whether a single one-shot SLOTHY optimization + for a fixed number of stalls (encoded in the configuration) should be + conducted. + :return: A Result object representing the final optimization result. + """ if not flexible: @@ -293,18 +288,18 @@ def periodic(body, logger, conf): done via Heuristics.linear() and thus themselves subject to the splitting heuristic, if enabled. - Args: - body: The loop body to be optimized. 
This must be a list of - SourceLine instances. - logger: The logger to be used. - conf: The configuration to be applied. - Returns: - Tuple (preamble, kernel, postamble, num_exceptional_iterations) + :param body: The loop body to be optimized. This must be a list of + SourceLine instances. + :param logger: The logger to be used. + :param conf: The configuration to be applied. + + :return: Tuple (preamble, kernel, postamble, num_exceptional_iterations) of preamble, kernel and postamble (each as a list of SourceLine objects), plus the number of iterations jointly accounted for by the preamble and postamble (the caller will need this to adjust the loop counter). + """ if conf.sw_pipelining.enabled and not conf.inputs_are_outputs: @@ -381,13 +376,11 @@ def linear(body, logger, conf): input is optimized by successively applying one-shot optimizations to a 'sliding window' of code. - Args: - body: The assembly input to be optimized. This must be a list of - SourceLine objects. - conf: The configuration to be applied. Software pipelining must be disabled. + :param body: The assembly input to be optimized. This must be a list of + SourceLine objects. + :param conf: The configuration to be applied. Software pipelining must be disabled. - Raises: - Raises a SlothyException if software pipelining is enabled. + :raises: Raises a SlothyException if software pipelining is enabled. """ assert SourceLine.is_source(body) if conf.sw_pipelining.enabled: diff --git a/slothy/core/slothy.py b/slothy/core/slothy.py index f16a6c0e..2ba8119e 100644 --- a/slothy/core/slothy.py +++ b/slothy/core/slothy.py @@ -31,9 +31,9 @@ fixed-instruction assembly superoptimizer based on constraint solving. 
It takes handwritten assembly as input and simultaneously super-optimizes: -- Instruction scheduling -- Register allocation -- Software pipelining +* Instruction scheduling +* Register allocation +* Software pipelining SLOTHY enables a development workflow where developers write 'clean' assembly by hand, emphasizing the logic of the computation, while SLOTHY automates microarchitecture-specific @@ -43,7 +43,8 @@ and readability- and verifiability-impeding micro-optimizations. This module provides the Slothy class, which is a stateful interface to both -one-shot and heuristic optimiations using SLOTHY.""" +one-shot and heuristic optimizations using SLOTHY. +""" import os import logging @@ -63,6 +64,7 @@ except ImportError: Uc = None + class Slothy: """SLOTHY optimizer @@ -70,12 +72,13 @@ class Slothy: optimizations using SLOTHY. The basic flow of operation is the following: - - Initialize an instance, providing models to the target architecture + + * Initialize an instance, providing models to the target architecture and microarchitecture as arguments. - - Load source code from file or raw string. - - Repeat: Adjust configuration and conduct an optimization of a loop body or - straightline block of code, using optimize() or optimize_loop(). - - Write source code to file or raw string. + * Load source code from file or raw string. + * Repeat: Adjust configuration and conduct an optimization of a loop body or + straightline block of code, using optimize() or optimize_loop(). + * Write source code to file or raw string. The use of heuristics is controlled through the configuration. 
""" @@ -83,8 +86,10 @@ class Slothy: # Quick convenience access to architecture and target from the config def _get_arch(self): return self.config.arch + def _get_target(self): return self.config.target + arch = property(_get_arch) target = property(_get_target) @@ -102,16 +107,18 @@ def __init__(self, arch, target, logger=None): @property def source(self): - """Returns the current source code as an array of SourceLine objects + """Returns the current source code as an array of SourceLine objects. - If you want the current source code as a multiline string, use get_source_as_string().""" + If you want the current source code as a multiline string, use get_source_as_string(). + """ return self._source @property def original_source(self): """Returns the original source code as an array of SourceLine objects - If you want the current source code as a multiline string, use get_original_source_as_string().""" + If you want the current source code as a multiline string, use get_original_source_as_string(). 
+ """ return self._original_source @source.setter @@ -127,12 +134,12 @@ def original_source(self, val): def get_source_as_string(self, comments=True, indentation=True, tags=True): """Retrieve current source code as multi-line string""" return SourceLine.write_multiline(self.source, comments=comments, - indentation=indentation, tags=tags) + indentation=indentation, tags=tags) def get_original_source_as_string(self, comments=True, indentation=True, tags=True): """Retrieve original source code as multi-line string""" return SourceLine.write_multiline(self.original_source, comments=comments, - indentation=indentation, tags=tags) + indentation=indentation, tags=tags) def set_source_as_string(self, s): """Provide input source code as multi-line string""" @@ -151,12 +158,12 @@ def load_source_from_file(self, filename): """Load source code from file""" if self.source is not None: self.logger.warning("Overwriting previous source code") - with open(filename,"r", encoding="utf8") as f: + with open(filename, "r", encoding="utf8") as f: self.load_source_raw(f.read()) def write_source_to_file(self, filename): """Write current source code to file""" - with open(filename,"w", encoding="utf8") as f: + with open(filename, "w", encoding="utf8") as f: f.write(self.get_source_as_string()) def rename_function(self, old_funcname, new_funcname): @@ -175,14 +182,16 @@ def _dump(name, s, logger, err=False): def global_selftest(self, funcname, address_registers, iterations=5): """Conduct a function-level selftest - - funcname: Name of function to be called. Must be exposed as a symbol - - address_prs: Dictionary indicating which GPRs are pointers to buffers of which size. + :param funcname: Name of function to be called. Must be exposed as a symbol + :param address_registers: Dictionary indicating which GPRs are pointers to buffers of which size. For example, `{ "x0": 1024, "x4": 1024 }` would indicate that both x0 and x4 point to buffers of size 1024 bytes. 
The global selftest needs to know this to setup valid calls to the assembly routine. - DEPENDENCY: To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc` - in your PATH. Those are part of a standard LLVM setup. + .. important:: + + To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc` + in your PATH. Those are part of a standard LLVM setup. """ log = self.logger.getChild(f"global_selftest_{funcname}") @@ -191,7 +200,7 @@ def global_selftest(self, funcname, address_registers, iterations=5): raise SelfTestException("Cannot run selftest -- unicorn-engine is not available.") if self.config.arch.unicorn_arch is None or \ - self.config.arch.llvm_mc_arch is None: + self.config.arch.llvm_mc_arch is None: log.warning("Selftest not supported on target architecture") return @@ -255,8 +264,8 @@ def _make_llvm_mca_stats(self, pre, code, post, txt, indentation): self.config.target.llvm_mca_target, self.logger, full=self.config.llvm_mca_full, issue_width=issue_width) - stats = ["",f"LLVM MCA STATISTICS ({txt}) BEGIN",""] + stats + \ - ["", f"ORIGINAL LLVM MCA STATISTICS ({txt}) END",""] + stats = ["", f"LLVM MCA STATISTICS ({txt}) BEGIN", ""] + stats + \ + ["", f"ORIGINAL LLVM MCA STATISTICS ({txt}) END", ""] stats = [SourceLine("").add_comment(r) for r in stats] stats = SourceLine.apply_indentation(stats, indentation) except LLVM_Mca_Error: @@ -267,19 +276,21 @@ def _make_llvm_mca_stats(self, pre, code, post, txt, indentation): def optimize(self, start=None, end=None, loop_synthesis_cb=None, logname=None): """Optimize all or part of the currently loaded source code - Note: It is OK to use this in software pipelining mode. In this case, the - tool will output preamble, kernel, and postamble separately, while the looping - code itself needs to be introduced by the user. Alternatively, a callback can be - provided which will be given preamble, kernel, postamble, and the number of exceptional - iterations, and piece together a list of source code lines from that. 
- - Args: - start: The label marking the beginning of the part of the code to optimize. - This cannot be used together with the 'loop' argument. - end: The label marking the end of the part of the code to optimize. - This cannot be used together with the 'loop' argument. - loop_synthesis_cb: Optional (None by default) callback synthesis final source code - from tuple of (preamble, kernel, postamble, # exceptional iterations). + .. note:: + + It is OK to use this in software pipelining mode. In this case, the + tool will output preamble, kernel, and postamble separately, while the looping + code itself needs to be introduced by the user. Alternatively, a callback can be + provided which will be given preamble, kernel, postamble, and the number of exceptional + iterations, and piece together a list of source code lines from that. + + + :param start: The label marking the beginning of the part of the code to optimize. + This cannot be used together with the 'loop' argument. + :param end: The label marking the end of the part of the code to optimize. + This cannot be used together with the 'loop' argument. + :param loop_synthesis_cb: Optional (None by default) callback synthesis final source code + from tuple of (preamble, kernel, postamble, # exceptional iterations). 
""" # pylint:disable=too-many-locals @@ -327,7 +338,7 @@ def optimize(self, start=None, end=None, loop_synthesis_cb=None, logname=None): core = core + new_stats_kernel def indented(code): - return [ SourceLine(l).set_indentation(indentation) for l in code] + return [SourceLine(l).set_indentation(indentation) for l in code] if start is not None: core = [SourceLine(f"{start}:")] + core @@ -341,7 +352,7 @@ def indented(code): assert num_exceptional == 0 optimized_source = core elif loop_synthesis_cb is not None: - optimized_source = loop_synthesis_cb( pre, core, post, num_exceptional) + optimized_source = loop_synthesis_cb(pre, core, post, num_exceptional) else: optimized_source = [] optimized_source += indented([f"// Exceptional iterations: {num_exceptional}", @@ -357,10 +368,9 @@ def indented(code): def get_loop_input_output(self, loop_lbl, forced_loop_type=None): """Find all registers that a loop body depends on - - Args: - loop_lbl: Label of loop to process. - forced_loop_type: Forces the loop to be parsed as a certain type. + + :param loop_lbl: Label of loop to process. + :param forced_loop_type: Forces the loop to be parsed as a certain type. """ logger = self.logger.getChild(loop_lbl) _, body, _, _, _ = self.arch.Loop.extract(self.source, loop_lbl, forced_loop_type=forced_loop_type) @@ -408,11 +418,11 @@ def _fusion_core(self, pre, body, post, logger, ssa=True): if ssa is True: dfg = DFG(body, logger.getChild("ssa"), dfgc, parsing_cb=False) dfg.ssa() - body = [ ComputationNode.to_source_line(t) for t in dfg.nodes ] + body = [ComputationNode.to_source_line(t) for t in dfg.nodes] dfg = DFG(body, logger.getChild("fusion"), dfgc, parsing_cb=False) dfg.apply_fusion_cbs() - body = [ ComputationNode.to_source_line(t) for t in dfg.nodes ] + body = [ComputationNode.to_source_line(t) for t in dfg.nodes] return body @@ -421,19 +431,18 @@ def fusion_region(self, start, end, **kwargs): instruction (sequences) with an alternative. 
These replacements are defined in the architectural model by setting an instruction class' global_fusion_cb. - - Args: - start: The label marking the beginning of the part of the code to - apply fusion to. - end: The label marking the end of the part of the code to apply - fusion to. + + :param start: The label marking the beginning of the part of the code to + apply fusion to. + :param end: The label marking the end of the part of the code to apply + fusion to. """ logger = self.logger.getChild(f"ssa_{start}_{end}") pre, body, post = AsmHelper.extract(self.source, start, end) - body_ssa = [ SourceLine(f"{start}:") ] +\ - self._fusion_core(pre, body, post, logger, **kwargs) + \ - [ SourceLine(f"{end}:") ] + body_ssa = [SourceLine(f"{start}:")] + \ + self._fusion_core(pre, body, post, logger, **kwargs) + \ + [SourceLine(f"{end}:")] self.source = pre + body_ssa + post assert SourceLine.is_source(self.source) @@ -441,14 +450,13 @@ def fusion_loop(self, loop_lbl, forced_loop_type=None, **kwargs): """Run fusion callbacks on loop body replacing certain instruction (sequences) with an alternative. These replacements are defined in the architectural model by setting an instruction class' global_fusion_cb. - - Args: - loop_lbl: Label of loop to which the fusions are applied to. - forced_loop_type: Forces the loop to be parsed as a certain type. + + :param loop_lbl: Label of loop to which the fusions are applied to. + :param forced_loop_type: Forces the loop to be parsed as a certain type. 
""" logger = self.logger.getChild(f"ssa_loop_{loop_lbl}") - pre , body, post, _, other_data, loop = \ + pre, body, post, _, other_data, loop = \ self.arch.Loop.extract(self.source, loop_lbl, forced_loop_type=forced_loop_type) try: @@ -459,18 +467,17 @@ def fusion_loop(self, loop_lbl, forced_loop_type=None, **kwargs): indentation = AsmHelper.find_indentation(body) body_ssa = SourceLine.read_multiline(loop.start(loop_cnt)) + \ - SourceLine.apply_indentation(self._fusion_core(pre, body, post, logger, **kwargs), indentation) + \ - SourceLine.read_multiline(loop.end(other_data)) + SourceLine.apply_indentation(self._fusion_core(pre, body, post, logger, **kwargs), indentation) + \ + SourceLine.read_multiline(loop.end(other_data)) self.source = pre + body_ssa + post assert SourceLine.is_source(self.source) def optimize_loop(self, loop_lbl, postamble_label=None, forced_loop_type=None): """Optimize the loop starting at a given label - - Args: - postamble_label: Marks end of loop kernel. - forced_loop_type: Forces the loop to be parsed as a certain type. + + :param postamble_label: Marks end of loop kernel. + :param forced_loop_type: Forces the loop to be parsed as a certain type. 
""" logger = self.logger.getChild(loop_lbl) @@ -502,14 +509,14 @@ def optimize_loop(self, loop_lbl, postamble_label=None, forced_loop_type=None): body = AsmAllocation.unfold_all_aliases(c.register_aliases, body) body = SourceLine.apply_indentation(body, indentation) self.logger.info("Optimizing loop %s (%d instructions) ...", - loop_lbl, len(body)) + loop_lbl, len(body)) if self.config.with_llvm_mca_before is True: orig_stats = self._make_llvm_mca_stats(early, body, late, "ORIGINAL", indentation) preamble_code, kernel_code, postamble_code, num_exceptional = \ Heuristics.periodic(body, logger, c) - + # Remove branch instructions from preamble and postamble postamble_code = [l for l in postamble_code if not l.tags.get('branch')] postamble_code = [l for l in postamble_code if not l.tags.get('branch')] @@ -523,13 +530,13 @@ def optimize_loop(self, loop_lbl, postamble_label=None, forced_loop_type=None): kernel_code = kernel_code + new_stats_kernel if self.config.sw_pipelining.optimize_preamble is True \ - and len(preamble_code) > 0: + and len(preamble_code) > 0: new_stats_preamble = self._make_llvm_mca_stats(early, preamble_code, late, "PREAMBLE", indentation) preamble_code = preamble_code + new_stats_preamble if self.config.sw_pipelining.optimize_postamble is True \ - and len(postamble_code) > 0: + and len(postamble_code) > 0: new_stats_postamble = self._make_llvm_mca_stats(early, postamble_code, late, "POSTAMBLE", indentation) postamble_code = postamble_code + new_stats_postamble @@ -540,6 +547,7 @@ def indented(code): return SourceLine.apply_indentation(code, self.config.indentation) loop_lbl_end = f"{loop_lbl}_end" + def loop_lbl_iter(i): return f"{loop_lbl}_iter_{i}" @@ -570,10 +578,10 @@ def loop_lbl_iter(i): register_aliases=c.register_aliases)) optimized_code += indented(kernel_code) optimized_code += SourceLine.read_multiline(loop.end(other_data, - indentation=self.config.indentation)) + indentation=self.config.indentation)) if postamble_label is not None: - 
optimized_code += [ SourceLine(f"{postamble_label}:") - .add_comment("end of loop kernel") ] + optimized_code += [SourceLine(f"{postamble_label}:") + .add_comment("end of loop kernel")] optimized_code += indented(postamble_code) if self.config.sw_pipelining.unknown_iteration_count: diff --git a/slothy/helper.py b/slothy/helper.py index 3d386d09..d4bd10e8 100644 --- a/slothy/helper.py +++ b/slothy/helper.py @@ -1585,11 +1585,10 @@ def extract(source, lbl, forced_loop_type=None): """ Find a loop with start label `lbl` in `source` and return it together with its type. - - Args: - source: list of SourceLine objects - lbl: label of the loop to extract - forced_loop_type: if not None, only try to extract this type of loop + + :param source: list of SourceLine objects + :param lbl: label of the loop to extract + :param forced_loop_type: if not None, only try to extract this type of loop """ if forced_loop_type is not None: loop_types = [forced_loop_type] diff --git a/slothy/targets/aarch64/aarch64_neon.py b/slothy/targets/aarch64/aarch64_neon.py index 2426fd2f..0bdff608 100644 --- a/slothy/targets/aarch64/aarch64_neon.py +++ b/slothy/targets/aarch64/aarch64_neon.py @@ -278,12 +278,14 @@ class SubsLoop(Loop): Loop ending in a flag setting subtraction and a branch. Example: - ``` - loop_lbl: - {code} - sub[s] , , # - (cbnz|bnz|bne) , loop_lbl - ``` + + .. code-block:: asm + + loop_lbl: + {code} + sub[s] , , # + (cbnz|bnz|bne) , loop_lbl + where cnt is the loop counter in lr. """ def __init__(self, lbl="lbl", lbl_start="1", lbl_end="2", loop_init="lr") -> None: @@ -510,18 +512,15 @@ def make(cls, src): def build(c, src, mnemonic, **kwargs): """Attempt to parse a string as an instance of an instruction. - Args: - c: The target instruction the string should be attempted to be parsed as. - src: The string to parse. - mnemonic: The mnemonic of instruction c - Returns: - Upon success, the result of parsing src as an instance of c. 
+ :param c: The target instruction the string should be attempted to be parsed as. + :param src: The string to parse. + :param mnemonic: The mnemonic of instruction c + + :return: Upon success, the result of parsing src as an instance of c. - Raises: - ParsingException: The str argument cannot be parsed as an - instance of c. - FatalParsingException: A fatal error during parsing happened + :raises ParsingException: The str argument cannot be parsed as an instance of c. + :raises FatalParsingException: A fatal error during parsing happened that's likely a bug in the model. """ @@ -3237,7 +3236,10 @@ def eor3_fusion_cb(): Example for a fusion call back. Allows to merge two eor instruction with two inputs into one eor with three inputs. Such technique can help perform transformations in case of differences between uArchs. - Note: This is not used in any real (crypto) example. This is merely a PoC. + + .. note:: + + This is not used in any real (crypto) example. This is merely a PoC. """ def core(inst,t,log=None): succ = None @@ -3300,7 +3302,10 @@ def eor3_splitting_cb(): Example for a splitting call back. Allows to split one eor instruction with three inputs into two eors with two inputs. Such technique can help perform transformations in case of differences between uArchs. - Note: This is not used in any real (crypto) example. This is merely a PoC. + + .. note:: + + This is not used in any real (crypto) example. This is merely a PoC. """ def core(inst,t,log=None): diff --git a/slothy/targets/aarch64/cortex_a55.py b/slothy/targets/aarch64/cortex_a55.py index 7cdadb13..95eb15a7 100644 --- a/slothy/targets/aarch64/cortex_a55.py +++ b/slothy/targets/aarch64/cortex_a55.py @@ -31,7 +31,9 @@ Most data in this model is derived from the Cortex-A55 software optimization guide. Some latency exceptions were manually identified through microbenchmarks. -WARNING: The data in this module is approximate and may contain errors. +.. 
warning:: + + The data in this module is approximate and may contain errors. """ ################################### NOTE ############################################### diff --git a/slothy/targets/aarch64/cortex_a72_frontend.py b/slothy/targets/aarch64/cortex_a72_frontend.py index c2bd0554..7cfd2387 100644 --- a/slothy/targets/aarch64/cortex_a72_frontend.py +++ b/slothy/targets/aarch64/cortex_a72_frontend.py @@ -46,7 +46,9 @@ This file thus tries to model basic aspects of the frontend of Cortex-A72 alongside instruction latencies, both taken from the Cortex-A72 Software Optimization Guide. -Note: We focus on a very small subset of AArch64, just enough to experiment with the +.. note:: + + We focus on a very small subset of AArch64, just enough to experiment with the optimization of the Kyber and Dilithium NTT. """ diff --git a/slothy/targets/arm_v7m/__init__.py b/slothy/targets/arm_v7m/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 1de96011..23e79ebd 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -214,19 +214,24 @@ class VmovCmpLoop(Loop): register before the loop starts and therefore needs to be recovered before the comparison. - WARNING: This type of loop is experimental as slothy has no knowledge about - what happens inside the loop boundary! Especially, a register is written - inside the boundary which may be used for renaming by slothy. Use with - caution. + .. warning:: + + This type of loop is experimental as slothy has no knowledge about + what happens inside the loop boundary! Especially, a register is written + inside the boundary which may be used for renaming by slothy. Use with + caution. Example: - ``` - loop_lbl: - {code} - vmov , - cmp , - (cbnz|bnz|bne) loop_lbl - ``` where cnt is the loop counter in lr. + + .. 
code-block:: asm + + loop_lbl: + {code} + vmov , + cmp , + (cbnz|bnz|bne) loop_lbl + + where cnt is the loop counter in lr. """ def __init__(self, lbl="lbl", lbl_start="1", lbl_end="2", loop_init="lr") -> None: super().__init__(lbl_start=lbl_start, lbl_end=lbl_end, loop_init=loop_init) @@ -303,7 +308,12 @@ class BranchLoop(Loop): More general loop type that just considers the branch instruction as part of the boundary. This can help to improve performance as the instructions that belong to handling the loop can be considered by SLOTHY aswell. - Note: This loop type is still rather experimental. It has a lot of logics inside as it needs to be able to "understand" a variety of different ways to express loops, e.g., how counters get incremented, how registers marking the end of the loop need to be modified in case of software pipelining etc. Currently, this type covers the three other types we offer above, namely `SubsLoop`, `CmpLoop`, and `VmovCmpLoop`. + .. note:: + + This loop type is still rather experimental. It has a lot of logics inside as it needs to be able to "understand" + a variety of different ways to express loops, e.g., how counters get incremented, how registers marking the end of the + loop need to be modified in case of software pipelining etc. Currently, this type covers the three other types we offer + above, namely `SubsLoop`, `CmpLoop`, and `VmovCmpLoop`. For examples, we refer to the classes `SubsLoop`, `CmpLoop`, and `VmovCmpLoop`. """ @@ -405,12 +415,14 @@ class CmpLoop(Loop): what happens inside the loop boundary! Use with caution. Example: - ``` - loop_lbl: - {code} - cmp , - (cbnz|bnz|bne) loop_lbl - ``` + + .. code-block:: asm + + loop_lbl: + {code} + cmp , + (cbnz|bnz|bne) loop_lbl + where cnt is the loop counter in lr. """ def __init__(self, lbl="lbl", lbl_start="1", lbl_end="2", loop_init="lr") -> None: @@ -478,12 +490,14 @@ class SubsLoop(Loop): Loop ending in a flag setting subtraction and a branch. 
Example: - ``` - loop_lbl: - {code} - sub[s] , , #1 - (cbnz|bnz|bne) loop_lbl - ``` + + .. code-block:: + + loop_lbl: + {code} + sub[s] , , #1 + (cbnz|bnz|bne) loop_lbl + where cnt is the loop counter in lr. """ def __init__(self, lbl_start="1", lbl_end="2", loop_init="lr") -> None: @@ -697,18 +711,16 @@ def make(cls, src): def build(c, src, mnemonic, **kwargs): """Attempt to parse a string as an instance of an instruction. - Args: - c: The target instruction the string should be attempted to be parsed as. - src: The string to parse. - mnemonic: The mnemonic of instruction c - Returns: - Upon success, the result of parsing src as an instance of c. + :param c: The target instruction the string should be attempted to be parsed as. + :param src: The string to parse. + :param mnemonic: The mnemonic of instruction c + + :return: Upon success, the result of parsing src as an instance of c. - Raises: - ParsingException: The str argument cannot be parsed as an + :raises ParsingException: The str argument cannot be parsed as an instance of c. - FatalParsingException: A fatal error during parsing happened + :raises FatalParsingException: A fatal error during parsing happened that's likely a bug in the model. """ diff --git a/slothy/targets/arm_v7m/cortex_m7.py b/slothy/targets/arm_v7m/cortex_m7.py index 3fb89716..6876ea8c 100644 --- a/slothy/targets/arm_v7m/cortex_m7.py +++ b/slothy/targets/arm_v7m/cortex_m7.py @@ -1,7 +1,9 @@ """ Experimental Cortex-M7 microarchitecture model for SLOTHY -WARNING: The data in this module is approximate and may contain errors. +.. warning:: + + The data in this module is approximate and may contain errors. 
""" ################################### NOTE ############################################### diff --git a/slothy/targets/arm_v81m/arch_v81m.py b/slothy/targets/arm_v81m/arch_v81m.py index f2c64778..5c2ab6e6 100644 --- a/slothy/targets/arm_v81m/arch_v81m.py +++ b/slothy/targets/arm_v81m/arch_v81m.py @@ -122,11 +122,12 @@ class LeLoop(Loop): Loop ending in a le instruction. Example: - ``` - loop_lbl: - {code} - le , loop_lbl - ``` + + .. code-block:: + + loop_lbl: + {code} + le , loop_lbl where cnt is the loop counter in lr. """ diff --git a/tutorial_files/opt/X25519-AArch64-simple_opt.s b/tutorial_files/opt/X25519-AArch64-simple_opt.s new file mode 100644 index 00000000..3bedd5ad --- /dev/null +++ b/tutorial_files/opt/X25519-AArch64-simple_opt.s @@ -0,0 +1,3652 @@ + /* X25519-AArch64 by Emil Lenngren (2018) + * + * To the extent possible under law, the person who associated CC0 with + * X25519-AArch64 has waived all copyright and related or neighboring rights + * to X25519-AArch64. + * + * You should have received a copy of the CC0 legalcode along with this + * work. If not, see http://creativecommons.org/publicdomain/zero/1.0/. + */ + +/* + * This is an AArch64 implementation of X25519. + * It follows the reference implementation where the representation of + * a field element [0..2^255-19) is represented by a 256-bit little endian integer, + * reduced modulo 2^256-38, and may possibly be in the range [2^256-38..2^256). + * The scalar is a 256-bit integer where certain bits are hardcoded per specification. + * + * The implementation runs in constant time (~145k cycles on Cortex-A53), + * and no conditional branches or memory access pattern depend on secret data. + */ + +/* + * Implementation manually de-interleaved and modularized for use with SLOTHY.
See + * + * Fast and Clean: Auditable High Performance Assembly via Constraint Solving + * (Abdulrahman, Becker, Kannwischer, Klein) + */ + +#include +#include "instruction_wrappers.i" + +.macro fcsel_dform out, in0, in1, cond // @slothy:no-unfold + fcsel dform_\out, dform_\in0, dform_\in1, \cond +.endm + +#define STACK_MASK1 0 +#define STACK_MASK2 8 +#define STACK_A_0 16 +#define STACK_A_8 (STACK_A_0+ 8) +#define STACK_A_16 (STACK_A_0+16) +#define STACK_A_24 (STACK_A_0+24) +#define STACK_A_32 (STACK_A_0+32) +#define STACK_B_0 64 +#define STACK_B_8 (STACK_B_0+ 8) +#define STACK_B_16 (STACK_B_0+16) +#define STACK_B_24 (STACK_B_0+24) +#define STACK_B_32 (STACK_B_0+32) +#define STACK_CTR 104 +#define STACK_LASTBIT 108 +#define STACK_SCALAR 112 +#define STACK_X_0 168 +#define STACK_X_8 (STACK_X_0+ 8) +#define STACK_X_16 (STACK_X_0+16) +#define STACK_X_24 (STACK_X_0+24) +#define STACK_X_32 (STACK_X_0+32) +#define STACK_OUT_PTR (STACK_X_0+48) + + .cpu generic+fp+simd + .text + .align 2 + + // in: x0: pointer + // out: x0: loaded value + // .type load64unaligned, %function +load64unaligned: + ldrb w1, [x0] + ldrb w2, [x0, #1] + ldrb w3, [x0, #2] + ldrb w4, [x0, #3] + ldrb w5, [x0, #4] + ldrb w6, [x0, #5] + ldrb w7, [x0, #6] + ldrb w8, [x0, #7] + + orr w1, w1, w2, lsl #8 + orr w3, w3, w4, lsl #8 + orr w5, w5, w6, lsl #8 + orr w7, w7, w8, lsl #8 + + orr w1, w1, w3, lsl #16 + orr w5, w5, w7, lsl #16 + + orr x0, x1, x5, lsl #32 + + ret + // .size load64unaligned, .-load64unaligned + + // in: x0: pointer + // out: x0-x3: loaded value + // .type load256unaligned, %function +load256unaligned: + stp x29, x30, [sp, #-64]! 
+ mov x29, sp + stp x19, x20, [sp, #16] + stp x21, x22, [sp, #32] + + mov x19, x0 + bl load64unaligned + mov x20, x0 + add x0, x19, #8 + bl load64unaligned + mov x21, x0 + add x0, x19, #16 + bl load64unaligned + mov x22, x0 + add x0, x19, #24 + bl load64unaligned + mov x3, x0 + + mov x0, x20 + mov x1, x21 + mov x2, x22 + + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x29, x30, [sp], #64 + ret + // .size load256unaligned, .-load256unaligned + +vAB0 .req v0 +vAB1 .req v1 +vAB2 .req v2 +vAB3 .req v3 +vAB4 .req v4 +vAB5 .req v5 +vAB6 .req v6 +vAB7 .req v7 +vAB8 .req v8 +vAB9 .req v9 + +vT0 .req vAB0 +vT1 .req vAB1 +vT2 .req vAB2 +vT3 .req vAB3 +vT4 .req vAB4 +vT5 .req vAB5 +vT6 .req vAB6 +vT7 .req vAB7 +vT8 .req vAB8 +vT9 .req vAB9 + +vTA0 .req vAB0 +vTA1 .req vAB1 +vTA2 .req vAB2 +vTA3 .req vAB3 +vTA4 .req vAB4 +vTA5 .req vAB5 +vTA6 .req vAB6 +vTA7 .req vAB7 +vTA8 .req vAB8 +vTA9 .req vAB9 + +vBX0 .req v10 +vBX1 .req v11 +vBX2 .req v12 +vBX3 .req v13 +vBX4 .req v14 +vBX5 .req v15 +vBX6 .req v16 +vBX7 .req v17 +vBX8 .req v18 +vBX9 .req v19 + +vDC0 .req vBX0 +vDC1 .req vBX1 +vDC2 .req vBX2 +vDC3 .req vBX3 +vDC4 .req vBX4 +vDC5 .req vBX5 +vDC6 .req vBX6 +vDC7 .req vBX7 +vDC8 .req vBX8 +vDC9 .req vBX9 + +vADBC0 .req v20 +vADBC1 .req v21 +vADBC2 .req v22 +vADBC3 .req v23 +vADBC4 .req v24 +vADBC5 .req v25 +vADBC6 .req v26 +vADBC7 .req v27 +vADBC8 .req v28 +vADBC9 .req v29 + +vX4Z50 .req vADBC0 +vX4Z51 .req vADBC1 +vX4Z52 .req vADBC2 +vX4Z53 .req vADBC3 +vX4Z54 .req vADBC4 +vX4Z55 .req vADBC5 +vX4Z56 .req vADBC6 +vX4Z57 .req vADBC7 +vX4Z58 .req vADBC8 +vX4Z59 .req vADBC9 + +vMaskA .req v30 +vMaskB .req v15 + +vZ20 .req v1 +vZ22 .req v3 +vZ24 .req v5 +vZ26 .req v7 +vZ28 .req v9 + +vZ30 .req v11 +vZ32 .req v13 +vZ34 .req v15 +vZ36 .req v17 +vZ38 .req v19 + +vX20 .req v0 +vX22 .req v2 +vX24 .req v4 +vX26 .req v6 +vX28 .req v8 + +vX30 .req v10 +vX32 .req v12 +vX34 .req v14 +vX36 .req v16 +vX38 .req v18 + +vB0 .req v20 +vB2 .req v21 +vB4 .req v22 +vB6 .req v23 +vB8 
.req v24 + +vA0 .req v0 +vA2 .req v2 +vA4 .req v4 +vA6 .req v6 +vA8 .req v8 + +vC0 .req v10 +vC2 .req v12 +vC4 .req v14 +vC6 .req v16 +vC8 .req v18 + +vD0 .req v25 +vD2 .req v26 +vD4 .req v27 +vD6 .req v28 +vD8 .req v29 + +vF0 .req v1 +vF2 .req v3 +vF4 .req v5 +vF6 .req v7 +vF8 .req v9 + +vG0 .req v20 +vG2 .req v21 +vG4 .req v22 +vG6 .req v23 +vG8 .req v24 + +// F +sF0 .req x0 +sF1 .req x1 +sF2 .req x2 +sF3 .req x3 +sF4 .req x4 +sF5 .req x5 +sF6 .req x6 +sF7 .req x7 +sF8 .req x8 +sF9 .req x9 + +sAA0 .req x20 +sAA1 .req x21 +sAA2 .req x22 +sAA3 .req x23 +sAA4 .req x24 +sAA5 .req x25 +sAA6 .req x26 +sAA7 .req x27 +sAA8 .req x28 +sAA9 .req x19 + +stmp .req x2 + +// G +sG0 .req x0 +sG1 .req x1 +sG2 .req x2 +sG3 .req x3 +sG4 .req x4 +sG5 .req x5 +sG6 .req x6 +sG7 .req x7 +sG8 .req x8 +sG9 .req x9 + +sBB0 .req x0 +sBB1 .req x1 +sBB2 .req x2 +sBB3 .req x3 +sBB4 .req x4 +sBB5 .req x5 +sBB6 .req x6 +sBB7 .req x7 +sBB8 .req x8 +sBB9 .req x9 + +// E +sE0 .req x10 +sE1 .req x11 +sE2 .req x12 +sE3 .req x13 +sE4 .req x14 +sE5 .req x15 +sE6 .req x16 +sE7 .req x17 +sE8 .req x19 +sE9 .req x20 + +sZ40 .req x23 +sZ41 .req x3 +sZ42 .req x21 +sZ44 .req x7 +sZ45 .req x6 +sZ46 .req x24 +sZ48 .req x22 + +START: + + +.macro scalar_stack_ldr sA, offset, name + ldr \sA\()0, [sp, #\offset\()_0] // @slothy:reads=[\name\()0] + ldr \sA\()2, [sp, #\offset\()_8] // @slothy:reads=[\name\()8] + ldr \sA\()4, [sp, #\offset\()_16] // @slothy:reads=[\name\()16] + ldr \sA\()6, [sp, #\offset\()_24] // @slothy:reads=[\name\()24] + ldr \sA\()8, [sp, #\offset\()_32] // @slothy:reads=[\name\()32] +.endm + +.macro scalar_stack_str offset, sA, name + stp \sA\()0, \sA\()2, [sp, #\offset\()_0] // @slothy:writes=[\name\()0,\name\()8] + stp \sA\()4, \sA\()6, [sp, #\offset\()_16] // @slothy:writes=[\name\()16,\name\()24] + str \sA\()8, [sp, #\offset\()_32] // @slothy:writes=[\name\()32] +.endm + +.macro vector_stack_str offset, vA, name + stp D<\vA\()0>, D<\vA\()2>, [sp, #\offset\()_0] // 
@slothy:writes=[\name\()0,\name\()8] + stp D<\vA\()4>, D<\vA\()6>, [sp, #\offset\()_16] // @slothy:writes=[\name\()16,\name\()24] + str D<\vA\()8>, [sp, #\offset\()_32] // @slothy:writes=[\name\()32] +.endm + + // TODO: eliminate this explicit register assignment by converting stack_vld2_lane to AArch64Instruction + xvector_load_lane_tmp .req x26 + +.macro vector_load_lane vA, offset, lane, name + add xvector_load_lane_tmp, sp, #\offset\()_0 + ld2 { \vA\()0.s, \vA\()1.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=[\name\()0] + ld2 { \vA\()2.s, \vA\()3.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=[\name\()8] + ld2 { \vA\()4.s, \vA\()5.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=[\name\()16] + ld2 { \vA\()6.s, \vA\()7.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=[\name\()24] + ld2 { \vA\()8.s, \vA\()9.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=[\name\()32] +.endm + +.macro vector_sub_inner vC0, vC2, vC4, vC6, vC8, vA0, vA2, vA4, vA6, vA8, vB0, vB2, vB4, vB6, vB8 + // (2^255-19)*4 - vB + sub \vC0\().2s, v28.2s, \vB0\().2s + sub \vC2\().2s, v29.2s, \vB2\().2s + sub \vC4\().2s, v29.2s, \vB4\().2s + sub \vC6\().2s, v29.2s, \vB6\().2s + sub \vC8\().2s, v29.2s, \vB8\().2s + + // ... 
+ vA + add \vC0\().2s, \vA0\().2s, \vC0\().2s + add \vC2\().2s, \vA2\().2s, \vC2\().2s + add \vC4\().2s, \vA4\().2s, \vC4\().2s + add \vC6\().2s, \vA6\().2s, \vC6\().2s + add \vC8\().2s, \vA8\().2s, \vC8\().2s +.endm + +.macro vector_sub vC, vA, vB + vector_sub_inner \vC\()0, \vC\()2, \vC\()4, \vC\()6, \vC\()8, \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8 +.endm + + +.macro vector_add_inner vC0, vC2, vC4, vC6, vC8, vA0, vA2, vA4, vA6, vA8, vB0, vB2, vB4, vB6, vB8 + add \vC0\().2s, \vA0\().2s, \vB0\().2s + add \vC2\().2s, \vA2\().2s, \vB2\().2s + add \vC4\().2s, \vA4\().2s, \vB4\().2s + add \vC6\().2s, \vA6\().2s, \vB6\().2s + add \vC8\().2s, \vA8\().2s, \vB8\().2s +.endm + +.macro vector_add vC, vA, vB + vector_add_inner \vC\()0, \vC\()2, \vC\()4, \vC\()6, \vC\()8, \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8 +.endm + +.macro vector_cmov_inner vA0, vA2, vA4, vA6, vA8, vB0, vB2, vB4, vB6, vB8, vC0, vC2, vC4, vC6, vC8 + fcsel_dform \vA0, \vB0, \vC0, eq + fcsel_dform \vA2, \vB2, \vC2, eq + fcsel_dform \vA4, \vB4, \vC4, eq + fcsel_dform \vA6, \vB6, \vC6, eq + fcsel_dform \vA8, \vB8, \vC8, eq +.endm + +.macro vector_cmov vA, vB, vC + vector_cmov_inner \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8, \vC\()0, \vC\()2, \vC\()4, \vC\()6, \vC\()8, +.endm + +.macro vector_transpose_inner vA0, vA1, vA2, vA3, vA4, vA5, vA6, vA7, vA8, vA9, vB0, vB2, vB4, vB6, vB8, vC0, vC2, vC4, vC6, vC8 + trn2 \vA1\().2s, \vB0\().2s, \vC0\().2s + trn1 \vA0\().2s, \vB0\().2s, \vC0\().2s + trn2 \vA3\().2s, \vB2\().2s, \vC2\().2s + trn1 \vA2\().2s, \vB2\().2s, \vC2\().2s + trn2 \vA5\().2s, \vB4\().2s, \vC4\().2s + trn1 \vA4\().2s, \vB4\().2s, \vC4\().2s + trn2 \vA7\().2s, \vB6\().2s, \vC6\().2s + trn1 \vA6\().2s, \vB6\().2s, \vC6\().2s + trn2 \vA9\().2s, \vB8\().2s, \vC8\().2s + trn1 \vA8\().2s, \vB8\().2s, \vC8\().2s +.endm + +.macro vector_transpose vA, vB, vC + 
vector_transpose_inner \vA\()0, \vA\()1, \vA\()2, \vA\()3, \vA\()4, \vA\()5, \vA\()6, \vA\()7, \vA\()8, \vA\()9, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8, \vC\()0, \vC\()2, \vC\()4, \vC\()6, \vC\()8, +.endm + +.macro vector_to_scalar_inner sA0, sA2, sA4, sA6, sA8, vB0, vB2, vB4, vB6, vB8 + mov \sA0, \vB0\().d[0] + mov \sA2, \vB2\().d[0] + mov \sA4, \vB4\().d[0] + mov \sA6, \vB6\().d[0] + mov \sA8, \vB8\().d[0] +.endm + +.macro vector_to_scalar sA, vB + vector_to_scalar_inner \sA\()0, \sA\()2, \sA\()4, \sA\()6, \sA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8 +.endm + +.macro scalar_to_vector_inner vA0, vA2, vA4, vA6, vA8, sB0, sB2, sB4, sB6, sB8 + mov \vA0\().d[0], \sB0 + mov \vA2\().d[0], \sB2 + mov \vA4\().d[0], \sB4 + mov \vA6\().d[0], \sB6 + mov \vA8\().d[0], \sB8 +.endm + +.macro scalar_to_vector vA, sB + scalar_to_vector_inner \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \sB\()0, \sB\()2, \sB\()4, \sB\()6, \sB\()8 +.endm + + +.macro vector_extract_upper_inner vA0, vA2, vA4, vA6, vA8, vB0, vB2, vB4, vB6, vB8 + mov \vA0\().d[0], \vB0\().d[1] + mov \vA2\().d[0], \vB2\().d[1] + mov \vA4\().d[0], \vB4\().d[1] + mov \vA6\().d[0], \vB6\().d[1] + mov \vA8\().d[0], \vB8\().d[1] +.endm + +.macro vector_extract_upper vA, vB + vector_extract_upper_inner \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8 +.endm + +.macro vector_compress_inner vA0, vA2, vA4, vA6, vA8, vB0, vB1, vB2, vB3, vB4, vB5, vB6, vB7, vB8, vB9 + trn1 \vA0\().4s, \vB0\().4s, \vB1\().4s + trn1 \vA2\().4s, \vB2\().4s, \vB3\().4s + trn1 \vA4\().4s, \vB4\().4s, \vB5\().4s + trn1 \vA6\().4s, \vB6\().4s, \vB7\().4s + trn1 \vA8\().4s, \vB8\().4s, \vB9\().4s +.endm + +.macro vector_compress vA, vB + vector_compress_inner \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()1, \vB\()2, \vB\()3, \vB\()4, \vB\()5, \vB\()6, \vB\()7, \vB\()8, \vB\()9, +.endm + +.macro scalar_clear_carries_inner sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9 + and \sA1, \sA1, 
#0x1ffffff + and \sA3, \sA3, #0x1ffffff + and \sA5, \sA5, #0x1ffffff + and \sA7, \sA7, #0x1ffffff + mov W<\sA0>, W<\sA0> + mov W<\sA2>, W<\sA2> + mov W<\sA4>, W<\sA4> + mov W<\sA6>, W<\sA6> + mov W<\sA8>, W<\sA8> +.endm + +.macro scalar_clear_carries sA + scalar_clear_carries_inner \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9 +.endm + +.macro scalar_decompress_inner sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9 + lsr \sA1, \sA0, #32 + lsr \sA3, \sA2, #32 + lsr \sA5, \sA4, #32 + lsr \sA7, \sA6, #32 + lsr \sA9, \sA8, #32 +.endm + +.macro scalar_decompress sA + scalar_decompress_inner \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9 +.endm + + // TODO: eliminate those. should be easy + vR_l4h4l5h5 .req vADBC4 + vR_l6h6l7h7 .req vADBC5 + + vR_l0h0l1h1 .req vADBC0 + vR_l2h2l3h3 .req vADBC1 + + vR_l0123 .req vADBC4 + vR_l4567 .req vADBC6 + vR_h0123 .req vADBC5 + vR_h4567 .req vADBC7 + vR_l89h89 .req vADBC8 + + vR_h89xx .req vADBC9 + + vSum0123 .req vADBC0 + vSum4567 .req vADBC1 + vSum89xx .req vADBC2 + + vDiff0123 .req v10 + vDiff4567 .req v11 + vDiff89xx .req v12 + + // TODO: eliminate those explicit register assignments by converting stack_vld1r and stack_vldr_bform to AArch64Instruction + vrepack_inner_tmp .req v19 + vrepack_inner_tmp2 .req v0 + +.macro vector_addsub_repack_inner vA0, vA1, vA2, vA3, vA4, vA5, vA6, vA7, vA8, vA9, vC0, vC1, vC2, vC3, vC4, vC5, vC6, vC7, vC8, vC9 + uzp1 vR_l4h4l5h5.4s, \vC4\().4s, \vC5\().4s + uzp1 vR_l6h6l7h7.4s, \vC6\().4s, \vC7\().4s + ld1r {vrepack_inner_tmp.2d}, [sp] // @slothy:reads=mask1 + uzp1 vR_l4567.4s, vR_l4h4l5h5.4s, vR_l6h6l7h7.4s + uzp2 vR_h4567.4s, vR_l4h4l5h5.4s, vR_l6h6l7h7.4s + trn1 vR_l89h89.4s, \vC8\().4s, \vC9\().4s + ldr B, [sp, #STACK_MASK2] // @slothy:reads=mask2 + uzp1 vR_l0h0l1h1.4s, \vC0\().4s, \vC1\().4s + uzp1 vR_l2h2l3h3.4s, \vC2\().4s, \vC3\().4s + mov vR_h89xx.d[0], vR_l89h89.d[1] + uzp1 vR_l0123.4s, vR_l0h0l1h1.4s, 
vR_l2h2l3h3.4s + uzp2 vR_h0123.4s, vR_l0h0l1h1.4s, vR_l2h2l3h3.4s + add vDiff4567.4s, vR_l4567.4s, vrepack_inner_tmp.4s + add vDiff89xx.2s, vR_l89h89.2s, vrepack_inner_tmp.2s + mov vrepack_inner_tmp.b[0], vrepack_inner_tmp2.b[0] + add vSum0123.4s, vR_l0123.4s, vR_h0123.4s + add vSum4567.4s, vR_l4567.4s, vR_h4567.4s + add vSum89xx.2s, vR_l89h89.2s, vR_h89xx.2s + add vDiff0123.4s, vR_l0123.4s, vrepack_inner_tmp.4s + sub vDiff4567.4s, vDiff4567.4s, vR_h4567.4s + sub vDiff0123.4s, vDiff0123.4s, vR_h0123.4s + sub vDiff89xx.2s, vDiff89xx.2s, vR_h89xx.2s + zip1 \vA0\().4s, vDiff0123.4s, vSum0123.4s + zip2 \vA2\().4s, vDiff0123.4s, vSum0123.4s + zip1 \vA4\().4s, vDiff4567.4s, vSum4567.4s + zip2 \vA6\().4s, vDiff4567.4s, vSum4567.4s + zip1 \vA8\().2s, vDiff89xx.2s, vSum89xx.2s + zip2 \vA9\().2s, vDiff89xx.2s, vSum89xx.2s + mov \vA1\().d[0], \vA0\().d[1] + mov \vA3\().d[0], \vA2\().d[1] + mov \vA5\().d[0], \vA4\().d[1] + mov \vA7\().d[0], \vA6\().d[1] +.endm + +.macro vector_addsub_repack vA, vC +vector_addsub_repack_inner \vA\()0, \vA\()1, \vA\()2, \vA\()3, \vA\()4, \vA\()5, \vA\()6, \vA\()7, \vA\()8, \vA\()9, \vC\()0, \vC\()1, \vC\()2, \vC\()3, \vC\()4, \vC\()5, \vC\()6, \vC\()7, \vC\()8, \vC\()9 +.endm + +// sAA0 .. sAA9 output AA = A^2 +// sA0 .. 
sA9 input A +// TODO: simplify (this is still the same instruction order as before; we can make it simpler and leave the re-ordering to Sloty) +.macro scalar_sqr_inner sAA0, sAA1, sAA2, sAA3, sAA4, sAA5, sAA6, sAA7, sAA8, sAA9, sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9 + lsr \sA1, \sA0, #32 + lsr \sA3, \sA2, #32 + lsr \sA5, \sA4, #32 + lsr \sA7, \sA6, #32 + lsr \sA9, \sA8, #32 + add X, \sA9, \sA9 + add X, \sA8, \sA8 + add X, \sA7, \sA7 + add X, \sA6, \sA6 + add X, \sA5, \sA5 + add X, \sA4, \sA4 + add X, \sA3, \sA3 + add X, \sA2, \sA2 + add X, \sA1, \sA1 + umull X, W<\sA4>, W<\sA4> + umull X, W<\sA4>, W + mul W<\sA9>, W<\sA9>, W + mul W<\sA7>, W<\sA7>, W + mul W<\sA5>, W<\sA5>, W + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA0>, W, X + umull X, W<\sA0>, W<\sA0> + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umaddl X, W<\sA0>, W, X + mul W, W<\sA6>, W + umaddl X, W<\sA1>, W, X + umaddl X, W<\sA1>, W, X + umaddl X, W, W, X + umaddl X, W<\sA1>, W, X + umaddl X, W, W, X + umaddl X, W<\sA1>, W, X + umaddl X, W, W, X + umaddl X, W<\sA1>, W, X + mul W, W<\sA8>, W + umaddl X, W<\sA2>, W<\sA2>, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA3>, W, X + umaddl X, W<\sA3>, W, X + umaddl X, W, W, X + umaddl X, W<\sA3>, W, X + umaddl X, W<\sA8>, W, X + umaddl X, W<\sA6>, W, X + add X, X, X, lsr #26 + umaddl X, W<\sA5>, W, X + add X, X, X, lsr #25 + bic X, X, #0x1ffffff + add X, X, X, lsr #24 + and X, X, #0x1ffffff + add X, X, X, lsr #21 + umaddl X, W<\sA7>, W, X + add X, X, X + add X, X, X + add X, X, X + add X, X, X + umaddl X, W, W, X + umaddl X, W, W, X + and X, X, #0x3ffffff + umaddl X, W<\sA7>, W, X + umaddl X, W<\sA7>, W, X + umaddl X, W<\sA7>, W, X + umaddl X, W<\sA7>, W, X + umaddl X, W, W, X + umaddl X, W, W, X + umaddl X, W, W, X + umaddl X, W, W, X + umaddl 
X, W, W, X + umaddl X, W, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + add \sAA1, X, X, lsr #26 + and \sAA0, X, #0x3ffffff + add \sAA2, X, \sAA1, lsr #25 + bfi \sAA0, \sAA1, #32, #25 + add \sAA3, X, \sAA2, lsr #26 + and \sAA2, \sAA2, #0x3ffffff + add \sAA4, X, \sAA3, lsr #25 + bfi \sAA2, \sAA3, #32, #25 + add \sAA5, X, \sAA4, lsr #26 + and \sAA4, \sAA4, #0x3ffffff + add \sAA6, X, \sAA5, lsr #25 + bfi \sAA4, \sAA5, #32, #25 + add \sAA7, X, \sAA6, lsr #26 + and \sAA6, \sAA6, #0x3ffffff + add \sAA8, X, \sAA7, lsr #25 + bfi \sAA6, \sAA7, #32, #25 + add \sAA9, X, \sAA8, lsr #26 + and \sAA8, \sAA8, #0x3ffffff + bfi \sAA8, \sAA9, #32, #26 +.endm + +.macro scalar_sqr sAA, sA +scalar_sqr_inner \sAA\()0, \sAA\()1, \sAA\()2, \sAA\()3, \sAA\()4, \sAA\()5, \sAA\()6, \sAA\()7, \sAA\()8, \sAA\()9, \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9 +.endm + +// sC0 .. sC9 output C = A*B +// sA0 .. sA9 input A +// sB0 .. 
sB9 input B +.macro scalar_mul_inner sC0, sC1, sC2, sC3, sC4, sC5, sC6, sC7, sC8, sC9, sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9, sB0, sB1, sB2, sB3, sB4, sB5, sB6, sB7, sB8, sB9 + + + mul W, W<\sA1>, W + mul W, W<\sA2>, W + mul W, W<\sA3>, W + mul W, W<\sA5>, W + mul W, W<\sA6>, W + mul W, W<\sA7>, W + mul W, W<\sA8>, W + mul W, W<\sA9>, W + + umull X, W<\sA1>, W<\sB8> + umaddl X, W<\sA3>, W<\sB6>, X + umaddl X, W<\sA5>, W<\sB4>, X + umaddl X, W<\sA7>, W<\sB2>, X + umaddl X, W<\sA9>, W<\sB0>, X + umaddl X, W<\sA0>, W<\sB9>, X + umaddl X, W<\sA2>, W<\sB7>, X + umaddl X, W<\sA4>, W<\sB5>, X + umaddl X, W<\sA6>, W<\sB3>, X + umaddl X, W<\sA8>, W<\sB1>, X + + umull X, W<\sA1>, W<\sB7> + umaddl X, W<\sA3>, W<\sB5>, X + umaddl X, W<\sA5>, W<\sB3>, X + umaddl X, W<\sA7>, W<\sB1>, X + umaddl X, W, W<\sB9>, X + add X, X, X + umaddl X, W<\sA0>, W<\sB8>, X + umaddl X, W<\sA2>, W<\sB6>, X + umaddl X, W<\sA4>, W<\sB4>, X + umaddl X, W<\sA6>, W<\sB2>, X + umaddl X, W<\sA8>, W<\sB0>, X + + + umull X, W<\sA1>, W<\sB6> + umaddl X, W<\sA3>, W<\sB4>, X + umaddl X, W<\sA5>, W<\sB2>, X + umaddl X, W<\sA7>, W<\sB0>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA0>, W<\sB7>, X + umaddl X, W<\sA2>, W<\sB5>, X + umaddl X, W<\sA4>, W<\sB3>, X + umaddl X, W<\sA6>, W<\sB1>, X + umaddl X, W, W<\sB9>, X + + umull X, W<\sA1>, W<\sB5> + umaddl X, W<\sA3>, W<\sB3>, X + umaddl X, W<\sA5>, W<\sB1>, X + umaddl X, W, W<\sB9>, X + umaddl X, W, W<\sB7>, X + add X, X, X + umaddl X, W<\sA0>, W<\sB6>, X + umaddl X, W<\sA2>, W<\sB4>, X + umaddl X, W<\sA4>, W<\sB2>, X + umaddl X, W<\sA6>, W<\sB0>, X + umaddl X, W, W<\sB8>, X + + umull X, W, W<\sB6> + umaddl X, W<\sA5>, W<\sB0>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA3>, W<\sB2>, X + umaddl X, W<\sA1>, W<\sB4>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, W<\sA4>, W<\sB1>, X + umaddl X, W<\sA2>, W<\sB3>, X + umaddl X, W<\sA0>, W<\sB5>, X + + umull X, W, W<\sB5> + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, 
W<\sA3>, W<\sB1>, X + umaddl X, W<\sA1>, W<\sB3>, X + add X, X, X + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA4>, W<\sB0>, X + umaddl X, W<\sA2>, W<\sB2>, X + umaddl X, W<\sA0>, W<\sB4>, X + + umull X, W, W<\sB4> + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA3>, W<\sB0>, X + umaddl X, W<\sA1>, W<\sB2>, X + mul W, W<\sA4>, W + umaddl X, W, W<\sB5>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, W<\sA2>, W<\sB1>, X + umaddl X, W<\sA0>, W<\sB3>, X + + add X, X, X, lsr #26 + and \sC4, X, #0x3ffffff + add X, X, X, lsr #25 + and \sC5, X, #0x1ffffff + add X, X, X, lsr #26 + and \sC6, X, #0x3ffffff + add X, X, X, lsr #25 + bfi \sC6, X, #32, #25 + add X, X, X, lsr #26 + and \sC8, X, #0x3ffffff + bic X, X, #0x3ffffff + lsr X, X, #26 + bfi \sC8, X, #32, #26 + add X, X, X, lsr #25 + add X, X, X, lsr #22 + + umaddl X, W, W<\sB1>, X + umaddl X, W, W<\sB3>, X + umaddl X, W, W<\sB5>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + add X, X, X + umaddl X, W, W<\sB2>, X + umaddl X, W, W<\sB4>, X + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA0>, W<\sB0>, X + + umull X, W, W<\sB2> + umaddl X, W, W<\sB4>, X + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA1>, W<\sB0>, X + umaddl X, W, W<\sB3>, X + umaddl X, W, W<\sB5>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, W<\sA0>, W<\sB1>, X + + umull X, W, W<\sB3> + umaddl X, W, W<\sB5>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, W<\sA1>, W<\sB1>, X + add X, X, X + umaddl X, W, W<\sB4>, X + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA2>, W<\sB0>, X + umaddl X, W<\sA0>, W<\sB2>, X + + add \sC1, X, X, lsr #26 + and \sC0, X, #0x3ffffff + add \sC2, X, \sC1, lsr #25 + bfi \sC0, \sC1, #32, #25 + add X, X, \sC2, lsr #26 + and \sC2, \sC2, #0x3ffffff + add \sC4, \sC4, X, lsr #25 + bfi \sC2, X, #32, #25 + add \sC5, \sC5, \sC4, lsr #26 + and \sC4, \sC4, #0x3ffffff + bfi 
\sC4, \sC5, #32, #26 +.endm + +.macro scalar_mul sC, sA, sB +scalar_mul_inner \sC\()0, \sC\()1, \sC\()2, \sC\()3, \sC\()4, \sC\()5, \sC\()6, \sC\()7, \sC\()8, \sC\()9, \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9, \sB\()0, \sB\()1, \sB\()2, \sB\()3, \sB\()4, \sB\()5, \sB\()6, \sB\()7, \sB\()8, \sB\()9 +.endm + +xtmp_scalar_sub_0 .req x21 + +// sC0 .. sC4 output C = A + 4p - B (registers may be the same as A) +// sA0 .. sA4 first operand A +// sB0 .. sB4 second operand B +.macro scalar_sub_inner sC0, sC1, sC2, sC3, sC4, sA0, sA1, sA2, sA3, sA4, sB0, sB1, sB2, sB3, sB4 + + ldr xtmp_scalar_sub_0, #=0x07fffffe07fffffc + add \sC1, \sA1, xtmp_scalar_sub_0 + add \sC2, \sA2, xtmp_scalar_sub_0 + add \sC3, \sA3, xtmp_scalar_sub_0 + add \sC4, \sA4, xtmp_scalar_sub_0 + movk xtmp_scalar_sub_0, #0xffb4 + add \sC0, \sA0, xtmp_scalar_sub_0 + sub \sC0, \sC0, \sB0 + sub \sC1, \sC1, \sB1 + sub \sC2, \sC2, \sB2 + sub \sC3, \sC3, \sB3 + sub \sC4, \sC4, \sB4 +.endm + +.macro scalar_sub sC, sA, sB +scalar_sub_inner \sC\()0, \sC\()2, \sC\()4, \sC\()6, \sC\()8, \sA\()0, \sA\()2, \sA\()4, \sA\()6, \sA\()8, \sB\()0, \sB\()2, \sB\()4, \sB\()6, \sB\()8 +.endm + + +.macro scalar_addm_inner sC0, sC1, sC2, sC3, sC4, sC5, sC6, sC7, sC8, sC9, sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9, sB0, sB1, sB2, sB3, sB4, sB5, sB6, sB7, sB8, sB9, multconst + + ldr X, #=\multconst + umaddl \sC9, W<\sB9>, W, \sA9 + umaddl \sC0, W<\sB0>, W, \sA0 + umaddl \sC1, W<\sB1>, W, \sA1 + umaddl \sC2, W<\sB2>, W, \sA2 + lsr X, \sC9, #25 + umaddl \sC3, W<\sB3>, W, \sA3 + and \sC9, \sC9, #0x1ffffff + umaddl \sC4, W<\sB4>, W, \sA4 + add \sC0, \sC0, X + umaddl \sC5, W<\sB5>, W, \sA5 + add \sC0, \sC0, X, lsl #1 + umaddl \sC6, W<\sB6>, W, \sA6 + add \sC0, \sC0, X, lsl #4 + umaddl \sC7, W<\sB7>, W, \sA7 + umaddl \sC8, W<\sB8>, W, \sA8 + + add \sC1, \sC1, \sC0, lsr #26 + and \sC0, \sC0, #0x3ffffff + add \sC2, \sC2, \sC1, lsr #25 + and \sC1, \sC1, #0x1ffffff + add \sC3, \sC3, \sC2, 
lsr #26 + and \sC2, \sC2, #0x3ffffff + add \sC4, \sC4, \sC3, lsr #25 + and \sC3, \sC3, #0x1ffffff + add \sC5, \sC5, \sC4, lsr #26 + and \sC4, \sC4, #0x3ffffff + add \sC6, \sC6, \sC5, lsr #25 + and \sC5, \sC5, #0x1ffffff + add \sC7, \sC7, \sC6, lsr #26 + and \sC6, \sC6, #0x3ffffff + add \sC8, \sC8, \sC7, lsr #25 + and \sC7, \sC7, #0x1ffffff + add \sC9, \sC9, \sC8, lsr #26 + and \sC8, \sC8, #0x3ffffff +.endm + +.macro scalar_addm sC, sA, sB, multconst +scalar_addm_inner \sC\()0, \sC\()1, \sC\()2, \sC\()3, \sC\()4, \sC\()5, \sC\()6, \sC\()7, \sC\()8, \sC\()9, \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9, \sB\()0, \sB\()1, \sB\()2, \sB\()3, \sB\()4, \sB\()5, \sB\()6, \sB\()7, \sB\()8, \sB\()9, \multconst +.endm + +// vAA0 .. vAA9 output AA = A^2 +// vA0 .. vA9 input A +.macro vector_sqr_inner vAA0, vAA1, vAA2, vAA3, vAA4, vAA5, vAA6, vAA7, vAA8, vAA9, vA0, vA1, vA2, vA3, vA4, vA5, vA6, vA7, vA8, vA9 + shl V.2s, \vA9\().2s, #1 + shl V.2s, \vA8\().2s, #1 + shl V.2s, \vA7\().2s, #1 + shl V.2s, \vA6\().2s, #1 + shl V.2s, \vA5\().2s, #1 + shl V.2s, \vA4\().2s, #1 + shl V.2s, \vA3\().2s, #1 + shl V.2s, \vA2\().2s, #1 + shl V.2s, \vA1\().2s, #1 + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, \vA1\().2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umlal V.2d, \vA3\().2s, V.2s + umlal V.2d, \vA4\().2s, V.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, \vA4\().2s, \vA4\().2s + mul V.2s, \vA9\().2s, vconst19.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, \vA1\().2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umlal V.2d, \vA3\().2s, V.2s + umlal V.2d, V.2s, V.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umlal V.2d, V.2s, \vA3\().2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, \vA1\().2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, \vA2\().2s, \vA2\().2s + umull 
V.2d, \vA0\().2s, V.2s + umlal V.2d, \vA1\().2s, V.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, V.2s, \vA1\().2s + umull V.2d, \vA0\().2s, V.2s + umull V.2d, \vA0\().2s, \vA0\().2s + usra V.2d, V.2d, #26 + and V.16b, V.16b, vMaskA.16b + mul V.2s, \vA8\().2s, vconst19.2s + bic V.16b, V.16b, vMaskB.16b + and \vA9\().16b, V.16b, vMaskB.16b + usra V.2d, V.2d, #25 + mul V.2s, \vA7\().2s, vconst19.2s + usra V.2d, V.2d, #24 + mul V.2s, \vA6\().2s, vconst19.2s + usra V.2d, V.2d, #21 + mul V.2s, \vA5\().2s, vconst19.2s + shl V.2s, V.2s, #1 + shl V.2s, V.2s, #1 + shl V.2s, V.2s, #1 + shl V.2s, V.2s, #1 + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, \vA6\().2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #26 + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #25 + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #26 + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #25 + umlal V.2d, V.2s, \vA8\().2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #26 + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #25 + usra V.2d, V.2d, #26 + usra V.2d, V.2d, #25 + usra \vAA9\().2d, V.2d, #26 + and \vAA4\().16b, V.16b, vMaskA.16b + and \vAA5\().16b, V.16b, vMaskB.16b + and \vAA0\().16b, V.16b, vMaskA.16b + and \vAA6\().16b, V.16b, vMaskA.16b + and \vAA1\().16b, V.16b, vMaskB.16b + and \vAA7\().16b, V.16b, vMaskB.16b + and \vAA2\().16b, V.16b, vMaskA.16b + and \vAA8\().16b, V.16b, vMaskA.16b + and \vAA3\().16b, V.16b, vMaskB.16b +.endm + +.macro vector_sqr vAA, vA +vector_sqr_inner \vAA\()0, \vAA\()1, \vAA\()2, \vAA\()3, \vAA\()4, \vAA\()5, \vAA\()6, \vAA\()7, \vAA\()8, \vAA\()9, \vA\()0, \vA\()1, \vA\()2, \vA\()3, \vA\()4, \vA\()5, \vA\()6, \vA\()7, 
\vA\()8, \vA\()9 +.endm + +// vector_mul_inner: multiply two 10-limb operands, two 32-bit lanes at a time (.2s inputs widened into .2d accumulators via umull/umlal). +// vC0 .. vC9 output C = A*B +// vA0 .. vA9 first operand A +// vB0 .. vB9 second operand B; CLOBBERED: vB1 .. vB9 are multiplied in place by vconst19, and vB9 is reused as scratch at the end +// Expects the register aliases vconst19, vMaskA and vMaskB to be defined by the surrounding code; vMaskB is overwritten with vMaskA >> 1. +// Accumulators that start from odd-indexed limb pairs (vC8, vC6, vC4, vC2, vC0) are doubled with shl #1 before their remaining terms are added. +// Carries are propagated vC2 -> vC3 -> ... -> vC9 with usra, alternating shifts #26/#25 and masks vMaskA/vMaskB. +// The part of vC9 outside vMaskB is then added to vC0 shifted right by 25, 24 and 21 (if vMaskB covers the low 25 bits, that is 1 + 2 + 16 = 19 times the overflow), +// after which carries run vC0 -> vC1 -> vC2 -> vC3; vC3 is not re-masked afterwards. +// NOTE(review): presumably this is multiplication modulo 2^255-19 on alternating 26/25-bit limbs -- confirm against the rest of the file. +// The vector_mul wrapper that follows expands three register-name prefixes (vC, vA, vB) into the 30 arguments of this macro by appending the digits 0 .. 9. +.macro vector_mul_inner vC0, vC1, vC2, vC3, vC4, vC5, vC6, vC7, vC8, vC9, vA0, vA1, vA2, vA3, vA4, vA5, vA6, vA7, vA8, vA9, vB0, vB1, vB2, vB3, vB4, vB5, vB6, vB7, vB8, vB9 + umull \vC9\().2d, \vA0\().2s, \vB9\().2s + umlal \vC9\().2d, \vA2\().2s, \vB7\().2s + umlal \vC9\().2d, \vA4\().2s, \vB5\().2s + umlal \vC9\().2d, \vA6\().2s, \vB3\().2s + umlal \vC9\().2d, \vA8\().2s, \vB1\().2s + mul \vB9\().2s, \vB9\().2s, vconst19.2s + umull \vC8\().2d, \vA1\().2s, \vB7\().2s + umlal \vC8\().2d, \vA3\().2s, \vB5\().2s + umlal \vC8\().2d, \vA5\().2s, \vB3\().2s + umlal \vC8\().2d, \vA7\().2s, \vB1\().2s + umlal \vC8\().2d, \vA9\().2s, \vB9\().2s + umlal \vC9\().2d, \vA1\().2s, \vB8\().2s + umlal \vC9\().2d, \vA3\().2s, \vB6\().2s + umlal \vC9\().2d, \vA5\().2s, \vB4\().2s + umlal \vC9\().2d, \vA7\().2s, \vB2\().2s + umlal \vC9\().2d, \vA9\().2s, \vB0\().2s + shl \vC8\().2d, \vC8\().2d, #1 + umull \vC7\().2d, \vA0\().2s, \vB7\().2s + umlal \vC7\().2d, \vA2\().2s, \vB5\().2s + umlal \vC7\().2d, \vA4\().2s, \vB3\().2s + umlal \vC7\().2d, \vA6\().2s, \vB1\().2s + umlal \vC7\().2d, \vA8\().2s, \vB9\().2s + mul \vB7\().2s, \vB7\().2s, vconst19.2s + umlal \vC8\().2d, \vA0\().2s, \vB8\().2s + umlal \vC8\().2d, \vA2\().2s, \vB6\().2s + umlal \vC8\().2d, \vA4\().2s, \vB4\().2s + umlal \vC8\().2d, \vA6\().2s, \vB2\().2s + umlal \vC8\().2d, \vA8\().2s, \vB0\().2s + mul \vB8\().2s, \vB8\().2s, vconst19.2s + umull \vC6\().2d, \vA1\().2s, \vB5\().2s + umlal \vC6\().2d, \vA3\().2s, \vB3\().2s + umlal \vC6\().2d, \vA5\().2s, \vB1\().2s + umlal \vC6\().2d, \vA7\().2s, \vB9\().2s + umlal \vC6\().2d, \vA9\().2s, \vB7\().2s + umlal \vC7\().2d, \vA1\().2s, \vB6\().2s + umlal \vC7\().2d, \vA3\().2s, \vB4\().2s + umlal \vC7\().2d, \vA5\().2s, \vB2\().2s + umlal \vC7\().2d, \vA7\().2s, \vB0\().2s + umlal \vC7\().2d, \vA9\().2s, \vB8\().2s + shl \vC6\().2d, \vC6\().2d, #1 + umull 
\vC5\().2d, \vA0\().2s, \vB5\().2s + umlal \vC5\().2d, \vA2\().2s, \vB3\().2s + umlal \vC5\().2d, \vA4\().2s, \vB1\().2s + umlal \vC5\().2d, \vA6\().2s, \vB9\().2s + umlal \vC5\().2d, \vA8\().2s, \vB7\().2s + mul \vB5\().2s, \vB5\().2s, vconst19.2s + umlal \vC6\().2d, \vA0\().2s, \vB6\().2s + umlal \vC6\().2d, \vA2\().2s, \vB4\().2s + umlal \vC6\().2d, \vA4\().2s, \vB2\().2s + umlal \vC6\().2d, \vA6\().2s, \vB0\().2s + umlal \vC6\().2d, \vA8\().2s, \vB8\().2s + mul \vB6\().2s, \vB6\().2s, vconst19.2s + umull \vC4\().2d, \vA1\().2s, \vB3\().2s + umlal \vC4\().2d, \vA3\().2s, \vB1\().2s + umlal \vC4\().2d, \vA5\().2s, \vB9\().2s + umlal \vC4\().2d, \vA7\().2s, \vB7\().2s + umlal \vC4\().2d, \vA9\().2s, \vB5\().2s + umlal \vC5\().2d, \vA1\().2s, \vB4\().2s + umlal \vC5\().2d, \vA3\().2s, \vB2\().2s + umlal \vC5\().2d, \vA5\().2s, \vB0\().2s + umlal \vC5\().2d, \vA7\().2s, \vB8\().2s + umlal \vC5\().2d, \vA9\().2s, \vB6\().2s + shl \vC4\().2d, \vC4\().2d, #1 + umull \vC3\().2d, \vA0\().2s, \vB3\().2s + umlal \vC3\().2d, \vA2\().2s, \vB1\().2s + umlal \vC3\().2d, \vA4\().2s, \vB9\().2s + umlal \vC3\().2d, \vA6\().2s, \vB7\().2s + umlal \vC3\().2d, \vA8\().2s, \vB5\().2s + mul \vB3\().2s, \vB3\().2s, vconst19.2s + umlal \vC4\().2d, \vA0\().2s, \vB4\().2s + umlal \vC4\().2d, \vA2\().2s, \vB2\().2s + umlal \vC4\().2d, \vA4\().2s, \vB0\().2s + umlal \vC4\().2d, \vA6\().2s, \vB8\().2s + umlal \vC4\().2d, \vA8\().2s, \vB6\().2s + mul \vB4\().2s, \vB4\().2s, vconst19.2s + umull \vC2\().2d, \vA1\().2s, \vB1\().2s + umlal \vC2\().2d, \vA3\().2s, \vB9\().2s + umlal \vC2\().2d, \vA5\().2s, \vB7\().2s + umlal \vC2\().2d, \vA7\().2s, \vB5\().2s + umlal \vC2\().2d, \vA9\().2s, \vB3\().2s + umlal \vC3\().2d, \vA1\().2s, \vB2\().2s + umlal \vC3\().2d, \vA3\().2s, \vB0\().2s + umlal \vC3\().2d, \vA5\().2s, \vB8\().2s + umlal \vC3\().2d, \vA7\().2s, \vB6\().2s + umlal \vC3\().2d, \vA9\().2s, \vB4\().2s + shl \vC2\().2d, \vC2\().2d, #1 + umull \vC1\().2d, \vA0\().2s, \vB1\().2s + umlal 
\vC1\().2d, \vA2\().2s, \vB9\().2s + umlal \vC1\().2d, \vA4\().2s, \vB7\().2s + umlal \vC1\().2d, \vA6\().2s, \vB5\().2s + umlal \vC1\().2d, \vA8\().2s, \vB3\().2s + mul \vB1\().2s, \vB1\().2s, vconst19.2s + umlal \vC2\().2d, \vA0\().2s, \vB2\().2s + umlal \vC2\().2d, \vA2\().2s, \vB0\().2s + umlal \vC2\().2d, \vA4\().2s, \vB8\().2s + umlal \vC2\().2d, \vA6\().2s, \vB6\().2s + umlal \vC2\().2d, \vA8\().2s, \vB4\().2s + mul \vB2\().2s, \vB2\().2s, vconst19.2s + umull \vC0\().2d, \vA1\().2s, \vB9\().2s + umlal \vC0\().2d, \vA3\().2s, \vB7\().2s + umlal \vC0\().2d, \vA5\().2s, \vB5\().2s + ushr vMaskB.2d, vMaskA.2d, #1 + usra \vC3\().2d, \vC2\().2d, #26 + and \vC2\().16b, \vC2\().16b, vMaskA.16b + umlal \vC1\().2d, \vA1\().2s, \vB0\().2s + usra \vC4\().2d, \vC3\().2d, #25 + and \vC3\().16b, \vC3\().16b, vMaskB.16b + umlal \vC0\().2d, \vA7\().2s, \vB3\().2s + usra \vC5\().2d, \vC4\().2d, #26 + and \vC4\().16b, \vC4\().16b, vMaskA.16b + umlal \vC1\().2d, \vA3\().2s, \vB8\().2s + usra \vC6\().2d, \vC5\().2d, #25 + and \vC5\().16b, \vC5\().16b, vMaskB.16b + umlal \vC0\().2d, \vA9\().2s, \vB1\().2s + usra \vC7\().2d, \vC6\().2d, #26 + and \vC6\().16b, \vC6\().16b, vMaskA.16b + umlal \vC1\().2d, \vA5\().2s, \vB6\().2s + umlal \vC1\().2d, \vA7\().2s, \vB4\().2s + umlal \vC1\().2d, \vA9\().2s, \vB2\().2s + usra \vC8\().2d, \vC7\().2d, #25 + and \vC7\().16b, \vC7\().16b, vMaskB.16b + shl \vC0\().2d, \vC0\().2d, #1 + usra \vC9\().2d, \vC8\().2d, #26 + and \vC8\().16b, \vC8\().16b, vMaskA.16b + umlal \vC0\().2d, \vA0\().2s, \vB0\().2s + umlal \vC0\().2d, \vA2\().2s, \vB8\().2s + umlal \vC0\().2d, \vA4\().2s, \vB6\().2s + umlal \vC0\().2d, \vA6\().2s, \vB4\().2s + umlal \vC0\().2d, \vA8\().2s, \vB2\().2s + bic \vB9\().16b, \vC9\().16b, vMaskB.16b + and \vC9\().16b, \vC9\().16b, vMaskB.16b + usra \vC0\().2d, \vB9\().2d, #25 + usra \vC0\().2d, \vB9\().2d, #24 + usra \vC0\().2d, \vB9\().2d, #21 + usra \vC1\().2d, \vC0\().2d, #26 + and \vC0\().16b, \vC0\().16b, vMaskA.16b + usra 
\vC2\().2d, \vC1\().2d, #25 + and \vC1\().16b, \vC1\().16b, vMaskB.16b + usra \vC3\().2d, \vC2\().2d, #26 + and \vC2\().16b, \vC2\().16b, vMaskA.16b +.endm + +.macro vector_mul vC, vA, vB +vector_mul_inner \vC\()0, \vC\()1, \vC\()2, \vC\()3, \vC\()4, \vC\()5, \vC\()6, \vC\()7, \vC\()8, \vC\()9, \vA\()0, \vA\()1, \vA\()2, \vA\()3, \vA\()4, \vA\()5, \vA\()6, \vA\()7, \vA\()8, \vA\()9, \vB\()0, \vB\()1, \vB\()2, \vB\()3, \vB\()4, \vB\()5, \vB\()6, \vB\()7, \vB\()8, \vB\()9 +.endm + + // in: x1: scalar pointer, x2: base point pointer + // out: x0: result pointer + .global x25519_scalarmult_alt_orig + .global _x25519_scalarmult_alt_orig + // .type x25519_scalarmult, %function +x25519_scalarmult_alt_orig: +_x25519_scalarmult_alt_orig: + stp x29, x30, [sp, #-160]! + mov x29, sp + stp x19, x20, [sp, #16] + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + stp x25, x26, [sp, #64] + stp x27, x28, [sp, #80] + stp d8, d9, [sp, #96] + stp d10, d11, [sp, #112] + stp d12, d13, [sp, #128] + stp d14, d15, [sp, #144] + sub sp, sp, STACK_OUT_PTR+8 + + // 0: mask1, 8: mask2, 16: AA, 56: B/BB, 96: counter, 100: lastbit, 104: scalar, 136: X1, 176: outptr, 184: padding, 192: fp, 200: lr + + str x0, [sp, STACK_OUT_PTR] // outptr + mov x19, x2 // point + + mov x0, x1 // scalar + bl load256unaligned + + and x3, x3, #0x7fffffffffffffff + and x0, x0, #0xfffffffffffffff8 + orr x3, x3, #0x4000000000000000 + + stp x0, x1, [sp, STACK_SCALAR] + stp x2, x3, [sp, STACK_SCALAR+16] + + mov x0, x19 // point + bl load256unaligned + + // Unpack point (discard most significant bit) + lsr x12, x0, #51 + lsr x17, x2, #51 + orr w12, w12, w1, lsl #13 + orr w17, w17, w3, lsl #13 + ubfx x8, x3, #12, #26 + ubfx x9, x3, #38, #25 + ubfx x11, x0, #26, #25 + ubfx x13, x1, #13, #25 + lsr x14, x1, #38 + ubfx x16, x2, #25, #26 + and w10, w0, #0x3ffffff + and w12, w12, #0x3ffffff + and w15, w2, #0x1ffffff + and w17, w17, #0x1ffffff + stp w10, w11, [sp, STACK_X_0] + stp w12, w13, [sp, STACK_X_8] + stp w14, w15, [sp, 
STACK_X_16] + stp w16, w17, [sp, STACK_X_24] + stp w8, w9, [sp, STACK_X_32] + + // X2 (initially set to 1) + mov x1, #1 + mov v0.d[0], x1 + mov v2.d[0], xzr + mov v4.d[0], xzr + mov v6.d[0], xzr + mov v8.d[0], xzr + + // Z2 (initially set to 0) + mov v1.d[0], xzr + mov v3.d[0], xzr + mov v5.d[0], xzr + mov v7.d[0], xzr + mov v9.d[0], xzr + + // X3 (initially set to X1) + mov v10.s[0], w10 + mov v10.s[1], w11 + mov v12.s[0], w12 + mov v12.s[1], w13 + mov v14.s[0], w14 + mov v14.s[1], w15 + mov v16.s[0], w16 + mov v16.s[1], w17 + mov v18.s[0], w8 + mov v18.s[1], w9 + + // Z3 (initially set to 1) + mov v11.d[0], x1 + mov v13.d[0], xzr + mov v15.d[0], xzr + mov v17.d[0], xzr + mov v19.d[0], xzr + + mov x0, #255-1 // 255 iterations + str W0, [sp, #STACK_CTR] // @slothy:writes=ctr + + const19 .req x30 + vconst19 .req v31 + + mov w30, #19 + dup vconst19.2s, w30 + mov x0, #(1<<26)-1 + dup v30.2d, x0 + ldr x0, #=0x07fffffe07fffffc + // TODO: I do not quite understand what the two stps are doing + // First seems to write bytes 0-15 (mask1+mask2); second seems to write bytes 16-31 (mask2+A) + // stp x0, x0, [sp, #STACK_MASK1] // @slothy:writes=mask1 + + sub x1, x0, #0xfc-0xb4 + str x0, [sp, #STACK_MASK1] // @slothy:writes=mask1 + str x1, [sp, #STACK_MASK2] // @slothy:writes=mask2 + + ldr d28, [sp, #STACK_MASK2] // @slothy:reads=mask2 + ldr d29, [sp, #STACK_MASK1] // @slothy:reads=mask1 + + ldrb w1, [sp, #STACK_SCALAR+31] + lsr w1, w1, #6 + str w1, [sp, #STACK_LASTBIT] // @slothy:writes=lastbit + mainloop: + sub v22.2S, v29.2S, v13.2S // 
............*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add v25.2S, v12.2S, v13.2S // ...........................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add v20.2S, v10.2S, v11.2S // ..........................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + sub v23.2S, v29.2S, v5.2S // ...*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v21.2S, v4.2S, v23.2S // ........*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + tst w1, #1 // *............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add v27.2S, v2.2S, v3.2S // ......................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel_dform v23, v27, v25, eq // ................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ mov x6, v23.d[0] // .....................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add v22.2S, v12.2S, v22.2S // .................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ sub v26.2S, v29.2S, v19.2S // ...............*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + sub v13.2S, v29.2S, v17.2S // ..............*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ sub v24.2S, v29.2S, v9.2S // .....*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + sub v12.2S, v29.2S, v7.2S // ....*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ sub v23.2S, v29.2S, v15.2S // .............*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + sub v3.2S, v29.2S, v3.2S // ..*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v29.2S, v18.2S, v26.2S // ....................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + sub v26.2S, v28.2S, v11.2S // ...........*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add v11.2S, v18.2S, v19.2S // ..............................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add v19.2S, v14.2S, v23.2S // ..................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v23.2S, v0.2S, v1.2S // .....................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add v18.2S, v8.2S, v9.2S // .........................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v9.2S, v14.2S, v15.2S // ............................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add v14.2S, v8.2S, v24.2S // ..........*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v8.2S, v6.2S, v12.2S // .........*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add v3.2S, v2.2S, v3.2S // .......*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ sub v12.2S, v28.2S, v1.2S // .*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add v24.2S, v0.2S, v12.2S // ......*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ fcsel_dform v0, v23, v20, eq // ...............................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add v28.2S, v10.2S, v26.2S // ................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ fcsel_dform v12, v24, v28, eq // ...................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel_dform v1, v3, v22, eq // ....................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v26.2S, v16.2S, v17.2S // .............................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add v15.2S, v6.2S, v7.2S // ........................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ fcsel_dform v6, v15, v26, eq // ..................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel_dform v17, v21, v19, eq // .....................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ trn1 v7.2S, v15.2S, v8.2S // ................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + mov x21, v6.d[0] // .......................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ lsr x22, x21, #32 // ........................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel_dform v10, v18, v11, eq // ...................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mov x24, v10.d[0] // ........................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + lsr x13, x24, #32 // .........................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v5.2S, v4.2S, v5.2S // .......................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel_dform v10, v5, v9, eq // .................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ mov x0, v10.d[0] // ......................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + lsr x14, x0, #32 // .......................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x28, x14, x14 // ..............................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn2 v15.2S, v15.2S, v8.2S // ...............................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ stp d12, d1, [sp, #STACK_B_0] // ..................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:writes=[B0,B8] + trn2 v1.2S, v5.2S, v21.2S // 
.............................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + trn1 v5.2S, v5.2S, v21.2S // ..............................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn1 v2.2S, v27.2S, v3.2S // ............................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + mov x8, v0.d[0] // ....................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull x26, w0, w0 // ...................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + lsr x23, x8, #32 // .....................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ add x17, x23, x23 // ..................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn2 v3.2S, v27.2S, v3.2S // ...........................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x20, x13, x13 // ..........................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x7, x0, x0 // ...............................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull x12, w0, w28 // ....................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x0, x24, x24 // ...........................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x11, w8, w20, x12 // .........................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x12, w23, w0, x11 // ...........................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add v27.2S, v16.2S, v13.2S // ...................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn2 v13.2S, v18.2S, v14.2S // .................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ trn1 v12.2S, v22.2S, v25.2S // ...........................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + trn2 v22.2S, v22.2S, v25.2S // ..........................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w19, w13, w30 // .....................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x2, w19, w20, x26 // ........................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x26, w8, w0, x2 // ..................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x10, x22, x22 // ............................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x4, w6, w10, x12 // ..................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x20, x21, x21 // .............................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x29, w17, w10, x26 // ..........................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x2, w6, w20, x29 // .................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ lsr x27, x6, #32 // ......................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x29, x27, x27 // ................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ mul w16, w14, w30 // .......................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x13, w29, w28, x2 // .....................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umull x9, w8, w8 // ..........................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x26, x6, x6 // .................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x5, w16, w28, x9 // ..........................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull x9, w8, w20 // ................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x3, w17, w28, x9 // ........................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x12, w6, w7, x3 // ...............................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x3, x28, x28 // ...................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mul w25, w22, w30 // ......................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w18, w21, w30 // ...................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull x14, w8, w26 // ............................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x9, w23, w17, x14 // ....................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x9, w21, w18, x9 // ........................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x2, w25, w3, x9 // ..........................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mul w22, w24, w30 // ............................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x9, w27, w29, x12 // ...................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x15, w24, w22, x9 // .......................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x9, x10, x10 // ....................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel_dform v4, v8, v27, eq // ......................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ stp d17, d4, [sp, #STACK_B_16] // ...................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:writes=[B16,B24] + umaddl x1, w27, w20, x4 // 
......................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x11, x1, x13, lsr #26 // .........................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x12, x5, x11, lsr #25 // ...........................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + bic x1, x11, #0x1ffffff // ............................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x21, x12, x1, lsr #24 // .............................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + trn1 v4.2S, v18.2S, v14.2S // ..................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x24, x29, x29 // ..................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x5, x21, x1, lsr #21 // ...............................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x14, w18, w7, x5 // .....................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x14, w25, w24, x14 // ........................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x21, w22, w26, x14 // ............................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + trn1 v0.2S, v23.2S, v24.2S // ..........................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn2 v16.2S, v19.2S, v9.2S // ............................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umull x12, w8, w7 // ..............................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x1, w17, w29, x12 // ......................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x5, w6, w6, x1 // .............................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x12, w25, w10, x5 // ................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x4, w22, w20, x12 // ................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x3, w19, w3, x4 // ......................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel_dform v21, v14, v29, eq // .......................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn1 v14.2S, v19.2S, v9.2S // .............................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + trn1 v18.2S, v29.2S, v11.2S // .................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ trn2 v9.2S, v23.2S, v24.2S // .........................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn2 v23.2S, v29.2S, v11.2S // ................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull x5, w8, w28 // ...............................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umull x16, w8, w29 // .............................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x4, w23, w26, x16 // .....................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x4, w25, w20, x4 // ...........................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x12, w22, w28, x4 // ...............................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x14, w19, w7, x12 // .....................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ add x16, x17, x17 // .................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x16, w19, w16, x21 // ..................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x1, w23, w7, x5 // .......................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x21, w6, w29, x1 // ..............................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x12, w22, w10, x21 // .................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umull x4, w8, w10 // .................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x21, w19, w20, x12 // .......................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x2, w22, w7, x2 // ..............................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x12, w19, w24, x2 // ....................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x2, w23, w20, x4 // .........................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x1, w6, w28, x2 // ................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x2, w27, w7, x1 // ....................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x10, x16, #0x3ffffff // ...........................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x6, w19, w0, x2 // .........................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull x2, w8, w17 // ...........................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x8, w18, w28, x2 // ......................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x1, x13, #0x3ffffff // .......................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + ldr x4, [sp, #STACK_B_0] // ................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
// @slothy:reads=[B0] + umaddl x24, w25, w7, x8 // .........................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x18, w22, w29, x24 // .............................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x27, w19, w9, x15 // ........................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x17, w19, w26, x18 // ...................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x25, x17, x16, lsr #26 // ..........................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x5, x12, x25, lsr #25 // ............................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ lsr x8, x4, #32 // .....................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add x23, x8, x8 // ..................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x29, x23, x23 // .................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add x0, x14, x5, lsr #26 // ..............................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x17, x3, x0, lsr #25 // ................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add x20, x21, x17, lsr #26 // ..................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x13, x27, x20, lsr #25 // ....................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x22, x6, x13, lsr #26 // ......................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ str d21, [sp, #STACK_B_32] // ....................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:writes=[B32] + ldr x12, [sp, #STACK_B_32] // 
....................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:reads=[B32] + lsr x21, x12, #32 // .........................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x3, x21, x21 // ..........................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x27, x5, #0x3ffffff // ...............................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ and x26, x13, #0x3ffffff // .......................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + bfi x26, x22, #32, #25 // .........................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ bfi x27, x0, #32, #25 // .................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add x22, x1, x22, lsr #25 // ........................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x14, x12, x12 // ...........................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + bfi x10, x25, #32, #25 // .............................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ and x17, x17, #0x3ffffff // ...................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + bfi x17, x20, #32, #25 // .....................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ ldr x28, [sp, #STACK_B_8] // .................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ // @slothy:reads=[B8] + lsr x2, x28, #32 // ......................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ ldr x13, [sp, #STACK_B_16] // ..................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:reads=[B16] + lsr x19, x13, #32 // .......................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w15, w19, w30 // .......................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull x1, w4, w4 // ..........................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ stp x17, x26, [sp, #STACK_A_16] // ..............................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:writes=[A16,A24] + stp x10, x27, [sp, #STACK_A_0] // 
.............................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ // @slothy:writes=[A0,A8] + and x5, x22, #0x3ffffff // ...........................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ and x20, x11, #0x1ffffff // ..............................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + ldr x18, [sp, #STACK_B_24] // ...................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
// @slothy:reads=[B24] + lsr x27, x18, #32 // ........................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x24, x27, x27 // ............................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull x7, w4, w24 // .................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add x16, x18, x18 // .............................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ add x6, x20, x22, lsr #26 // ..........................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x11, x24, x24 // ....................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ bfi x5, x6, #32, #26 // ............................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + str x5, [sp, #STACK_A_32] // ...............................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
// @slothy:writes=[A32] + mul w22, w12, w30 // ............................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umull x5, w4, w16 // ................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x26, x2, x2 // ................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add x25, x13, x13 // ...............................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x17, x19, x19 // ..............................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x0, w23, w17, x5 // ........................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x19, w28, w25, x0 // ...............................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x6, w2, w26, x19 // ...................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x12, w12, w22, x6 // .......................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x9, x28, x28 // .................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umull x10, w4, w9 // ............................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x6, w8, w23, x10 // ....................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w10, w18, w30 // ...................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x18, w18, w10, x6 // ........................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w5, w27, w30 // ......................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x20, x17, x17 // ...................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x0, w5, w20, x18 // ..........................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x19, w22, w25, x0 // ..............................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w6, w21, w30 // .....................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umull x0, w4, w17 // ...............................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x21, w8, w25, x0 // .......................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x27, w28, w26, x21 // ..............................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x0, w22, w24, x27 // .................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x27, w6, w16, x0 // .......................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull x21, w4, w23 // ...........................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x18, w10, w17, x21 // ......................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x18, w5, w25, x18 // .........................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x18, w22, w26, x18 // .............................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x0, w6, w9, x18 // ...................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull x21, w4, w25 // ..............................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x18, w23, w26, x21 // ......................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x18, w28, w28, x18 // .............................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umull x21, w13, w17 // ....................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x21, w4, w3, x21 // .........................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x21, w8, w14, x21 // ...........................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x21, w28, w24, x21 // ..................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x21, w2, w16, x21 // ......................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x1, w15, w17, x1 // ..........................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull x15, w13, w13 // ...................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x15, w6, w3, x15 // ........................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x13, w4, w14, x15 // ..................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x13, w23, w24, x13 // ..........................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x23, w28, w16, x13 // .................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x13, w26, w17, x23 // .....................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ add x15, x21, x13, lsr #26 // .........................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x21, x1, x15, lsr #25 // ...........................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x1, x26, x26 // ..................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x23, w6, w1, x19 // ....................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull x3, w4, w26 // .............................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x4, w8, w9, x3 // .....................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x26, w5, w16, x4 // ...........................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x3, w22, w17, x26 // ...............................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x26, w6, w25, x3 // .....................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + bic x3, x15, #0x1ffffff // ............................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x21, x21, x3, lsr #24 // .............................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add x19, x21, x3, lsr #21 // ...............................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x10, w10, w25, x19 // .....................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x10, w5, w1, x10 // ........................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x10, w22, w9, x10 // ............................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x19, w6, w29, x10 // ..................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x3, x0, x19, lsr #26 // ..........................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x1, x23, x3, lsr #25 // ............................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x21, x26, x1, lsr #26 // ..............................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + ldr x9, #=0x07fffffe07fffffc // .....................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ ldr x10, [sp, #STACK_A_32] // ....................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:reads=[A32] + add x4, x10, x9 // .........................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ ldr x23, [sp, #STACK_A_8] // .................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ // @slothy:reads=[A8] + add x29, x23, x9 // ......................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x26, w5, w24, x18 // ................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + and x1, x1, #0x3ffffff // ...............................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ bfi x1, x21, #32, #25 // .................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + sub x5, x29, x1 // .............................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ and x0, x3, #0x1ffffff // .................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x23, w22, w16, x26 // ................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x22, w6, w20, x23 // ......................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x10, x22, x21, lsr #25 // ................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x20, x27, x10, lsr #26 // ..................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x29, x20, #0x1ffffff // ...................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x23, w6, w11, x12 // ........................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x27, x23, x20, lsr #25 // ....................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x23, w8, w16, x7 // .........................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x23, w28, w17, x23 // ................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x28, w2, w25, x23 // ....................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x25, w6, w14, x28 // .........................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x11, x25, x27, lsr #26 // ......................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x23, x13, #0x3ffffff // .......................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x2, x23, x11, lsr #25 // ........................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x25, x10, #0x3ffffff // ...................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ bfi x25, x20, #32, #25 // .....................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + and x18, x21, #0x1ffffff // ..................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ ldr x16, [sp, #STACK_A_16] // ..................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:reads=[A16] + add x26, x16, x9 // .......................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x13, x15, #0x1ffffff // ..............................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x13, x13, x2, lsr #26 // ..........................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ ldr x6, [sp, #STACK_A_24] // ...................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:reads=[A24] + trn2 v6.2S, v28.2S, v20.2S // 
........................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn1 v10.2S, v28.2S, v20.2S // .........................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn2 v11.2S, v27.2S, v26.2S // ..............................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn1 v17.2S, v27.2S, v26.2S // ...............................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x22, x6, x9 // ........................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + sub x26, x26, x25 // ..............................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ ldr x14, #=121666 // ...............................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + mov w10, w1 // ......................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x16, x27, #0x3ffffff // .......................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + bfi x16, x11, #32, #25 // .........................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mov w7, w16 // ........................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x6, x2, #0x3ffffff // ...........................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ and x15, x19, #0x3ffffff // ...........................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + bfi x6, x13, #32, #26 // ............................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ and x2, x11, #0x1ffffff // ....................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + movk x9, #0xffb4 // ..........................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mov w21, w25 // .......................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mov w24, w6 // .........................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ str x6, [sp, #STACK_B_32] // ...............................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. // @slothy:writes=[B32] + stp x25, x16, [sp, #STACK_B_16] // 
..............................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:writes=[B16,B24] + lsr x25, x26, #32 // ............................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ ldr x12, [sp, #STACK_A_0] // ................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. // @slothy:reads=[A0] + add x12, x12, x9 // ...........................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ bfi x15, x3, #32, #25 // .............................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + stp x15, x1, [sp, #STACK_B_0] // .............................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
// @slothy:writes=[B0,B8] + mov w9, w15 // .....................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + sub x16, x22, x16 // ...............................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x3, w16, w14, x7 // ...........................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x23, w26, w14, x21 // .......................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x1, w5, w14, x10 // ...................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + lsr x20, x16, #32 // .............................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ sub x19, x12, x15 // ............................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x15, w19, w14, x9 // .................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ lsr x7, x19, #32 // ..........................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x11, w7, w14, x0 // ..................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ lsr x10, x5, #32 // ...........................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x21, w10, w14, x18 // .....................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x2, w20, w14, x2 // .............................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x17, w25, w14, x29 // .........................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ sub x6, x4, x6 // ................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + lsr x27, x6, #32 // ..............................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x12, w27, w14, x13 // ................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + lsr x9, x12, #25 // ....................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x4, x15, x9 // ........................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x4, x4, x9, lsl #1 // ..........................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x0, x4, x9, lsl #4 // ............................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x22, w6, w14, x24 // ..............................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x14, x12, #0x1ffffff // ......................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x4, x11, x0, lsr #26 // ...............................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x15, x1, x4, lsr #25 // .................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add x28, x21, x15, lsr #26 // ...................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x9, x23, x28, lsr #25 // .....................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add x12, x17, x9, lsr #26 // .......................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x11, x3, x12, lsr #25 // .........................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x29, x2, x11, lsr #26 // ...........................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ and x24, x29, #0x1ffffff // ..............................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x13, x4, #0x1ffffff // ..................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x0, x0, #0x3ffffff // ................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umull x8, w13, w20 // ...................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull x23, w13, w6 // .........................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x2, x28, #0x1ffffff // ......................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x21, x15, #0x3ffffff // ....................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x18, x22, x29, lsr #25 // .............................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ and x28, x18, #0x3ffffff // ................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + and x4, x12, #0x1ffffff // ..........................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x3, x14, x18, lsr #26 // ...............................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + mul w14, w21, w30 // ..................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x1, w2, w25, x8 // ....................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x8, w4, w10, x1 // .....................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x17, w24, w7, x8 // ......................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mul w29, w3, w30 // ........................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x22, w2, w16, x23 // ..........................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mul w18, w24, w30 // ......................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x17, w29, w27, x17 // .......................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x23, x17, x17 // ........................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x12, x9, #0x3ffffff // ........................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x8, w4, w26, x22 // ...........................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x8, w24, w5, x8 // ............................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x9, w0, w6, x23 // .........................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x15, x11, #0x3ffffff // ............................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x8, w3, w19, x8 // .............................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x17, w0, w27, x8 // ..............................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x1, w21, w20, x17 // ...............................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull x11, w13, w16 // ..............................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x23, w2, w26, x11 // ...............................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull x8, w13, w25 // ........................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x22, w4, w5, x23 // ................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x17, w2, w10, x8 // .........................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x23, w4, w7, x17 // ..........................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x3, w18, w27, x23 // ...........................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x9, w21, w16, x9 // ..........................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x17, w24, w19, x22 // .................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x8, w29, w20, x3 // ............................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add x8, x8, x8 // .............................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x3, w29, w6, x17 // ..................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x23, w0, w20, x3 // ...................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x11, w0, w16, x8 // ..............................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w3, w28, w30 // .......................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x11, w21, w26, x11 // ...............................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x11, w12, w5, x11 // ................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x8, w15, w19, x11 // .................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x11, w3, w6, x8 // ..................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mul w17, w4, w30 // ....................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w8, w15, w30 // .....................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umull x24, w29, w25 // .............................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x24, w18, w20, x24 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x22, w17, w27, x24 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x24, w2, w7, x22 // ................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x24, w13, w10, x24 // .................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ add x22, x24, x24 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x24, w3, w16, x22 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x24, w8, w6, x24 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x24, w12, w19, x24 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x24, w21, w5, x24 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x23, w21, w25, x23 // ....................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x23, w12, w10, x23 // .....................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x22, w15, w7, x23 // ......................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull x23, w29, w16 // ...................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x4, w4, w19, x23 // ....................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x4, w18, w6, x4 // .....................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x4, w2, w5, x4 // ......................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x23, w13, w26, x4 // .......................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x4, w3, w20, x23 // ........................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x4, w8, w27, x4 // .........................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x23, w12, w7, x4 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x4, w21, w10, x23 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x23, w0, w25, x4 // ............................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x9, w12, w26, x9 // ...........................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x9, w15, w5, x9 // ............................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x4, w28, w19, x9 // .............................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x9, w12, w25, x1 // ................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x9, w15, w10, x9 // .................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x9, w28, w7, x9 // ..................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x15, w3, w27, x22 // .......................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x28, w0, w26, x24 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x24, x23, x28, lsr #26 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x1, x11, x24, lsr #25 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ add x11, x15, x1, lsr #26 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x23, x4, x11, lsr #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x9, x9, x23, lsr #26 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + and x15, x24, #0x1ffffff // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ and x4, x23, #0x3ffffff // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + bfi x4, x9, #32, #26 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull x22, w29, w26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x22, w18, w16, x22 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x23, w17, w6, x22 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x23, w2, w19, x23 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x24, w13, w5, x23 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + mul v27.2S, v23.2S, v31.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull v29.2D, v0.2S, v22.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v29.2D, v2.2S, v6.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v29.2D, v5.2S, v27.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................... + umull v21.2D, v0.2S, v11.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v21.2D, v2.2S, v16.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v21.2D, v5.2S, v22.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v21.2D, v7.2S, v6.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull v25.2D, v0.2S, v23.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v25.2D, v2.2S, v11.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v25.2D, v5.2S, v16.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v25.2D, v7.2S, v22.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull v20.2D, v9.2S, v11.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v20.2D, v3.2S, v16.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v20.2D, v1.2S, v22.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull v23.2D, v0.2S, v16.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v23.2D, v2.2S, v22.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull v8.2D, v9.2S, v16.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v8.2D, v3.2S, v22.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull v28.2D, v9.2S, v22.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................... + mul v22.2S, v22.2S, v31.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul v26.2S, v11.2S, v31.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................. + mul v16.2S, v16.2S, v31.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v28.2D, v3.2S, v6.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v28.2D, v1.2S, v27.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v28.2D, v15.2S, v26.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v28.2D, v13.2S, v16.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................... 
+ shl v28.2D, v28.2D, #1 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v28.2D, v0.2S, v14.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v28.2D, v2.2S, v12.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v20.2D, v15.2S, v6.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v20.2D, v13.2S, v27.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................. + shl v20.2D, v20.2D, #1 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v20.2D, v0.2S, v18.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v25.2D, v4.2S, v6.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v25.2D, v9.2S, v18.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................ + mul v18.2S, v18.2S, v31.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v8.2D, v1.2S, v6.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v8.2D, v15.2S, v27.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v8.2D, v13.2S, v26.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................... + shl v11.2D, v8.2D, #1 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umlal v11.2D, v0.2S, v17.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v25.2D, v3.2S, v17.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v21.2D, v4.2S, v27.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v23.2D, v5.2S, v6.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v23.2D, v7.2S, v27.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v23.2D, v4.2S, v26.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v23.2D, v9.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v23.2D, v3.2S, v12.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v23.2D, v1.2S, v10.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v23.2D, v15.2S, v18.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull v19.2D, v0.2S, v6.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................. + umlal v19.2D, v2.2S, v27.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................ 
+ umull v8.2D, v9.2S, v6.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................ + umlal v8.2D, v3.2S, v27.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull v27.2D, v9.2S, v27.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................... + umlal v27.2D, v3.2S, v26.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v27.2D, v1.2S, v16.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................... + umlal v27.2D, v15.2S, v22.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................ 
+ umlal v8.2D, v1.2S, v26.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................... + umlal v8.2D, v15.2S, v16.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v19.2D, v5.2S, v26.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................... + umlal v19.2D, v7.2S, v16.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v19.2D, v4.2S, v22.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................. + umlal v8.2D, v13.2S, v22.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................ 
+ shl v22.2D, v8.2D, #1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................. + mul w12, w12, w30 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x24, w3, w25, x24 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x22, w8, w20, x24 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x24, w12, w27, x22 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x24, w21, w7, x24 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ mul w2, w2, w30 // ...................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mul w23, w13, w30 // .................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umull x22, w29, w10 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x22, w18, w25, x22 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x22, w17, w20, x22 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x22, w2, w27, x22 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x22, w13, w7, x22 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add x22, x22, x22 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x22, w3, w26, x22 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x22, w8, w16, x22 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x22, w12, w6, x22 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x21, w21, w19, x22 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ bic x22, x9, #0x3ffffff // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umull x9, w29, w5 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x9, w18, w26, x9 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x9, w17, w16, x9 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x9, w2, w6, x9 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x13, w13, w19, x9 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ lsr x9, x22, #26 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v29.2D, v7.2S, v26.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v19.2D, v9.2S, v10.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................... + umlal v25.2D, v1.2S, v14.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v25.2D, v15.2S, v12.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v29.2D, v4.2S, v16.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v29.2D, v9.2S, v12.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................... + umlal v29.2D, v3.2S, v10.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v11.2D, v2.2S, v14.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v22.2D, v0.2S, v12.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v22.2D, v2.2S, v10.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................... + umlal v22.2D, v5.2S, v18.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................... 
+ add x9, x9, x22, lsr #25 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add x22, x9, x22, lsr #22 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x22, w29, w7, x22 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x22, w18, w10, x22 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x22, w17, w25, x22 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x9, w2, w20, x22 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x2, w23, w27, x9 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x9, x2, x2 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x9, w3, w5, x9 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x26, w8, w26, x9 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x23, w12, w16, x26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x16, w14, w6, x23 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x18, w3, w10, x13 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x13, w8, w25, x18 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x26, w12, w20, x13 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x13, w14, w27, x26 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x12, w0, w7, x13 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x20, w0, w19, x16 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x13, w0, w5, x21 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v21.2D, v9.2S, v17.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v21.2D, v3.2S, v14.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v20.2D, v2.2S, v17.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v20.2D, v5.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................. + mul v14.2S, v14.2S, v31.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................. 
+ mul v17.2S, v17.2S, v31.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v22.2D, v7.2S, v17.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................ 
+ umlal v22.2D, v4.2S, v14.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................... + umlal v29.2D, v1.2S, v18.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v29.2D, v15.2S, v17.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................... + umlal v29.2D, v13.2S, v14.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................... 
+ usra v29.2D, v22.2D, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................. + and v8.16B, v22.16B, v30.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................ 
+ and x19, x28, #0x3ffffff // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x27, x12, x20, lsr #26 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x12, w0, w10, x24 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x21, x13, x27, lsr #25 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x2, x12, x21, lsr #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x12, x19, x2, lsr #25 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x9, x15, x12, lsr #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + ushr v26.2D, v30.2D, #1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................. 
+ and v22.16B, v29.16B, v26.16B // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................. + umlal v28.2D, v5.2S, v10.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umlal v28.2D, v7.2S, v18.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................... + umlal v28.2D, v4.2S, v17.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v21.2D, v1.2S, v12.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................... + usra v28.2D, v29.2D, #25 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v11.2D, v5.2S, v12.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v11.2D, v7.2S, v10.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v11.2D, v4.2S, v18.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v23.2D, v13.2S, v17.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................... 
+ usra v23.2D, v28.2D, #26 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................... + usra v11.2D, v23.2D, #25 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................ 
+ umlal v21.2D, v15.2S, v10.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v21.2D, v13.2S, v18.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ usra v21.2D, v11.2D, #26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................... + umlal v20.2D, v7.2S, v12.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v20.2D, v4.2S, v10.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................ + usra v20.2D, v21.2D, #25 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................ 
+ umlal v25.2D, v13.2S, v10.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................ + usra v25.2D, v20.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................. 
+ and v24.16B, v25.16B, v26.16B // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................... + and v20.16B, v20.16B, v30.16B // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................ 
+ trn1 v16.4S, v20.4S, v24.4S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................... + ld1r {v20.2D}, [sp] // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................... 
// @slothy:reads=mask1 + add v29.2S, v16.2S, v20.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................. 
+ mov v24.d[0], v16.d[1] // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................. + sub v29.2S, v29.2S, v24.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................... 
+ add v24.2S, v16.2S, v24.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................... + and v16.16B, v28.16B, v30.16B // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................... 
+ and v28.16B, v23.16B, v26.16B // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................... + uzp1 v23.4S, v16.4S, v28.4S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................... 
+ ldr b16, [sp, #STACK_MASK2] // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................... // @slothy:reads=mask2 + and v11.16B, v11.16B, v30.16B // 
.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................... + and v21.16B, v21.16B, v26.16B // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................... 
+ uzp1 v21.4S, v11.4S, v21.4S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................... + uzp1 v11.4S, v23.4S, v21.4S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................ 
+ add v28.4S, v11.4S, v20.4S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................... + uzp2 v21.4S, v23.4S, v21.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................... 
+ sub v28.4S, v28.4S, v21.4S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................ + mov v20.b[0], v16.b[0] // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................. 
+ add v21.4S, v11.4S, v21.4S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................... + mul v12.2S, v12.2S, v31.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................... 
+ mul v11.2S, v6.2S, v31.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................ + umlal v27.2D, v13.2S, v11.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................... 
+ shl v6.2D, v27.2D, #1 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................. + umlal v6.2D, v0.2S, v10.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................... 
+ umlal v6.2D, v2.2S, v18.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................... + bic v0.16B, v25.16B, v26.16B // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................... 
+ umlal v6.2D, v5.2S, v17.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................... + umlal v6.2D, v7.2S, v14.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................ 
+ umlal v6.2D, v4.2S, v12.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................... + usra v6.2D, v0.2D, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................... 
+ usra v6.2D, v0.2D, #24 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................... + usra v6.2D, v0.2D, #21 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................. 
+ and v7.16B, v6.16B, v30.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................ + umlal v19.2D, v3.2S, v18.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v19.2D, v1.2S, v17.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................... + umlal v19.2D, v15.2S, v14.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v19.2D, v13.2S, v12.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................. + usra v19.2D, v6.2D, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................. 
+ and v12.16B, v19.16B, v26.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................. + usra v8.2D, v19.2D, #25 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................... 
+ ldr x8, [sp, #STACK_CTR] // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ // @slothy:reads=[ctr,lastbit] + and v25.16B, v8.16B, v30.16B // 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................ + usra v22.2D, v8.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................. 
+ uzp1 v18.4S, v25.4S, v22.4S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................... + add x13, sp, #STACK_SCALAR // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ subs w0, w8, #1 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + asr w19, w0, #5 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ and w5, w0, #0x1f // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x15, x12, #0x3ffffff // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ ldr w13, [x13, w19, SXTW #2] // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x12, x1, #0x3ffffff // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ bfi x12, x11, #32, #25 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + and x21, x21, #0x3ffffff // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ bfi x21, x2, #32, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + bfi x15, x9, #32, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ uzp1 v25.4S, v7.4S, v12.4S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................... + lsr w11, w13, w5 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ stp w0, w11, [sp, #STACK_CTR] // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................ // @slothy:writes=[ctr,lastbit] + zip1 v10.4S, v28.4S, v21.4S // 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................... + zip2 v12.4S, v28.4S, v21.4S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................. 
+ and x9, x20, #0x3ffffff // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + bfi x9, x27, #32, #25 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ lsr x2, x8, #32 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + uzp2 v0.4S, v25.4S, v18.4S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................ 
+ uzp1 v25.4S, v25.4S, v18.4S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................. + add v18.4S, v25.4S, v0.4S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................ 
+ add v25.4S, v25.4S, v20.4S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................... + sub v25.4S, v25.4S, v0.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................... 
+ zip1 v6.4S, v25.4S, v18.4S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................... + zip2 v0.4S, v25.4S, v18.4S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................... 
+ mov v9.d[0], v6.d[1] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................... + zip1 v4.2S, v29.2S, v24.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................. 
+ zip2 v5.2S, v29.2S, v24.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................ + add x8, sp, #STACK_B_0 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................... 
+ mov v15.d[0], v12.d[1] // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................ + shl v24.2S, v15.2S, #1 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................... 
+ shl v8.2S, v9.2S, #1 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................... + shl v16.2S, v4.2S, #1 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................... 
+ umull v23.2D, v6.2S, v16.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................. + umlal v23.2D, v8.2S, v24.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................ 
+ mov v3.d[0], v0.d[1] // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................. + shl v22.2S, v3.2S, #1 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................... 
+ shl v13.2S, v12.2S, #1 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................ + umlal v23.2D, v0.2S, v13.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................... 
+ mov v1.d[0], v10.d[1] // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................. + shl v11.2S, v1.2S, #1 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................... 
+ shl v7.2S, v5.2S, #1 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................... + mul v2.2S, v5.2S, v31.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................ 
+ umlal v23.2D, v22.2S, v11.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................... + umlal v23.2D, v10.2S, v10.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................... 
+ umlal v23.2D, v2.2S, v7.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................... + umull v28.2D, v6.2S, v6.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................... 
+ mul v18.2S, v1.2S, v31.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................ + shl v25.2S, v8.2S, #1 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................... 
+ umull v1.2D, v6.2S, v7.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................. + umlal v1.2D, v9.2S, v16.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................. 
+ umlal v1.2D, v0.2S, v24.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................ + umlal v1.2D, v3.2S, v13.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................... 
+ umlal v1.2D, v10.2S, v11.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................. + usra v1.2D, v23.2D, #26 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................. 
+ bic v29.16B, v1.16B, v26.16B // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................... + usra v28.2D, v29.2D, #25 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................. 
+ usra v28.2D, v29.2D, #24 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................... + usra v28.2D, v29.2D, #21 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................... 
+ umlal v28.2D, v18.2S, v11.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................... + umlal v28.2D, v2.2S, v25.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................. 
+ mul v21.2S, v4.2S, v31.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................ + mul v20.2S, v15.2S, v31.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................ 
+ shl v17.2S, v22.2S, #1 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................... + shl v14.2S, v0.2S, #1 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................... 
+ umlal v28.2D, v21.2S, v14.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................. + umlal v28.2D, v20.2S, v17.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................ 
+ umull v7.2D, v6.2S, v13.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................. + umlal v7.2D, v8.2S, v11.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................. 
+ shl v27.2S, v10.2S, #1 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................... + umull v5.2D, v6.2S, v11.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................. 
+ umlal v5.2D, v9.2S, v27.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................. + umlal v5.2D, v0.2S, v22.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................ 
+ umlal v7.2D, v0.2S, v27.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................ + umlal v7.2D, v22.2S, v3.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................... 
+ umlal v5.2D, v2.2S, v13.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................. + umlal v7.2D, v21.2S, v4.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................... 
+ umull v10.2D, v6.2S, v24.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................... + umlal v10.2D, v9.2S, v13.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................... 
+ umlal v10.2D, v0.2S, v11.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................... + umlal v10.2D, v3.2S, v27.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................... 
+ umlal v10.2D, v2.2S, v16.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................... + shl v25.2S, v24.2S, #1 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................... 
+ umlal v7.2D, v2.2S, v25.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................... + umull v16.2D, v6.2S, v14.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................... 
+ umlal v16.2D, v8.2S, v9.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................... + mul v25.2S, v12.2S, v31.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................... 
+ umlal v16.2D, v25.2S, v12.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................... + umlal v16.2D, v2.2S, v17.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................... 
+ umlal v16.2D, v21.2S, v27.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................ + umull v12.2D, v6.2S, v8.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................... 
+ umlal v12.2D, v2.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................. + umlal v12.2D, v21.2S, v22.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................. 
+ shl v18.2S, v11.2S, #1 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................... + umlal v16.2D, v20.2S, v18.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................... 
+ umlal v28.2D, v25.2S, v27.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................... + umlal v12.2D, v20.2S, v27.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................ 
+ umlal v12.2D, v25.2S, v11.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................... + usra v12.2D, v28.2D, #26 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................... 
+ usra v16.2D, v12.2D, #25 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................. + umlal v5.2D, v21.2S, v24.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................ 
+ and v15.16B, v23.16B, v30.16B // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................. + umull v25.2D, v6.2S, v22.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................ 
+ umlal v25.2D, v9.2S, v14.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................... + umlal v25.2D, v2.2S, v27.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................... 
+ umlal v25.2D, v21.2S, v11.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................... + umlal v25.2D, v20.2S, v13.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................... 
+ usra v25.2D, v16.2D, #26 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................. + and v9.16B, v1.16B, v26.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................. 
+ umull v4.2D, v6.2S, v27.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................... + umlal v4.2D, v8.2S, v22.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................... 
+ umlal v4.2D, v0.2S, v0.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................... + umlal v4.2D, v20.2S, v24.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................. 
+ umlal v4.2D, v2.2S, v18.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................ + umlal v4.2D, v21.2S, v13.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................... 
+ usra v4.2D, v25.2D, #25 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................... + usra v5.2D, v4.2D, #26 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................ 
+ usra v7.2D, v5.2D, #25 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................... + usra v10.2D, v7.2D, #26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................... 
+ usra v15.2D, v10.2D, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................... + and v8.16B, v15.16B, v30.16B // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................... 
+ and v5.16B, v5.16B, v26.16B // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................. + and v3.16B, v25.16B, v26.16B // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................... 
+ and v6.16B, v7.16B, v30.16B // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................... + and v7.16B, v10.16B, v26.16B // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................. 
+ ld2 { v10.S, v11.S }[1], [x8], #8 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................. // @slothy:reads=[B0] + and v1.16B, v12.16B, v26.16B // 
...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................. + ld2 { v25.S, v26.S }[1], [x8], #8 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................. 
// @slothy:reads=[B8] + ld2 { v12.S, v13.S }[1], [x8], #8 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................ 
// @slothy:reads=[B16] + usra v9.2D, v15.2D, #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................... 
+ and v0.16B, v28.16B, v30.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................ + add x13, sp, #STACK_X_0 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................. 
+ ld2 { v14.S, v15.S }[1], [x8], #8 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................... // @slothy:reads=[B24] + and v4.16B, v4.16B, v30.16B // 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................. + and v2.16B, v16.16B, v30.16B // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................ 
+ add x28, sp, #STACK_A_0 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................... + ld2 { v0.S, v1.S }[1], [x28], #8 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................ 
// @slothy:reads=[A0] + ld2 { v2.S, v3.S }[1], [x28], #8 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................... 
// @slothy:reads=[A8] + ld2 { v10.S, v11.S }[0], [x13], #8 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................ 
// @slothy:reads=[X0] + ld2 { v4.S, v5.S }[1], [x28], #8 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................... 
// @slothy:reads=[A16] + ld2 { v6.S, v7.S }[1], [x28], #8 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................... 
// @slothy:reads=[A24] + ld2 { v8.S, v9.S }[1], [x28], #8 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................... 
// @slothy:reads=[A32] + ld2 { v25.S, v26.S }[0], [x13], #8 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................... 
// @slothy:reads=[X8] + ld2 { v12.S, v13.S }[0], [x13], #8 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................... 
// @slothy:reads=[X16] + ld2 { v17.S, v18.S }[1], [x8], #8 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................. 
// @slothy:reads=[B32] + ld2 { v14.S, v15.S }[0], [x13], #8 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................... 
// @slothy:reads=[X24] + mul v23.2S, v15.2S, v31.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................. 
+ ld2 { v17.S, v18.S }[0], [x13], #8 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................ // @slothy:reads=[X32] + mul v21.2S, v18.2S, v31.2S // 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................. + umull v20.2D, v0.2S, v18.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................... 
+ umull v28.2D, v1.2S, v15.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................. + umlal v28.2D, v3.2S, v13.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................ 
+ umlal v28.2D, v5.2S, v26.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................... + umlal v20.2D, v2.2S, v15.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................... 
+ umull v15.2D, v0.2S, v15.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................... + umlal v15.2D, v2.2S, v13.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................... 
+ umlal v15.2D, v4.2S, v26.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................... + umlal v15.2D, v6.2S, v11.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................... 
+ umlal v15.2D, v8.2S, v21.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................. + umlal v28.2D, v7.2S, v11.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................. 
+ umlal v28.2D, v9.2S, v21.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................. + shl v22.2D, v28.2D, #1 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................... 
+ umull v24.2D, v1.2S, v26.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................... + umull v16.2D, v1.2S, v13.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................... 
+ umull v28.2D, v0.2S, v13.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................... + umlal v24.2D, v3.2S, v11.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................. 
+ umlal v16.2D, v3.2S, v26.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................... + umlal v24.2D, v5.2S, v21.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................. 
+ umlal v24.2D, v7.2S, v23.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................ + umlal v16.2D, v5.2S, v11.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................ 
+ umlal v20.2D, v4.2S, v13.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................... + umlal v16.2D, v7.2S, v21.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................... 
+ mul v13.2S, v13.2S, v31.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................... + umlal v22.2D, v0.2S, v17.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................ 
+ umlal v16.2D, v9.2S, v23.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................... + umlal v28.2D, v2.2S, v26.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................. 
+ umlal v20.2D, v6.2S, v26.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................... + umlal v28.2D, v4.2S, v11.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................. 
+ umlal v20.2D, v8.2S, v11.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................... + shl v27.2D, v16.2D, #1 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................ 
+ umlal v20.2D, v1.2S, v17.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................ + umlal v28.2D, v6.2S, v21.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................ 
+ mul v16.2S, v14.2S, v31.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................... + umull v19.2D, v0.2S, v26.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................ 
+ umlal v20.2D, v3.2S, v14.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................... + umlal v19.2D, v2.2S, v11.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................... 
+ umlal v28.2D, v8.2S, v23.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................... + umlal v27.2D, v0.2S, v14.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................... 
+ umlal v19.2D, v4.2S, v21.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................... + umlal v15.2D, v1.2S, v14.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................... 
+ umlal v19.2D, v6.2S, v23.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................... + umlal v24.2D, v9.2S, v13.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................... 
+ mul v18.2S, v17.2S, v31.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................... + umlal v28.2D, v1.2S, v12.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................. 
+ umlal v27.2D, v2.2S, v12.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................ + umlal v19.2D, v8.2S, v13.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................... 
+ umlal v28.2D, v3.2S, v25.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................. + mul v26.2S, v26.2S, v31.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................... 
+ shl v17.2D, v24.2D, #1 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................... + umlal v17.2D, v0.2S, v12.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................. 
+ umull v29.2D, v1.2S, v11.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................ + umlal v29.2D, v3.2S, v21.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................... 
+ umlal v22.2D, v2.2S, v14.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................... + umlal v19.2D, v1.2S, v25.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................... 
+ umlal v19.2D, v3.2S, v10.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................... + umlal v29.2D, v5.2S, v23.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................... 
+ umlal v29.2D, v7.2S, v13.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................... + umlal v29.2D, v9.2S, v26.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................ 
+ mul v14.2S, v12.2S, v31.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................. + umlal v19.2D, v5.2S, v18.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................... 
+ umlal v19.2D, v7.2S, v16.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................... + umlal v19.2D, v9.2S, v14.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................... 
+ shl v29.2D, v29.2D, #1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................. + umull v24.2D, v0.2S, v11.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................. 
+ umlal v29.2D, v0.2S, v25.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................... + umlal v24.2D, v2.2S, v21.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................ 
+ umlal v24.2D, v4.2S, v23.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................... + umlal v28.2D, v5.2S, v10.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................ 
+ umlal v24.2D, v6.2S, v13.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................. + umlal v17.2D, v2.2S, v25.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................. 
+ umlal v17.2D, v4.2S, v10.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................ + umlal v29.2D, v2.2S, v10.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................... 
+ umlal v29.2D, v4.2S, v18.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................... + mul v11.2S, v11.2S, v31.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................ 
+ umlal v15.2D, v3.2S, v12.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................... + umlal v15.2D, v5.2S, v25.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................... 
+ umlal v24.2D, v8.2S, v26.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................. + umlal v24.2D, v1.2S, v10.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................... 
+ umlal v29.2D, v6.2S, v16.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................ + umlal v29.2D, v8.2S, v14.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................... 
+ umull v21.2D, v1.2S, v21.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................... + umlal v21.2D, v3.2S, v23.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................... 
+ umlal v21.2D, v5.2S, v13.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................... + umlal v21.2D, v7.2S, v26.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................ 
+ umlal v21.2D, v9.2S, v11.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................... + shl v21.2D, v21.2D, #1 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................. 
+ umlal v21.2D, v0.2S, v10.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................... + umlal v21.2D, v2.2S, v18.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................... 
+ umlal v20.2D, v5.2S, v12.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................... + umlal v21.2D, v4.2S, v16.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................... 
+ umlal v21.2D, v6.2S, v14.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................ + umlal v24.2D, v3.2S, v18.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................... 
+ umlal v24.2D, v5.2S, v16.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................... + umlal v22.2D, v4.2S, v12.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................. 
+ umlal v27.2D, v4.2S, v25.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................... + trn1 v4.4S, v4.4S, v5.4S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................... 
+ mov v5.d[0], x15 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......... + trn1 v2.4S, v2.4S, v3.4S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................... 
+ mov v3.d[0], x21 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......... + umlal v27.2D, v6.2S, v10.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................... 
+ umlal v27.2D, v8.2S, v18.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................... + mul v12.2S, v25.2S, v31.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................... 
+ umlal v21.2D, v8.2S, v12.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................... + umlal v17.2D, v6.2S, v18.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................... 
+ umlal v17.2D, v8.2S, v16.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................. + umlal v22.2D, v6.2S, v25.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................. 
+ umlal v22.2D, v8.2S, v10.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................ + trn1 v8.4S, v8.4S, v9.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................... 
+ umlal v28.2D, v7.2S, v18.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................... + umlal v28.2D, v9.2S, v16.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................... 
+ umlal v20.2D, v7.2S, v25.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................... + umlal v20.2D, v9.2S, v10.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................ 
+ umlal v15.2D, v7.2S, v10.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................. + umlal v15.2D, v9.2S, v18.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................. 
+ umlal v24.2D, v7.2S, v14.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................. + umlal v24.2D, v9.2S, v12.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................. 
+ mov v9.d[0], x4 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....... + usra v19.2D, v29.2D, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................. 
+ trn1 v6.4S, v6.4S, v7.4S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................ + mov v7.d[0], x12 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........ 
+ usra v17.2D, v19.2D, #25 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................. + and v11.16B, v17.16B, v30.16B // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................... 
+ usra v28.2D, v17.2D, #26 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................... + usra v27.2D, v28.2D, #25 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................ 
+ ushr v13.2D, v30.2D, #1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................. + and v23.16B, v28.16B, v13.16B // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................... 
+ ldr d28, [sp, #STACK_MASK2] // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...... // @slothy:reads=mask2 + trn1 v0.4S, v0.4S, v1.4S // 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................... + mov v1.d[0], x9 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........... 
+ usra v15.2D, v27.2D, #26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................... + usra v22.2D, v15.2D, #25 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................ 
+ usra v20.2D, v22.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................. + bic v17.16B, v20.16B, v13.16B // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................... 
+ usra v21.2D, v17.2D, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................... + usra v21.2D, v17.2D, #24 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................... 
+ usra v21.2D, v17.2D, #21 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................. + usra v24.2D, v21.2D, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................. 
+ and v26.16B, v24.16B, v13.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................. + and v25.16B, v29.16B, v30.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................ 
+ usra v25.2D, v24.2D, #25 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................... + and v17.16B, v19.16B, v13.16B // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................. 
+ usra v17.2D, v25.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................. + and v22.16B, v22.16B, v30.16B // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................ 
+ and v21.16B, v21.16B, v30.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................ + and v15.16B, v15.16B, v13.16B // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................... 
+ and v24.16B, v20.16B, v13.16B // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................... + trn1 v19.4S, v22.4S, v24.4S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................. 
+ and v27.16B, v27.16B, v30.16B // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................... + and v20.16B, v25.16B, v30.16B // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................ 
+ trn1 v13.4S, v20.4S, v17.4S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................... + mov v12.d[0], v2.d[1] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............... 
+ ldr d29, [sp, #STACK_MASK1] // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..... // @slothy:reads=mask1 + mov v2.d[0], v13.d[1] // 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*... + mov v16.d[0], v6.d[1] // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............. 
+ eor w1, w11, w2 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................. + mov v10.d[0], v0.d[1] // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................ 
+ trn1 v17.4S, v27.4S, v15.4S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................... + mov v6.d[0], v17.d[1] // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*. 
+ mov v14.d[0], v4.d[1] // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............. + mov v18.d[0], v8.d[1] // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............ 
+ mov v8.d[0], v19.d[1] // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................* + trn1 v15.4S, v11.4S, v23.4S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................... 
+ mov v4.d[0], v15.d[1] // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.. + trn1 v11.4S, v21.4S, v26.4S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................... 
+ mov v0.d[0], v11.d[1] // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.... + + // original source code + // tst w1, #1 // .....*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // sub v25.2S, v28.2S, v1.2S // ..........................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub v27.2S, v29.2S, v3.2S // ...............*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // sub v20.2S, v29.2S, v5.2S // ...*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub v21.2S, v29.2S, v7.2S // .............*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // sub v26.2S, v29.2S, v9.2S // ............*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v25.2S, v0.2S, v25.2S // ...........................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add v27.2S, v2.2S, v27.2S // .........................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v20.2S, v4.2S, v20.2S // ....*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v21.2S, v6.2S, v21.2S // ........................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v26.2S, v8.2S, v26.2S // .......................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // sub v22.2S, v28.2S, v11.2S // .................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // sub v23.2S, v29.2S, v13.2S // *............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // sub v28.2S, v29.2S, v15.2S // ..............*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub v24.2S, v29.2S, v17.2S // ...........*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // sub v29.2S, v29.2S, v19.2S // ..........*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v22.2S, v10.2S, v22.2S // .............................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add v23.2S, v12.2S, v23.2S // .........*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v28.2S, v14.2S, v28.2S // ...................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v24.2S, v16.2S, v24.2S // ...............................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v29.2S, v18.2S, v29.2S // ................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add v0.2S, v0.2S, v1.2S // ....................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v2.2S, v2.2S, v3.2S // ......*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v5.2S, v4.2S, v5.2S // ..........................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v7.2S, v6.2S, v7.2S // .................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add v4.2S, v8.2S, v9.2S // .....................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add v10.2S, v10.2S, v11.2S // ..*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v12.2S, v12.2S, v13.2S // .*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add v14.2S, v14.2S, v15.2S // ......................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v17.2S, v16.2S, v17.2S // ................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v18.2S, v18.2S, v19.2S // ..................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // fcsel_dform v9, v0, v10, eq // ............................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // fcsel_dform v3, v2, v12, eq // .......*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // fcsel_dform v1, v5, v14, eq // ...........................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // fcsel_dform v15, v7, v17, eq // ..................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // fcsel_dform v13, v4, v18, eq // .......................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mov x4, v9.d[0] // ....................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mov x9, v3.d[0] // ........*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mov x12, v1.d[0] // ............................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // mov x21, v15.d[0] // .....................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // mov x11, v13.d[0] // ........................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn2 v9.2S, v0.2S, v25.2S // ........................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // trn1 v0.2S, v0.2S, v25.2S // .............................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // trn2 v3.2S, v2.2S, v27.2S // ........................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // trn1 v2.2S, v2.2S, v27.2S // ...................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn2 v1.2S, v5.2S, v20.2S // .................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // trn1 v5.2S, v5.2S, v20.2S // ..................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn2 v15.2S, v7.2S, v21.2S // ...............................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // trn1 v7.2S, v7.2S, v21.2S // ....................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn2 v13.2S, v4.2S, v26.2S // ................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // trn1 v4.2S, v4.2S, v26.2S // .......................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // fcsel_dform v25, v25, v22, eq // ..............................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // fcsel_dform v27, v27, v23, eq // ...............................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // fcsel_dform v20, v20, v28, eq // ...................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // fcsel_dform v21, v21, v24, eq // ................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // fcsel_dform v26, v26, v29, eq // .....................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // trn2 v6.2S, v22.2S, v10.2S // ...............................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // trn1 v10.2S, v22.2S, v10.2S // ................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // trn2 v22.2S, v23.2S, v12.2S // ..................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn1 v12.2S, v23.2S, v12.2S // .................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // trn2 v16.2S, v28.2S, v14.2S // ..............................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn1 v14.2S, v28.2S, v14.2S // ......................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // trn2 v11.2S, v24.2S, v17.2S // .................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // trn1 v17.2S, v24.2S, v17.2S // ..................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // trn2 v23.2S, v29.2S, v18.2S // .........................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn1 v18.2S, v29.2S, v18.2S // .......................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // stp d25, d27, [sp, #STACK_B_0] // ................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // stp d20, d21, [sp, #STACK_B_16] // .................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // str d26, [sp, #STACK_B_32] // ................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // lsr x15, x4, #32 // ......................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // lsr x2, x9, #32 // ...........................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // lsr x13, x12, #32 // .............................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // lsr x1, x21, #32 // ......................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // lsr x19, x11, #32 // .........................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x0, x19, x19 // .........................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x5, x11, x11 // ............................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x26, x1, x1 // ......................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x16, x21, x21 // ........................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x6, x13, x13 // ..............................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x7, x12, x12 // ..........................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x10, x2, x2 // ............................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add x25, x9, x9 // ................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x20, x15, x15 // .......................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x27, w12, w12 // .....................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umull x12, w12, w6 // ...........................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // mul w19, w19, w30 // ...................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul w1, w1, w30 // ......................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul w13, w13, w30 // .............................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x27, w19, w0, x27 // ....................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w4, w0, x12 // .............................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umull x0, w4, w4 // ...............................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umull x24, w4, w20 // ..............................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x14, w4, w25 // ........................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x22, w4, w10 // ...........................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umull x17, w4, w7 // ...............................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umull x23, w4, w6 // ..........................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x18, w4, w16 // ..................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x3, w4, w26 // .....................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x4, w4, w5, x27 // .....................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // mul w27, w21, w30 // .......................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x14, w15, w20, x14 // .........................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x22, w15, w25, x22 // ............................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x17, w20, w10, x17 // ................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x23, w15, w7, x23 // ..................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x18, w20, w6, x18 // ...................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x3, w15, w16, x3 // .........................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x4, w20, w26, x4 // .........................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w15, w5, x12 // ..............................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul w15, w11, w30 // ............................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x17, w9, w9, x17 // .................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x23, w9, w10, x23 // ...................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x18, w9, w7, x18 // ....................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x3, w9, w6, x3 // ..........................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x4, w9, w16, x4 // ..........................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x9, w9, w26, x12 // .......................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w2, w10, x18 // .............................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x18, w2, w7, x3 // ...........................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x4, w10, w6, x4 // ..............................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x9, w2, w16, x9 // ..................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w11, w15, x12 // ..............................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x21, w21, w27, x14 // ..........................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x9, x9, x4, lsr #26 // ...................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x11, w13, w6, x0 // .................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add x11, x11, x9, lsr #25 // ....................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // bic x2, x9, #0x1ffffff // .....................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add x11, x11, x2, lsr #24 // ......................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x9, x9, #0x1ffffff // ......................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x11, x11, x2, lsr #21 // .........................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x2, w1, w26, x17 // ..................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x13, x20, x20 // ................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x0, x10, x10 // ........................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x20, x6, x6 // .....................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add x14, x26, x26 // ...............................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x11, w27, w7, x11 // ..........................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x27, w27, w6, x24 // ...............................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // and x4, x4, #0x3ffffff // ................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x11, w1, w0, x11 // ...........................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x27, w1, w7, x27 // ..................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x21, w1, w20, x21 // ...........................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x1, w1, w16, x22 // .............................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x11, w15, w25, x11 // ............................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x10, w15, w10, x27 // ...................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x21, w15, w7, x21 // .......................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x1, w15, w6, x1 // ..............................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x2, w15, w16, x2 // ...................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x15, w15, w26, x23 // ....................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x11, w19, w13, x11 // .................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x13, w19, w25, x10 // .....................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x21, w19, w0, x21 // ........................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x1, w19, w7, x1 // ...............................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x2, w19, w20, x2 // ....................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x15, w19, w16, x15 // ......................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x12, w19, w14, x12 // ....................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x19, w19, w5, x18 // .............................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add x13, x13, x11, lsr #26 // ......................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and x0, x11, #0x3ffffff // ............................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x21, x21, x13, lsr #25 // .......................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // bfi x0, x13, #32, #25 // ..........................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x11, x1, x21, lsr #26 // ...........................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // and x21, x21, #0x3ffffff // ....................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x2, x2, x11, lsr #25 // ............................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // bfi x21, x11, #32, #25 // .......................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x11, x15, x2, lsr #26 // .............................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // and x1, x2, #0x3ffffff // ...........................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x12, x12, x11, lsr #25 // ..............................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // bfi x1, x11, #32, #25 // ............................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x11, x19, x12, lsr #26 // ...............................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // and x12, x12, #0x3ffffff // .....................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add x4, x4, x11, lsr #25 // ........................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // bfi x12, x11, #32, #25 // ......................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x9, x9, x4, lsr #26 // ............................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // and x4, x4, #0x3ffffff // .....................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // bfi x4, x9, #32, #26 // ..............................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // stp x0, x21, [sp, #STACK_A_0] // ....................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // stp x1, x12, [sp, #STACK_A_16] // ...................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // str x4, [sp, #STACK_A_32] // ...............................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // ldr x4, [sp, #STACK_B_0] // .................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // ldr x9, [sp, #STACK_B_8] // .............................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // ldr x12, [sp, #STACK_B_16] // ...............................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // ldr x21, [sp, #STACK_B_24] // .......................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // ldr x11, [sp, #STACK_B_32] // .................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // lsr x15, x4, #32 // ........................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // lsr x2, x9, #32 // ..............................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // lsr x13, x12, #32 // ................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // lsr x1, x21, #32 // ........................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // lsr x19, x11, #32 // ..................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x0, x19, x19 // ...................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x5, x11, x11 // .........................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x26, x1, x1 // .........................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x16, x21, x21 // ...........................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x6, x13, x13 // ....................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x7, x12, x12 // ...................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x10, x2, x2 // ..................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x25, x9, x9 // .........................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x20, x15, x15 // .........................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x27, w12, w12 // ......................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x12, w12, w6 // ................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // mul w19, w19, w30 // ..................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul w1, w1, w30 // ..............................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul w13, w13, w30 // .................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x27, w19, w0, x27 // .......................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w4, w0, x12 // .................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umull x0, w4, w4 // ..................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x24, w4, w20 // ........................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x14, w4, w25 // ..........................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x22, w4, w10 // ................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umull x17, w4, w7 // .............................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umull x23, w4, w6 // ...................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x18, w4, w16 // .................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umull x3, w4, w26 // ..........................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x4, w4, w5, x27 // ........................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul w27, w21, w30 // ............................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x14, w15, w20, x14 // ...........................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x22, w15, w25, x22 // .................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x17, w20, w10, x17 // ..............................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x23, w15, w7, x23 // ....................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x18, w20, w6, x18 // .....................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x3, w15, w16, x3 // ................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x4, w20, w26, x4 // .........................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w15, w5, x12 // ..................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul w15, w11, w30 // ................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x17, w9, w9, x17 // ...............................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x23, w9, w10, x23 // .....................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x18, w9, w7, x18 // ......................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x3, w9, w6, x3 // .................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x4, w9, w16, x4 // ..........................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x9, w9, w26, x12 // ...................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w2, w10, x18 // .......................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x18, w2, w7, x3 // ..................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x4, w10, w6, x4 // ...........................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x9, w2, w16, x9 // ....................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w11, w15, x12 // ........................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x21, w21, w27, x14 // .............................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add x9, x9, x4, lsr #26 // ............................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x11, w13, w6, x0 // .....................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add x11, x11, x9, lsr #25 // .............................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // bic x2, x9, #0x1ffffff // .....................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add x11, x11, x2, lsr #24 // ......................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x9, x9, #0x1ffffff // ............................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add x11, x11, x2, lsr #21 // .......................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x2, w1, w26, x17 // ....................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x13, x20, x20 // ..........................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x0, x10, x10 // ..............................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x20, x6, x6 // ...............................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x14, x26, x26 // .............................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x11, w27, w7, x11 // ........................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x27, w27, w6, x24 // .........................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and x4, x4, #0x3ffffff // .....................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x11, w1, w0, x11 // .........................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x27, w1, w7, x27 // ..........................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x21, w1, w20, x21 // ................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x1, w1, w16, x22 // ..................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x11, w15, w25, x11 // ..........................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x10, w15, w10, x27 // ...........................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x21, w15, w7, x21 // .................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x1, w15, w6, x1 // ...................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x2, w15, w16, x2 // .........................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x15, w15, w26, x23 // ......................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x11, w19, w13, x11 // ...........................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x13, w19, w25, x10 // ............................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x21, w19, w0, x21 // ...............................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x1, w19, w7, x1 // ....................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x2, w19, w20, x2 // ..........................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x15, w19, w16, x15 // .......................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x12, w19, w14, x12 // ..............................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x19, w19, w5, x18 // ...................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x13, x13, x11, lsr #26 // ............................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // and x0, x11, #0x3ffffff // ...........................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x21, x21, x13, lsr #25 // .............................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // bfi x0, x13, #32, #25 // ......................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x11, x1, x21, lsr #26 // ..............................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and x21, x21, #0x3ffffff // .....................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add x2, x2, x11, lsr #25 // ...........................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // bfi x21, x11, #32, #25 // ......................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x15, x15, x2, lsr #26 // ............................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // and x2, x2, #0x3ffffff // .......................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x12, x12, x15, lsr #25 // ...............................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // bfi x2, x15, #32, #25 // ........................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x1, x19, x12, lsr #26 // ....................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and x12, x12, #0x3ffffff // .......................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x4, x4, x1, lsr #25 // ......................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // bfi x12, x1, #32, #25 // ........................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x9, x9, x4, lsr #26 // .............................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // and x4, x4, #0x3ffffff // ..........................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // bfi x4, x9, #32, #26 // ............................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // stp x0, x21, [sp, #STACK_B_0] // .......................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // stp x2, x12, [sp, #STACK_B_16] // ..................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // str x4, [sp, #STACK_B_32] // .................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // ldr x19, [sp, #STACK_A_0] // ....................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // ldr x5, [sp, #STACK_A_8] // ..................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // ldr x26, [sp, #STACK_A_16] // ..........................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // ldr x16, [sp, #STACK_A_24] // ..............................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // ldr x6, [sp, #STACK_A_32] // ................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // ldr x8, #=0x07fffffe07fffffc // ...............................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x5, x5, x8 // ...................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x26, x26, x8 // ...........................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x16, x16, x8 // ...................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x6, x6, x8 // .................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // movk x8, #0xffb4 // ..............................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x19, x19, x8 // .....................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // sub x19, x19, x0 // ..............................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // sub x5, x5, x21 // .......................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub x26, x26, x2 // ....................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // sub x16, x16, x12 // .........................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub x6, x6, x4 // ......................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and x13, x13, #0x1ffffff // ........................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x11, x11, #0x1ffffff // .........................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and x15, x15, #0x1ffffff // .............................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // and x1, x1, #0x1ffffff // .............................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // mov w0, w0 // ........................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mov w21, w21 // ......................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mov w2, w2 // ...............................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // mov w12, w12 // .........................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mov w4, w4 // ................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // lsr x7, x19, #32 // ................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // lsr x10, x5, #32 // ..................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // lsr x25, x26, #32 // ...................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // lsr x20, x16, #32 // .............................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // lsr x27, x6, #32 // .......................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // ldr x24, #=121666 // .....................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x9, w27, w24, x9 // ........................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x0, w19, w24, x0 // ...............................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x13, w7, w24, x13 // .................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x21, w5, w24, x21 // ............................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // lsr x14, x9, #25 // .........................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x11, w10, w24, x11 // ...................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x9, x9, #0x1ffffff // ..............................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x2, w26, w24, x2 // ...........................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x0, x0, x14 // ..........................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x15, w25, w24, x15 // .....................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add x0, x0, x14, lsl #1 // ...........................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x12, w16, w24, x12 // ..........................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x0, x0, x14, lsl #4 // ............................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x1, w20, w24, x1 // ....................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x4, w6, w24, x4 // .............................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add x13, x13, x0, lsr #26 // ...............................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // and x0, x0, #0x3ffffff // ........................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x21, x21, x13, lsr #25 // ................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // and x13, x13, #0x1ffffff // .......................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x11, x11, x21, lsr #26 // .................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // and x21, x21, #0x3ffffff // ............................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add x2, x2, x11, lsr #25 // ..................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x11, x11, #0x1ffffff // ...........................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add x15, x15, x2, lsr #26 // ...................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x2, x2, #0x3ffffff // ..........................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x12, x12, x15, lsr #25 // ....................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x15, x15, #0x1ffffff // ...............................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add x1, x1, x12, lsr #26 // .....................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // and x12, x12, #0x3ffffff // ..............................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x4, x4, x1, lsr #25 // .............................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // and x1, x1, #0x1ffffff // ......................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x9, x9, x4, lsr #26 // ................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // and x4, x4, #0x3ffffff // ..............................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul w24, w13, w30 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // mul w14, w21, w30 // .................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // mul w22, w11, w30 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul w17, w15, w30 // .....................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // mul w23, w12, w30 // ......................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul w18, w1, w30 // .......................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul w3, w4, w30 // ................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // mul w29, w9, w30 // .....................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umull x28, w13, w6 // ..........................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w11, w16, x28 // ......................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w15, w26, x28 // ...........................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x28, w1, w5, x28 // ............................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x9, w9, w19, x28 // ...............................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x9, w0, w27, x9 // ................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x9, w21, w20, x9 // .................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x9, w2, w25, x9 // .................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x9, w12, w10, x9 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x9, w4, w7, x9 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x28, w13, w20 // .........................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w11, w25, x28 // ..................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w15, w10, x28 // ...................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w1, w7, x28 // ....................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w29, w27, x28 // ........................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x28, x28, x28 // .........................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w0, w6, x28 // .............................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x28, w21, w16, x28 // .........................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w2, w26, x28 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w12, w5, x28 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x4, w4, w19, x28 // ................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umull x28, w13, w16 // ..................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w11, w26, x28 // ...................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w15, w5, x28 // .....................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x1, w1, w19, x28 // ..........................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x1, w29, w6, x1 // .............................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x1, w0, w20, x1 // ..............................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x1, w21, w25, x1 // .................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x1, w2, w10, x1 // ..................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x1, w12, w7, x1 // ...................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x1, w3, w27, x1 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x28, w13, w25 // ....................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w11, w10, x28 // ......................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w15, w7, x28 // .......................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w18, w27, x28 // ........................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w29, w20, x28 // ...........................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add x28, x28, x28 // ............................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x28, w0, w16, x28 // ...............................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x28, w21, w26, x28 // .................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x28, w2, w5, x28 // ..................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w12, w19, x28 // ...................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x12, w3, w6, x12 // ....................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x28, w29, w16 // ....................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x15, w15, w19, x28 // .....................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x15, w18, w6, x15 // ......................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x15, w11, w5, x15 // .......................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x15, w13, w26, x15 // ........................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x15, w3, w20, x15 // .........................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x15, w23, w27, x15 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x15, w2, w7, x15 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x15, w21, w10, x15 // ............................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x15, w0, w25, x15 // .............................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umull x28, w29, w25 // .......................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w18, w20, x28 // ........................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w17, w27, x28 // .........................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w11, w7, x28 // ..........................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w13, w10, x28 // ...........................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x28, x28, x28 // ............................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x28, w3, w16, x28 // .............................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x28, w23, w6, x28 // ..............................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w2, w19, x28 // ...............................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x28, w21, w5, x28 // ................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x28, w0, w26, x28 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umull x8, w29, w26 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x8, w18, w16, x8 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x8, w17, w6, x8 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x11, w11, w19, x8 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x11, w13, w5, x11 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul w2, w2, w30 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x11, w3, w25, x11 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x11, w23, w20, x11 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x11, w2, w27, x11 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x11, w21, w7, x11 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x11, w0, w10, x11 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x15, x15, x28, lsr #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x28, x28, #0x3ffffff // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x12, x12, x15, lsr #25 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x15, x15, #0x1ffffff // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add x1, x1, x12, lsr #26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x12, x12, #0x3ffffff // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................... 
+ // add x4, x4, x1, lsr #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // bfi x12, x1, #32, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................... 
+ // add x9, x9, x4, lsr #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // and x4, x4, #0x3ffffff // ............................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // bic x1, x9, #0x3ffffff // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // lsr x8, x1, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // bfi x4, x9, #32, #26 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add x9, x8, x1, lsr #25 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x9, x9, x1, lsr #22 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x9, w29, w7, x9 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x9, w18, w10, x9 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x9, w17, w25, x9 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x9, w22, w20, x9 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x9, w24, w27, x9 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x9, x9, x9 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x9, w3, w5, x9 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x9, w23, w26, x9 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x9, w2, w16, x9 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x9, w14, w6, x9 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x9, w0, w19, x9 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x1, w29, w5 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x1, w18, w26, x1 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x1, w17, w16, x1 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x1, w22, w6, x1 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x1, w13, w19, x1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x1, w3, w10, x1 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x1, w23, w25, x1 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x24, w2, w20, x1 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x1, w14, w27, x24 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x1, w0, w7, x1 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x10, w29, w10 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x10, w18, w25, x10 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x10, w17, w20, x10 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x10, w22, w27, x10 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x13, w13, w7, x10 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x13, x13, x13 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x13, w3, w26, x13 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x13, w23, w16, x13 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x2, w2, w6, x13 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x21, w21, w19, x2 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x21, w0, w5, x21 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x2, x1, x9, lsr #26 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and x9, x9, #0x3ffffff // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................... + // add x21, x21, x2, lsr #25 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // bfi x9, x2, #32, #25 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................... + // add x11, x11, x21, lsr #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // and x21, x21, #0x3ffffff // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................... + // add x2, x28, x11, lsr #25 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // bfi x21, x11, #32, #25 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................. + // add x11, x15, x2, lsr #26 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and x15, x2, #0x3ffffff // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................... + // bfi x15, x11, #32, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................. 
+ // ldr x11, [sp, #STACK_CTR] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................... + // lsr x2, x11, #32 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................... 
+ // subs w0, w11, #1 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................... + // asr w11, w0, #5 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................... 
+ // add x13, sp, #STACK_SCALAR // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................... + // ldr w11, [x13, w11, SXTW #2] // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................... 
+ // and w13, w0, #0x1f // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................ + // lsr w11, w11, w13 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................... 
+ // stp w0, w11, [sp, #STACK_CTR] // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................. + // umull v25.2D, v0.2S, v23.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v25.2D, v2.2S, v11.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v25.2D, v5.2S, v16.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v25.2D, v7.2S, v22.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v25.2D, v4.2S, v6.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // mul v27.2S, v23.2S, v31.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull v20.2D, v9.2S, v11.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v20.2D, v3.2S, v16.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v20.2D, v1.2S, v22.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v20.2D, v15.2S, v6.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v20.2D, v13.2S, v27.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v25.2D, v9.2S, v18.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v25.2D, v3.2S, v17.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v25.2D, v1.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v25.2D, v15.2S, v12.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v25.2D, v13.2S, v10.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................... + // shl v20.2D, v20.2D, #1 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull v21.2D, v0.2S, v11.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v21.2D, v2.2S, v16.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v21.2D, v5.2S, v22.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v21.2D, v7.2S, v6.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v21.2D, v4.2S, v27.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul v26.2S, v11.2S, v31.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v20.2D, v0.2S, v18.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v20.2D, v2.2S, v17.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v20.2D, v5.2S, v14.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v20.2D, v7.2S, v12.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v20.2D, v4.2S, v10.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................. + // mul v18.2S, v18.2S, v31.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull v11.2D, v9.2S, v16.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v11.2D, v3.2S, v22.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v11.2D, v1.2S, v6.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v11.2D, v15.2S, v27.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v11.2D, v13.2S, v26.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v21.2D, v9.2S, v17.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v21.2D, v3.2S, v14.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v21.2D, v1.2S, v12.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v21.2D, v15.2S, v10.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v21.2D, v13.2S, v18.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................ 
+ // shl v11.2D, v11.2D, #1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umull v23.2D, v0.2S, v16.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v23.2D, v2.2S, v22.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v23.2D, v5.2S, v6.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v23.2D, v7.2S, v27.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v23.2D, v4.2S, v26.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // mul v16.2S, v16.2S, v31.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v11.2D, v0.2S, v17.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v11.2D, v2.2S, v14.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v11.2D, v5.2S, v12.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v11.2D, v7.2S, v10.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v11.2D, v4.2S, v18.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul v17.2S, v17.2S, v31.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................. + // umull v28.2D, v9.2S, v22.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v28.2D, v3.2S, v6.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v28.2D, v1.2S, v27.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v28.2D, v15.2S, v26.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v28.2D, v13.2S, v16.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v23.2D, v9.2S, v14.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v23.2D, v3.2S, v12.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v23.2D, v1.2S, v10.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v23.2D, v15.2S, v18.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v23.2D, v13.2S, v17.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................... + // shl v28.2D, v28.2D, #1 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull v29.2D, v0.2S, v22.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v29.2D, v2.2S, v6.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v29.2D, v5.2S, v27.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v29.2D, v7.2S, v26.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v29.2D, v4.2S, v16.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................ + // mul v22.2S, v22.2S, v31.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v28.2D, v0.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v28.2D, v2.2S, v12.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v28.2D, v5.2S, v10.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v28.2D, v7.2S, v18.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v28.2D, v4.2S, v17.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................... + // mul v14.2S, v14.2S, v31.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umull v8.2D, v9.2S, v6.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v8.2D, v3.2S, v27.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v8.2D, v1.2S, v26.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v8.2D, v15.2S, v16.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v8.2D, v13.2S, v22.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v29.2D, v9.2S, v12.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v29.2D, v3.2S, v10.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v29.2D, v1.2S, v18.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v29.2D, v15.2S, v17.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v29.2D, v13.2S, v14.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // shl v8.2D, v8.2D, #1 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull v19.2D, v0.2S, v6.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v19.2D, v2.2S, v27.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v19.2D, v5.2S, v26.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v19.2D, v7.2S, v16.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v19.2D, v4.2S, v22.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul v6.2S, v6.2S, v31.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................... + // umlal v8.2D, v0.2S, v12.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v8.2D, v2.2S, v10.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v8.2D, v5.2S, v18.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v8.2D, v7.2S, v17.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v8.2D, v4.2S, v14.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul v12.2S, v12.2S, v31.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................... + // umull v27.2D, v9.2S, v27.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v27.2D, v3.2S, v26.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v27.2D, v1.2S, v16.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // ushr v26.2D, v30.2D, #1 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................. + // usra v29.2D, v8.2D, #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and v16.16B, v8.16B, v30.16B // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v19.2D, v9.2S, v10.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // usra v28.2D, v29.2D, #25 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................ + // and v29.16B, v29.16B, v26.16B // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v27.2D, v15.2S, v22.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // usra v23.2D, v28.2D, #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................... 
+ // and v22.16B, v28.16B, v30.16B // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................. + // umlal v19.2D, v3.2S, v18.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................... 
+ // usra v11.2D, v23.2D, #25 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................. + // and v23.16B, v23.16B, v26.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v27.2D, v13.2S, v6.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................. + // usra v21.2D, v11.2D, #26 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................... 
+ // and v6.16B, v11.16B, v30.16B // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................. + // umlal v19.2D, v1.2S, v17.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v19.2D, v15.2S, v14.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................... + // umlal v19.2D, v13.2S, v12.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................... 
+ // usra v20.2D, v21.2D, #25 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................ + // and v21.16B, v21.16B, v26.16B // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................ 
+ // shl v27.2D, v27.2D, #1 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................. + // usra v25.2D, v20.2D, #26 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................... 
+ // and v20.16B, v20.16B, v30.16B // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................ + // umlal v27.2D, v0.2S, v10.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v27.2D, v2.2S, v18.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................... + // umlal v27.2D, v5.2S, v17.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v27.2D, v7.2S, v14.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................ + // umlal v27.2D, v4.2S, v12.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................... 
+ // bic v18.16B, v25.16B, v26.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................. + // and v25.16B, v25.16B, v26.16B // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................... 
+ // usra v27.2D, v18.2D, #25 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................... + // usra v27.2D, v18.2D, #24 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................... 
+ // usra v27.2D, v18.2D, #21 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................ + // usra v19.2D, v27.2D, #26 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................. 
+ // and v18.16B, v27.16B, v30.16B // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................... + // usra v16.2D, v19.2D, #25 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................ 
+ // and v12.16B, v19.16B, v26.16B // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................. + // usra v29.2D, v16.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................. 
+ // and v0.16B, v16.16B, v30.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................. + // uzp1 v5.4S, v22.4S, v23.4S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................... 
+ // uzp1 v7.4S, v6.4S, v21.4S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................... + // ld1r {v4.2D}, [sp] // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................... 
+ // uzp1 v17.4S, v5.4S, v7.4S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................... + // uzp2 v5.4S, v5.4S, v7.4S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................ 
+ // trn1 v7.4S, v20.4S, v25.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................... + // ldr b27, [sp, #STACK_MASK2] // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................. 
+ // uzp1 v18.4S, v18.4S, v12.4S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................ + // uzp1 v12.4S, v0.4S, v29.4S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................ 
+ // mov v25.d[0], v7.d[1] // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................... + // uzp1 v0.4S, v18.4S, v12.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................... 
+ // uzp2 v18.4S, v18.4S, v12.4S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................ + // add v12.4S, v17.4S, v4.4S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................... 
+ // add v14.2S, v7.2S, v4.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................... + // mov v4.b[0], v27.b[0] // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................... 
+ // add v27.4S, v0.4S, v18.4S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................... + // add v17.4S, v17.4S, v5.4S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................... 
+ // add v7.2S, v7.2S, v25.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................. + // add v0.4S, v0.4S, v4.4S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................... 
+ // sub v12.4S, v12.4S, v5.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................... + // sub v18.4S, v0.4S, v18.4S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................... 
+ // sub v25.2S, v14.2S, v25.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................... + // zip1 v0.4S, v18.4S, v27.4S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................... 
+ // zip2 v18.4S, v18.4S, v27.4S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................. + // zip1 v5.4S, v12.4S, v17.4S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................. 
+ // zip2 v12.4S, v12.4S, v17.4S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................ + // zip1 v4.2S, v25.2S, v7.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................ 
+ // zip2 v25.2S, v25.2S, v7.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................... + // mov v9.d[0], v0.d[1] // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................. 
+ // mov v3.d[0], v18.d[1] // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................... + // mov v1.d[0], v5.d[1] // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................... 
+ // mov v15.d[0], v12.d[1] // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................. + // shl v7.2S, v25.2S, #1 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................. 
+ // shl v17.2S, v4.2S, #1 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................... + // shl v27.2S, v15.2S, #1 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................ 
+ // shl v14.2S, v12.2S, #1 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................... + // shl v10.2S, v1.2S, #1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................. 
+ // shl v20.2S, v5.2S, #1 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................... + // shl v21.2S, v3.2S, #1 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................... 
+ // shl v2.2S, v18.2S, #1 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................... + // shl v22.2S, v9.2S, #1 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................... 
+ // umull v13.2D, v0.2S, v7.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................... + // umlal v13.2D, v9.2S, v17.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................ 
+ // umlal v13.2D, v18.2S, v27.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................... + // umlal v13.2D, v3.2S, v14.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................... 
+ // umlal v13.2D, v5.2S, v10.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................... + // umull v6.2D, v0.2S, v17.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................... 
+ // umlal v6.2D, v22.2S, v27.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................ + // umlal v6.2D, v18.2S, v14.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................... 
+ // umlal v6.2D, v21.2S, v10.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................... + // umlal v6.2D, v5.2S, v5.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................. 
+ // mul v25.2S, v25.2S, v31.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................ + // umull v16.2D, v0.2S, v27.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................. 
+ // umlal v16.2D, v9.2S, v14.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................ + // umlal v16.2D, v18.2S, v10.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................... 
+ // umlal v16.2D, v3.2S, v20.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................... + // umlal v6.2D, v25.2S, v7.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................. 
+ // umull v7.2D, v0.2S, v14.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................... + // umlal v7.2D, v22.2S, v10.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................... 
+ // umlal v7.2D, v18.2S, v20.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................. + // umlal v7.2D, v21.2S, v3.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................ 
+ // umull v5.2D, v0.2S, v10.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................... + // umlal v5.2D, v9.2S, v20.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................... 
+ // umlal v5.2D, v18.2S, v21.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................. + // umull v3.2D, v0.2S, v20.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................. 
+ // umlal v3.2D, v22.2S, v21.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................ + // umlal v3.2D, v18.2S, v18.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................... 
+ // umull v18.2D, v0.2S, v21.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................... + // umlal v18.2D, v9.2S, v2.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................... 
+ // umull v11.2D, v0.2S, v2.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................... + // umlal v11.2D, v22.2S, v9.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................... 
+ // umull v23.2D, v0.2S, v22.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................ + // umull v0.2D, v0.2S, v0.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................ 
+ // usra v13.2D, v6.2D, #26 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................... + // and v28.16B, v6.16B, v30.16B // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................... 
+ // mul v6.2S, v4.2S, v31.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................. + // bic v29.16B, v13.16B, v26.16B // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................... 
+ // and v9.16B, v13.16B, v26.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................. + // usra v0.2D, v29.2D, #25 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................. 
+ // mul v15.2S, v15.2S, v31.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................ + // usra v0.2D, v29.2D, #24 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................. 
+ // mul v13.2S, v12.2S, v31.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................... + // usra v0.2D, v29.2D, #21 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................ 
+ // mul v1.2S, v1.2S, v31.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................... + // shl v22.2S, v22.2S, #1 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................... 
+ // shl v29.2S, v21.2S, #1 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................... + // shl v8.2S, v10.2S, #1 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................. 
+ // shl v19.2S, v27.2S, #1 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................ + // umlal v0.2D, v1.2S, v10.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................... 
+ // umlal v0.2D, v25.2S, v22.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................. + // umlal v0.2D, v6.2S, v2.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................... 
+ // umlal v0.2D, v15.2S, v29.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................ + // umlal v0.2D, v13.2S, v20.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................... 
+ // umlal v23.2D, v25.2S, v2.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................... + // umlal v23.2D, v6.2S, v21.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................. 
+ // umlal v23.2D, v15.2S, v20.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................... + // umlal v23.2D, v13.2S, v10.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................... 
+ // umlal v11.2D, v13.2S, v12.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................... + // umlal v11.2D, v25.2S, v29.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................. 
+ // umlal v11.2D, v6.2S, v20.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................. + // umlal v11.2D, v15.2S, v8.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................ 
+ // usra v23.2D, v0.2D, #26 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................ + // umlal v18.2D, v25.2S, v20.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................. 
+ // umlal v18.2D, v6.2S, v10.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................. + // umlal v18.2D, v15.2S, v14.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................ 
+ // usra v11.2D, v23.2D, #25 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................... + // umlal v3.2D, v15.2S, v27.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................... 
+ // umlal v3.2D, v25.2S, v8.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................... + // umlal v3.2D, v6.2S, v14.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................ 
+ // usra v18.2D, v11.2D, #26 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................... + // umlal v5.2D, v25.2S, v14.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................... 
+ // umlal v5.2D, v6.2S, v27.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................... + // usra v3.2D, v18.2D, #25 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................... 
+ // umlal v7.2D, v6.2S, v4.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................. + // umlal v7.2D, v25.2S, v19.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................... 
+ // usra v5.2D, v3.2D, #26 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................... + // umlal v16.2D, v25.2S, v17.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................... 
+ // usra v7.2D, v5.2D, #25 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................... + // usra v16.2D, v7.2D, #26 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................... 
+ // usra v28.2D, v16.2D, #25 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................... + // usra v9.2D, v28.2D, #26 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................... 
+ // and v4.16B, v3.16B, v30.16B // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................... + // and v5.16B, v5.16B, v26.16B // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................. 
+ // and v0.16B, v0.16B, v30.16B // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................ + // and v6.16B, v7.16B, v30.16B // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................... 
+ // and v1.16B, v23.16B, v26.16B // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................ + // and v7.16B, v16.16B, v26.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................. 
+ // and v2.16B, v11.16B, v30.16B // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................... + // and v8.16B, v28.16B, v30.16B // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................. 
+ // and v3.16B, v18.16B, v26.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................ + // add x1, sp, #STACK_A_0 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................... 
+ // ld2 { v0.S, v1.S }[1], [x1], #8 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................. + // ld2 { v2.S, v3.S }[1], [x1], #8 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................. 
+ // ld2 { v4.S, v5.S }[1], [x1], #8 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................... + // ld2 { v6.S, v7.S }[1], [x1], #8 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................. 
+ // ld2 { v8.S, v9.S }[1], [x1], #8 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................. + // add x1, sp, #STACK_B_0 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................. 
+ // ld2 { v10.S, v11.S }[1], [x1], #8 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................. + // ld2 { v25.S, v26.S }[1], [x1], #8 // 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................... + // ld2 { v12.S, v13.S }[1], [x1], #8 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................... 
+ // ld2 { v17.S, v18.S }[1], [x1], #8 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................... + // ld2 { v27.S, v28.S }[1], [x1], #8 // 
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................... + // add x1, sp, #STACK_X_0 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................... 
+ // ld2 { v10.S, v11.S }[0], [x1], #8 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................ + // ld2 { v25.S, v26.S }[0], [x1], #8 // 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................ + // ld2 { v12.S, v13.S }[0], [x1], #8 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................... 
+ // ld2 { v17.S, v18.S }[0], [x1], #8 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................... + // ld2 { v27.S, v28.S }[0], [x1], #8 // 
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................... + // umull v20.2D, v0.2S, v28.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................... 
+ // umlal v20.2D, v2.2S, v18.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................. + // umlal v20.2D, v4.2S, v13.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................ 
+ // umlal v20.2D, v6.2S, v26.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................... + // umlal v20.2D, v8.2S, v11.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................ 
+ // mul v21.2S, v28.2S, v31.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................... + // umull v14.2D, v1.2S, v18.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................... 
+ // umlal v14.2D, v3.2S, v13.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................... + // umlal v14.2D, v5.2S, v26.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................. 
+ // umlal v14.2D, v7.2S, v11.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................... + // umlal v14.2D, v9.2S, v21.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................... 
+ // umlal v20.2D, v1.2S, v27.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................... + // umlal v20.2D, v3.2S, v17.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................. 
+ // umlal v20.2D, v5.2S, v12.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................ + // umlal v20.2D, v7.2S, v25.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................... 
+ // umlal v20.2D, v9.2S, v10.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................... + // shl v22.2D, v14.2D, #1 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................... 
+ // umull v15.2D, v0.2S, v18.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................ + // umlal v15.2D, v2.2S, v13.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................... 
+ // umlal v15.2D, v4.2S, v26.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................. + // umlal v15.2D, v6.2S, v11.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................. 
+ // umlal v15.2D, v8.2S, v21.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................ + // mul v23.2S, v18.2S, v31.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................ 
+ // umlal v22.2D, v0.2S, v27.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................. + // umlal v22.2D, v2.2S, v17.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................ 
+ // umlal v22.2D, v4.2S, v12.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................... + // umlal v22.2D, v6.2S, v25.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................... 
+ // umlal v22.2D, v8.2S, v10.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................... + // mul v18.2S, v27.2S, v31.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................... 
+ // umull v27.2D, v1.2S, v13.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................... + // umlal v27.2D, v3.2S, v26.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................... 
+ // umlal v27.2D, v5.2S, v11.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................. + // umlal v27.2D, v7.2S, v21.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................... 
+ // umlal v27.2D, v9.2S, v23.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................ + // umlal v15.2D, v1.2S, v17.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................. 
+ // umlal v15.2D, v3.2S, v12.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................... + // umlal v15.2D, v5.2S, v25.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................... 
+ // umlal v15.2D, v7.2S, v10.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................... + // umlal v15.2D, v9.2S, v18.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................... 
+ // shl v27.2D, v27.2D, #1 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................... + // umull v28.2D, v0.2S, v13.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................... 
+ // umlal v28.2D, v2.2S, v26.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................... + // umlal v28.2D, v4.2S, v11.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................... 
+ // umlal v28.2D, v6.2S, v21.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................... + // umlal v28.2D, v8.2S, v23.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................ 
+ // mul v13.2S, v13.2S, v31.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................. + // umlal v27.2D, v0.2S, v17.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................... 
+ // umlal v27.2D, v2.2S, v12.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................ + // umlal v27.2D, v4.2S, v25.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................... 
+ // umlal v27.2D, v6.2S, v10.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................. + // umlal v27.2D, v8.2S, v18.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................ 
+ // mul v16.2S, v17.2S, v31.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................... + // umull v17.2D, v1.2S, v26.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................ 
+ // umlal v17.2D, v3.2S, v11.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................... + // umlal v17.2D, v5.2S, v21.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................... 
+ // umlal v17.2D, v7.2S, v23.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................. + // umlal v17.2D, v9.2S, v13.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................... 
+ // umlal v28.2D, v1.2S, v12.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................... + // umlal v28.2D, v3.2S, v25.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................... 
+ // umlal v28.2D, v5.2S, v10.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................. + // umlal v28.2D, v7.2S, v18.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................ 
+ // umlal v28.2D, v9.2S, v16.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................... + // shl v17.2D, v17.2D, #1 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................... 
+ // umull v29.2D, v0.2S, v26.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................... + // umlal v29.2D, v2.2S, v11.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................. 
+ // umlal v29.2D, v4.2S, v21.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................. + // umlal v29.2D, v6.2S, v23.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................ 
+ // umlal v29.2D, v8.2S, v13.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................... + // mul v26.2S, v26.2S, v31.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................... 
+ // umlal v17.2D, v0.2S, v12.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................... + // umlal v17.2D, v2.2S, v25.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................... 
+ // umlal v17.2D, v4.2S, v10.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................. + // umlal v17.2D, v6.2S, v18.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................. 
+ // umlal v17.2D, v8.2S, v16.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................ + // mul v14.2S, v12.2S, v31.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................... 
+ // umull v12.2D, v1.2S, v11.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................. + // umlal v12.2D, v3.2S, v21.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................. 
+ // umlal v12.2D, v5.2S, v23.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................. + // umlal v12.2D, v7.2S, v13.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................ 
+ // umlal v12.2D, v9.2S, v26.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................... + // umlal v29.2D, v1.2S, v25.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................... 
+ // umlal v29.2D, v3.2S, v10.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................. + // umlal v29.2D, v5.2S, v18.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................... 
+ // umlal v29.2D, v7.2S, v16.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................ + // umlal v29.2D, v9.2S, v14.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................... 
+ // shl v19.2D, v12.2D, #1 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................... + // umull v24.2D, v0.2S, v11.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................... 
+ // umlal v24.2D, v2.2S, v21.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................... + // umlal v24.2D, v4.2S, v23.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................. 
+ // umlal v24.2D, v6.2S, v13.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................ + // umlal v24.2D, v8.2S, v26.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................ 
+ // mul v11.2S, v11.2S, v31.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................... + // umlal v19.2D, v0.2S, v25.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................... 
+ // umlal v19.2D, v2.2S, v10.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................. + // umlal v19.2D, v4.2S, v18.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................ 
+ // umlal v19.2D, v6.2S, v16.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................... + // umlal v19.2D, v8.2S, v14.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................... 
+ // mul v12.2S, v25.2S, v31.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................... + // umull v25.2D, v1.2S, v21.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................... 
+ // umlal v25.2D, v3.2S, v23.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................... + // umlal v25.2D, v5.2S, v13.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................. 
+ // ushr v21.2D, v30.2D, #1 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................ + // usra v29.2D, v19.2D, #26 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................... 
+ // and v13.16B, v19.16B, v30.16B // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................... + // umlal v24.2D, v1.2S, v10.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................... 
+ // usra v17.2D, v29.2D, #25 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................ + // and v23.16B, v29.16B, v21.16B // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................ 
+ // umlal v25.2D, v7.2S, v26.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................. + // usra v28.2D, v17.2D, #26 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................... 
+ // and v17.16B, v17.16B, v30.16B // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................... + // umlal v24.2D, v3.2S, v18.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................... 
+ // usra v27.2D, v28.2D, #25 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................... + // and v26.16B, v28.16B, v21.16B // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................... 
+ // umlal v25.2D, v9.2S, v11.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................ + // usra v15.2D, v27.2D, #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................... 
+ // and v27.16B, v27.16B, v30.16B // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................. + // umlal v24.2D, v5.2S, v16.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................ 
+ // umlal v24.2D, v7.2S, v14.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................. + // umlal v24.2D, v9.2S, v12.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................. 
+ // usra v22.2D, v15.2D, #25 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................. + // and v28.16B, v15.16B, v21.16B // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................... 
+ // shl v25.2D, v25.2D, #1 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................... + // usra v20.2D, v22.2D, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................. 
+ // and v22.16B, v22.16B, v30.16B // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................... + // umlal v25.2D, v0.2S, v10.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................. 
+ // umlal v25.2D, v2.2S, v18.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................. + // umlal v25.2D, v4.2S, v16.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................... 
+ // umlal v25.2D, v6.2S, v14.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................... + // umlal v25.2D, v8.2S, v12.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................. 
+ // bic v15.16B, v20.16B, v21.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................ + // and v20.16B, v20.16B, v21.16B // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................... 
+ // usra v25.2D, v15.2D, #25 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................... + // usra v25.2D, v15.2D, #24 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................. 
+ // usra v25.2D, v15.2D, #21 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................. + // usra v24.2D, v25.2D, #26 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................ 
+ // and v25.16B, v25.16B, v30.16B // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................... + // usra v13.2D, v24.2D, #25 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................... 
+ // and v21.16B, v24.16B, v21.16B // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................... + // usra v23.2D, v13.2D, #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................... 
+ // and v24.16B, v13.16B, v30.16B // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................ + // trn1 v0.4S, v0.4S, v1.4S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................... 
+ // trn1 v2.4S, v2.4S, v3.4S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................... + // trn1 v4.4S, v4.4S, v5.4S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................... 
+ // trn1 v6.4S, v6.4S, v7.4S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................. + // trn1 v8.4S, v8.4S, v9.4S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................... 
+ // trn1 v11.4S, v25.4S, v21.4S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*. + // trn1 v13.4S, v24.4S, v23.4S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............... 
+ // trn1 v15.4S, v17.4S, v26.4S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*... + // trn1 v17.4S, v27.4S, v28.4S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........ 
+ // trn1 v19.4S, v22.4S, v20.4S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................. + // eor w1, w11, w2 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......... 
+ // mov v10.d[0], v0.d[1] // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......... + // mov v12.d[0], v2.d[1] // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............. 
+ // mov v14.d[0], v4.d[1] // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...... + // mov v16.d[0], v6.d[1] // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........... 
+ // mov v18.d[0], v8.d[1] // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..... + // mov v1.d[0], x9 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................... 
+ // mov v3.d[0], x21 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................. + // mov v5.d[0], x15 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................... 
+ // mov v7.d[0], x12 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................. + // mov v9.d[0], x4 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................ 
+ // ldr d28, [sp, #STACK_MASK2] // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................... + // ldr d29, [sp, #STACK_MASK1] // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............. 
+ // mov v0.d[0], v11.d[1] // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................* + // mov v2.d[0], v13.d[1] // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............ 
+ // mov v4.d[0], v15.d[1] // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.. + // mov v6.d[0], v17.d[1] // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....... 
+ // mov v8.d[0], v19.d[1] // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.... + + end_label: + + + subs w11, w0, #-1 + cbnz w11, mainloop + + + mov w0, v1.s[0] + mov w1, v1.s[1] + mov w2, v3.s[0] + mov w3, v3.s[1] + mov w4, v5.s[0] + mov w5, v5.s[1] + mov w6, v7.s[0] + mov w7, v7.s[1] + mov w8, v9.s[0] + mov w9, v9.s[1] + + stp w0, w1, [sp, #80] + stp w2, w3, [sp, #88] + stp w4, w5, [sp, #96] + stp w6, w7, [sp, #104] + stp w8, w9, [sp, #112] + + mov x10, v0.d[0] + mov x11, v2.d[0] + mov x12, v4.d[0] + mov x13, v6.d[0] + mov x14, v8.d[0] + + stp x10, x11, [sp] + stp x12, x13, [sp, #16] + str x14, [sp, #32] + + adr x10, invtable + str x10, [sp, #160] + +.Linvloopnext: + ldrh w11, [x10], #2 + mov v20.s[0], w11 + str x10, [sp, #160] + + and w12, w11, #0x7f + subs w30, w12, #1 // square times + bmi .Lskipsquare + + mov w23, w3 + mov w24, w4 + mov w25, w5 + mov w26, w6 + mov w27, w7 + mov w14, w8 + add w10, w0, w0 + add w11, w1, w1 + add w12, w2, w2 + +.Lsqrloop1: + umull x20, w0, w0 + add x4, x24, x23, lsr #25 + umull x21, w10, w1 + and x3, x23, #0x1ffffff + umull x22, w10, w2 
+ add w13, w3, w3 + umull x23, w10, w3 + add x5, x25, x4, lsr #26 + umull x24, w11, w13 + and x4, x4, #0x3ffffff + umull x28, w4, w4 + add x6, x26, x5, lsr #25 + umull x25, w12, w3 + and x5, x5, #0x1ffffff + umull x26, w13, w3 + add w15, w5, w5 + umaddl x28, w13, w15, x28 + add x7, x27, x6, lsr #26 + umull x19, w4, w15 + and x6, x6, #0x3ffffff + umull x27, w11, w6 + add x8, x14, x7, lsr #25 + umaddl x28, w12, w6, x28 + and x7, x7, #0x1ffffff + umaddl x19, w13, w6, x19 + add x9, x9, x8, lsr #26 + umaddl x27, w10, w7, x27 + add w17, w7, w7 + umaddl x28, w11, w17, x28 + and x8, x8, #0x3ffffff + umaddl x19, w10, w9, x19 + add w14, w9, w9 + umaddl x27, w12, w5, x27 + add w16, w14, w14, lsl #1 + umaddl x28, w10, w8, x28 + add w3, w15, w15, lsl #1 + umaddl x19, w12, w7, x19 + add w16, w16, w14, lsl #4 + umaddl x27, w13, w4, x27 + add w3, w3, w15, lsl #4 + umaddl x28, w16, w9, x28 + + umaddl x19, w11, w8, x19 + add w9, w6, w6, lsl #1 + umaddl x20, w3, w5, x20 + + umaddl x24, w10, w4, x24 + add w9, w9, w6, lsl #4 + umaddl x25, w10, w5, x25 + add x19, x19, x28, lsr #26 + umaddl x26, w10, w6, x26 + and x14, x28, #0x3ffffff + umaddl x22, w11, w1, x22 + add x20, x20, x19, lsr #25 + umaddl x23, w11, w2, x23 + bic x1, x19, #0x1ffffff + umaddl x26, w12, w4, x26 + add x20, x20, x1, lsr #24 + umaddl x24, w2, w2, x24 + add w0, w4, w4 + umaddl x25, w11, w4, x25 + add x20, x20, x1, lsr #21 + umaddl x26, w11, w15, x26 + add w1, w17, w17, lsl #1 + umaddl x20, w9, w0, x20 + + umaddl x21, w9, w15, x21 + add w1, w1, w17, lsl #4 + umaddl x22, w9, w6, x22 + add w10, w8, w8, lsl #1 + umaddl x20, w1, w13, x20 + and x9, x19, #0x1ffffff + umaddl x21, w1, w4, x21 + add w10, w10, w8, lsl #4 + umaddl x22, w1, w15, x22 + subs w30, w30, #1 + umaddl x20, w10, w12, x20 + + umaddl x21, w10, w13, x21 + + umaddl x22, w10, w0, x22 + + umaddl x20, w16, w11, x20 + + umaddl x21, w16, w2, x21 + + umaddl x22, w16, w13, x22 + add w11, w6, w6 + umaddl x23, w1, w6, x23 + + umaddl x24, w1, w7, x24 + add x21, x21, 
x20, lsr #26 + umaddl x26, w10, w8, x26 + and x0, x20, #0x3ffffff + umaddl x23, w10, w15, x23 + add x22, x22, x21, lsr #25 + umaddl x24, w10, w11, x24 + and x1, x21, #0x1ffffff + umaddl x25, w10, w17, x25 + and x2, x22, #0x3ffffff + umaddl x23, w16, w4, x23 + add w10, w0, w0 + umaddl x24, w16, w15, x24 + add w11, w1, w1 + umaddl x25, w16, w6, x25 + add w12, w2, w2 + umaddl x26, w16, w17, x26 + add x23, x23, x22, lsr #26 + umaddl x27, w16, w8, x27 + bpl .Lsqrloop1 + + mov w11, v20.s[0] + add x4, x24, x23, lsr #25 + and x3, x23, #0x1ffffff + add x5, x25, x4, lsr #26 + and x4, x4, #0x3ffffff + add x6, x26, x5, lsr #25 + and x5, x5, #0x1ffffff + add x7, x27, x6, lsr #26 + and x6, x6, #0x3ffffff + add x8, x14, x7, lsr #25 + and x7, x7, #0x1ffffff + add x9, x9, x8, lsr #26 + and x8, x8, #0x3ffffff +.Lskipsquare: + mov w12, #40 + tst w11, #1<<8 + ubfx w13, w11, #9, #2 + bne .Lskipmul + mul w20, w13, w12 + add x20, sp, x20 + + ldp w10, w11, [x20] + ldp w12, w13, [x20, #8] + ldp w14, w15, [x20, #16] + ldp w16, w17, [x20, #24] + ldp w19, w20, [x20, #32] + mov w30, #19 + + umull x21, w1, w19 + umull x22, w1, w17 + umull x23, w1, w16 + umull x24, w1, w15 + umaddl x21, w3, w16, x21 + umaddl x22, w3, w15, x22 + umaddl x23, w3, w14, x23 + umaddl x24, w3, w13, x24 + umaddl x21, w5, w14, x21 + umaddl x22, w5, w13, x22 + umaddl x23, w5, w12, x23 + umaddl x24, w5, w11, x24 + umaddl x21, w7, w12, x21 + umaddl x22, w7, w11, x22 + umaddl x23, w7, w10, x23 + mul w27, w7, w30 + mul w25, w9, w30 + mul w26, w8, w30 + mul w28, w6, w30 + umaddl x24, w27, w20, x24 + umaddl x21, w9, w10, x21 + umaddl x22, w25, w20, x22 + umaddl x23, w25, w19, x23 + umaddl x24, w25, w17, x24 + add x22, x22, x22 + umaddl x21, w0, w20, x21 + add x24, x24, x24 + umaddl x22, w0, w19, x22 + umaddl x23, w0, w17, x23 + umaddl x24, w0, w16, x24 + umaddl x21, w2, w17, x21 + umaddl x22, w2, w16, x22 + umaddl x23, w2, w15, x23 + umaddl x24, w2, w14, x24 + umaddl x21, w4, w15, x21 + umaddl x22, w4, w14, x22 + umaddl x23, 
w4, w13, x23 + umaddl x24, w4, w12, x24 + umaddl x21, w6, w13, x21 + umaddl x22, w6, w12, x22 + umaddl x23, w6, w11, x23 + umaddl x24, w6, w10, x24 + umaddl x21, w8, w11, x21 + umaddl x22, w8, w10, x22 + umaddl x23, w26, w20, x23 + umaddl x24, w26, w19, x24 + umull x6, w25, w16 + umull x7, w25, w15 + umull x8, w25, w14 + umaddl x6, w5, w10, x6 + mul w5, w5, w30 + umaddl x7, w27, w17, x7 + umaddl x8, w27, w16, x8 + umaddl x6, w27, w19, x6 + umaddl x7, w5, w20, x7 + umaddl x8, w5, w19, x8 + umaddl x6, w3, w12, x6 + umaddl x7, w3, w11, x7 + umaddl x8, w3, w10, x8 + umaddl x6, w1, w14, x6 + umaddl x7, w1, w13, x7 + umaddl x8, w1, w12, x8 + mul w9, w4, w30 + add x7, x7, x7 + umaddl x6, w26, w17, x6 + umaddl x7, w26, w16, x7 + umaddl x8, w26, w15, x8 + umaddl x6, w28, w20, x6 + umaddl x7, w28, w19, x7 + umaddl x8, w28, w17, x8 + umaddl x6, w4, w11, x6 + umaddl x7, w4, w10, x7 + umaddl x8, w9, w20, x8 + umaddl x6, w2, w13, x6 + umaddl x7, w2, w12, x7 + umaddl x8, w2, w11, x8 + umaddl x6, w0, w15, x6 + umaddl x7, w0, w14, x7 + umaddl x8, w0, w13, x8 + mul w4, w3, w30 + add x6, x6, x7, lsr #26 + and x7, x7, #0x3ffffff + add x24, x24, x6, lsr #25 + and x6, x6, #0x1ffffff + add x23, x23, x24, lsr #26 + and x24, x24, #0x3ffffff + add x22, x22, x23, lsr #25 + bfi x24, x23, #32, #25 + add x21, x21, x22, lsr #26 + and x22, x22, #0x3ffffff + bic x3, x21, #0x3ffffff + lsr x23, x3, #26 + bfi x22, x21, #32, #26 + add x23, x23, x3, lsr #25 + umull x21, w25, w13 + add x23, x23, x3, lsr #22 + umull x3, w25, w12 + umaddl x23, w25, w11, x23 + umaddl x21, w27, w15, x21 + umaddl x3, w27, w14, x3 + umaddl x23, w27, w13, x23 + mul w27, w1, w30 + umaddl x3, w5, w16, x3 + umaddl x23, w5, w15, x23 + umaddl x21, w5, w17, x21 + umaddl x3, w4, w19, x3 + umaddl x23, w4, w17, x23 + umaddl x21, w4, w20, x21 + umaddl x3, w1, w10, x3 + umaddl x23, w27, w20, x23 + umaddl x21, w1, w11, x21 + mul w25, w2, w30 + add x23, x23, x23 + add x21, x21, x21 + umaddl x23, w26, w12, x23 + umaddl x3, w26, w13, x3 + 
umaddl x21, w26, w14, x21 + umaddl x23, w28, w14, x23 + umaddl x3, w28, w15, x3 + umaddl x21, w28, w16, x21 + umaddl x23, w9, w16, x23 + umaddl x3, w9, w17, x3 + umaddl x21, w9, w19, x21 + umaddl x23, w25, w19, x23 + umaddl x3, w25, w20, x3 + umaddl x21, w2, w10, x21 + umaddl x23, w0, w10, x23 + umaddl x3, w0, w11, x3 + umaddl x21, w0, w12, x21 + add x1, x3, x23, lsr #26 + and x0, x23, #0x3ffffff + add x2, x21, x1, lsr #25 + and x1, x1, #0x1ffffff + add x3, x8, x2, lsr #26 + and x2, x2, #0x3ffffff + add x4, x7, x3, lsr #25 + and x3, x3, #0x1ffffff + add x5, x6, x4, lsr #26 + and x4, x4, #0x3ffffff + and x5, x5, #0x3ffffff + + mov w11, v20.s[0] + mov w6, w24 + lsr x7, x24, #32 + mov w8, w22 + lsr x9, x22, #32 +.Lskipmul: + ubfx w12, w11, #11, #2 + cbz w12, .Lskipstore + mov w13, #40 + mul w12, w12, w13 + add x12, sp, x12 + + stp w0, w1, [x12] + stp w2, w3, [x12, #8] + stp w4, w5, [x12, #16] + stp w6, w7, [x12, #24] + stp w8, w9, [x12, #32] +.Lskipstore: + + ldr x10, [sp, #160] + adr x11, invtable+13*2 + cmp x10, x11 + bne .Linvloopnext + + // Final reduce + // w5 and w9 are 26 bits instead of 25 + + orr x10, x0, x1, lsl #26 + orr x10, x10, x2, lsl #51 + + lsr x11, x2, #13 + orr x11, x11, x3, lsl #13 + orr x11, x11, x4, lsl #38 + + add x12, x5, x6, lsl #25 + adds x12, x12, x7, lsl #51 + + lsr x13, x7, #13 + orr x13, x13, x8, lsl #12 + orr x13, x13, x9, lsl #38 + + adcs x13, x13, xzr + adc x14, xzr, xzr + + extr x17, x14, x13, #63 + mov w19, #19 + mul w15, w17, w19 + add w15, w15, #19 + + adds x15, x10, x15 + adcs x15, x11, xzr + adcs x15, x12, xzr + adcs x15, x13, xzr + adc x16, x14, xzr + + extr x16, x16, x15, #63 + mul w16, w16, w19 + + adds x10, x10, x16 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + and x13, x13, 0x7fffffffffffffff + + ldr x17, [sp, STACK_OUT_PTR] + stp x10, x11, [x17] + stp x12, x13, [x17, #16] + + add sp, sp, STACK_OUT_PTR+8 + + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, 
#64] + ldp x27, x28, [sp, #80] + ldp d8, d9, [sp, #96] + ldp d10, d11, [sp, #112] + ldp d12, d13, [sp, #128] + ldp d14, d15, [sp, #144] + ldp x29, x30, [sp], #160 + + ret + // .size x25519_scalarmult, .-x25519_scalarmult + // .type invtable, %object +invtable: + // square times, + // skip mul, + // mulsource, + // dest + .hword 1|(1<<8) |(1<<11) + .hword 2| (2<<9)|(2<<11) + .hword 0| (1<<9)|(1<<11) + .hword 1| (2<<9)|(2<<11) + .hword 5| (2<<9)|(2<<11) + .hword 10| (2<<9)|(3<<11) + .hword 20| (3<<9) + .hword 10| (2<<9)|(2<<11) + .hword 50| (2<<9)|(3<<11) + .hword 100| (3<<9) + .hword 50| (2<<9) + .hword 5| (1<<9) + .hword 0| (0<<9) + // .size invtable, .-invtable + +END: \ No newline at end of file diff --git a/tutorial_files/opt/aarch64_simple0_loop_opt_a55.s b/tutorial_files/opt/aarch64_simple0_loop_opt_a55.s new file mode 100644 index 00000000..3949d03b --- /dev/null +++ b/tutorial_files/opt/aarch64_simple0_loop_opt_a55.s @@ -0,0 +1,125 @@ +qdata0 .req q8 +qdata1 .req q9 +qdata2 .req q10 +qdata3 .req q11 + +qtwiddle .req q0 +qmodulus .req q1 + +data0 .req v8 +data1 .req v9 +data2 .req v10 +data3 .req v11 + +twiddle .req v0 +modulus .req v1 + +tmp .req v12 + +data_ptr .req x0 +twiddle_ptr .req x1 +modulus_ptr .req x2 + +.macro barmul out, in, twiddle, modulus + mul \out.8h, \in.8h, \twiddle.h[0] + sqrdmulh \in.8h, \in.8h, \twiddle.h[1] + mls \out.8h, \in.8h, \modulus.h[0] +.endm + +.macro butterfly data0, data1, tmp, twiddle, modulus + barmul \tmp, \data1, \twiddle, \modulus + sub \data1.8h, \data0.8h, \tmp.8h + add \data0.8h, \data0.8h, \tmp.8h +.endm + +count .req x2 +ldr qtwiddle, [twiddle_ptr, #0] +ldr qmodulus, [modulus_ptr, #0] +mov count, #16 + ldr q16, [x0, #16] + mul v14.8H, v16.8H, v0.H[0] + sub count, count, #1 +start: + ldr q5, [x0, #48] // ...*.............. + // gap // .................. + // gap // .................. + // gap // .................. + sqrdmulh v23.8H, v16.8H, v0.H[1] // .....*............ + // gap // .................. 
+ ldr q15, [x0, #0] // *................. + // gap // .................. + // gap // .................. + // gap // .................. + mul v19.8H, v5.8H, v0.H[0] // .........*........ + // gap // .................. + sqrdmulh v24.8H, v5.8H, v0.H[1] // ..........*....... + // gap // .................. + mls v14.8H, v23.8H, v1.H[0] // ......*........... + // gap // .................. + ldr q4, [x0, #32] // ..*............... + // gap // .................. + // gap // .................. + // gap // .................. + mls v19.8H, v24.8H, v1.H[0] // ...........*...... + // gap // .................. + sub v5.8H, v15.8H, v14.8H // .......*.......... + // gap // .................. + ldr q16, [x0, #80] // .e................ + // gap // .................. + // gap // .................. + // gap // .................. + sub v7.8H, v4.8H, v19.8H // ............*..... + // gap // .................. + str q5, [x0, #16] // ...............*.. + // gap // .................. + add v21.8H, v15.8H, v14.8H // ........*......... + // gap // .................. + str q7, [x0, #48] // .................* + // gap // .................. + add v8.8H, v4.8H, v19.8H // .............*.... + // gap // .................. + str q21, [x0], #4*16 // ..............*... + // gap // .................. + mul v14.8H, v16.8H, v0.H[0] // ....e............. + // gap // .................. + str q8, [x0, #-32] // ................*. + // gap // .................. + + // original source code + // ldr q8, [x0, #0*16] // .........|.*............... + // ldr q9, [x0, #1*16] // e........|........e........ + // ldr q10, [x0, #2*16] // .........|.....*........... + // ldr q11, [x0, #3*16] // .........*................. + // mul v12.8h, v9.8h, v0.h[0] // .......e.|...............e. + // sqrdmulh v9.8h, v9.8h, v0.h[1] // .........|*................ + // mls v12.8h, v9.8h, v1.h[0] // .........|....*............ + // sub v9.8h, v8.8h, v12.8h // .........|.......*......... 
+ // add v8.8h, v8.8h, v12.8h // ...*.....|...........*..... + // mul v12.8h, v11.8h, v0.h[0] // .........|..*.............. + // sqrdmulh v11.8h, v11.8h, v0.h[1] // .........|...*............. + // mls v12.8h, v11.8h, v1.h[0] // .........|......*.......... + // sub v11.8h, v10.8h, v12.8h // .*.......|.........*....... + // add v10.8h, v10.8h, v12.8h // .....*...|.............*... + // str q8, [x0], #4*16 // ......*..|..............*.. + // str q9, [x0, #-3*16] // ..*......|..........*...... + // str q10, [x0, #-2*16] // ........*|................* + // str q11, [x0, #-1*16] // ....*....|............*.... + + sub count, count, #1 + cbnz count, start + ldr q5, [x0, #48] + sqrdmulh v23.8H, v16.8H, v0.H[1] + ldr q15, [x0, #0] + mul v19.8H, v5.8H, v0.H[0] + sqrdmulh v24.8H, v5.8H, v0.H[1] + mls v14.8H, v23.8H, v1.H[0] + ldr q4, [x0, #32] + mls v19.8H, v24.8H, v1.H[0] + sub v5.8H, v15.8H, v14.8H + sub v7.8H, v4.8H, v19.8H + str q5, [x0, #16] + add v21.8H, v15.8H, v14.8H + str q7, [x0, #48] + add v8.8H, v4.8H, v19.8H + str q21, [x0], #4*16 + str q8, [x0, #-32] \ No newline at end of file diff --git a/tutorial_files/opt/aarch64_simple0_loop_opt_mca_a55.s b/tutorial_files/opt/aarch64_simple0_loop_opt_mca_a55.s new file mode 100644 index 00000000..7fbbe9d6 --- /dev/null +++ b/tutorial_files/opt/aarch64_simple0_loop_opt_mca_a55.s @@ -0,0 +1,421 @@ +qdata0 .req q8 +qdata1 .req q9 +qdata2 .req q10 +qdata3 .req q11 + +qtwiddle .req q0 +qmodulus .req q1 + +data0 .req v8 +data1 .req v9 +data2 .req v10 +data3 .req v11 + +twiddle .req v0 +modulus .req v1 + +tmp .req v12 + +data_ptr .req x0 +twiddle_ptr .req x1 +modulus_ptr .req x2 + +.macro barmul out, in, twiddle, modulus + mul \out.8h, \in.8h, \twiddle.h[0] + sqrdmulh \in.8h, \in.8h, \twiddle.h[1] + mls \out.8h, \in.8h, \modulus.h[0] +.endm + +.macro butterfly data0, data1, tmp, twiddle, modulus + barmul \tmp, \data1, \twiddle, \modulus + sub \data1.8h, \data0.8h, \tmp.8h + add \data0.8h, \data0.8h, \tmp.8h +.endm + +count .req 
x2 +ldr qtwiddle, [twiddle_ptr, #0] +ldr qmodulus, [modulus_ptr, #0] +mov count, #16 + ldr q31, [x0, #16] + mul v4.8H, v31.8H, v0.H[0] + sub count, count, #1 +start: + ldr q25, [x0, #48] // ...*.............. + // gap // .................. + // gap // .................. + // gap // .................. + sqrdmulh v11.8H, v31.8H, v0.H[1] // .....*............ + // gap // .................. + ldr q12, [x0, #0] // *................. + // gap // .................. + // gap // .................. + // gap // .................. + mul v3.8H, v25.8H, v0.H[0] // .........*........ + // gap // .................. + sqrdmulh v31.8H, v25.8H, v0.H[1] // ..........*....... + // gap // .................. + mls v4.8H, v11.8H, v1.H[0] // ......*........... + // gap // .................. + ldr q25, [x0, #32] // ..*............... + // gap // .................. + // gap // .................. + // gap // .................. + mls v3.8H, v31.8H, v1.H[0] // ...........*...... + // gap // .................. + sub v23.8H, v12.8H, v4.8H // .......*.......... + // gap // .................. + ldr q31, [x0, #80] // .e................ + // gap // .................. + // gap // .................. + // gap // .................. + sub v19.8H, v25.8H, v3.8H // ............*..... + // gap // .................. + str q23, [x0, #16] // ...............*.. + // gap // .................. + add v3.8H, v25.8H, v3.8H // .............*.... + // gap // .................. + str q19, [x0, #48] // .................* + // gap // .................. + add v25.8H, v12.8H, v4.8H // ........*......... + // gap // .................. + str q3, [x0, #32] // ................*. + // gap // .................. + mul v4.8H, v31.8H, v0.H[0] // ....e............. + // gap // .................. + str q25, [x0], #4*16 // ..............*... + // gap // .................. + + // original source code + // ldr q8, [x0, #0*16] // .........|.*............... + // ldr q9, [x0, #1*16] // e........|........e........ 
+ // ldr q10, [x0, #2*16] // .........|.....*........... + // ldr q11, [x0, #3*16] // .........*................. + // mul v12.8h, v9.8h, v0.h[0] // .......e.|...............e. + // sqrdmulh v9.8h, v9.8h, v0.h[1] // .........|*................ + // mls v12.8h, v9.8h, v1.h[0] // .........|....*............ + // sub v9.8h, v8.8h, v12.8h // .........|.......*......... + // add v8.8h, v8.8h, v12.8h // .....*...|.............*... + // mul v12.8h, v11.8h, v0.h[0] // .........|..*.............. + // sqrdmulh v11.8h, v11.8h, v0.h[1] // .........|...*............. + // mls v12.8h, v11.8h, v1.h[0] // .........|......*.......... + // sub v11.8h, v10.8h, v12.8h // .*.......|.........*....... + // add v10.8h, v10.8h, v12.8h // ...*.....|...........*..... + // str q8, [x0], #4*16 // ........*|................* + // str q9, [x0, #-3*16] // ..*......|..........*...... + // str q10, [x0, #-2*16] // ......*..|..............*.. + // str q11, [x0, #-1*16] // ....*....|............*.... + + // + // LLVM MCA STATISTICS (ORIGINAL) BEGIN + // + // + // [0] Code Region + // + // Iterations: 100 + // Instructions: 1800 + // Total Cycles: 2902 + // Total uOps: 1900 + // + // Dispatch Width: 2 + // uOps Per Cycle: 0.65 + // IPC: 0.62 + // Block RThroughput: 10.0 + // + // + // Resources: + // [0.0] - CortexA55UnitALU + // [0.1] - CortexA55UnitALU + // [1] - CortexA55UnitB + // [2] - CortexA55UnitDiv + // [3.0] - CortexA55UnitFPALU + // [3.1] - CortexA55UnitFPALU + // [4] - CortexA55UnitFPDIV + // [5.0] - CortexA55UnitFPMAC + // [5.1] - CortexA55UnitFPMAC + // [6] - CortexA55UnitLd + // [7] - CortexA55UnitMAC + // [8] - CortexA55UnitSt + // + // + // Resource pressure per iteration: + // [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] + // - - - - 10.00 10.00 - - - 4.00 - 4.00 + // + // Resource pressure by instruction: + // [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] Instructions: + // - - - - - - - - - 1.00 - - ldr q8, [x0] + // - - - - - - - - - 1.00 - - ldr q9, 
[x0, #16] + // - - - - - - - - - 1.00 - - ldr q10, [x0, #32] + // - - - - - - - - - 1.00 - - ldr q11, [x0, #48] + // - - - - - 2.00 - - - - - - mul.8h v12, v9, v0[0] + // - - - - 2.00 - - - - - - - sqrdmulh.8h v9, v9, v0[1] + // - - - - - 2.00 - - - - - - mls.8h v12, v9, v1[0] + // - - - - 2.00 - - - - - - - sub.8h v9, v8, v12 + // - - - - - 2.00 - - - - - - add.8h v8, v8, v12 + // - - - - 2.00 - - - - - - - mul.8h v12, v11, v0[0] + // - - - - - 2.00 - - - - - - sqrdmulh.8h v11, v11, v0[1] + // - - - - 2.00 - - - - - - - mls.8h v12, v11, v1[0] + // - - - - - 2.00 - - - - - - sub.8h v11, v10, v12 + // - - - - 2.00 - - - - - - - add.8h v10, v10, v12 + // - - - - - - - - - - - 1.00 str q8, [x0], #64 + // - - - - - - - - - - - 1.00 stur q9, [x0, #-48] + // - - - - - - - - - - - 1.00 stur q10, [x0, #-32] + // - - - - - - - - - - - 1.00 stur q11, [x0, #-16] + // + // + // Timeline view: + // 0123456789 0123456789 0123456789 0123456789 + // Index 0123456789 0123456789 0123456789 0123456789 012345678 + // + // [0,0] DeeE . . . . . . . . . . . . . . . . . . ldr q8, [x0] + // [0,1] .DeeE. . . . . . . . . . . . . . . . . . ldr q9, [x0, #16] + // [0,2] . DeeE . . . . . . . . . . . . . . . . . ldr q10, [x0, #32] + // [0,3] . DeeE . . . . . . . . . . . . . . . . . ldr q11, [x0, #48] + // [0,4] . DeeeE . . . . . . . . . . . . . . . . . mul.8h v12, v9, v0[0] + // [0,5] . DeeeE. . . . . . . . . . . . . . . . . sqrdmulh.8h v9, v9, v0[1] + // [0,6] . . DeeeE . . . . . . . . . . . . . . . . mls.8h v12, v9, v1[0] + // [0,7] . . . DeE . . . . . . . . . . . . . . . sub.8h v9, v8, v12 + // [0,8] . . . DeE . . . . . . . . . . . . . . . add.8h v8, v8, v12 + // [0,9] . . . DeeeE. . . . . . . . . . . . . . . mul.8h v12, v11, v0[0] + // [0,10] . . . .DeeeE . . . . . . . . . . . . . . sqrdmulh.8h v11, v11, v0[1] + // [0,11] . . . . DeeeE. . . . . . . . . . . . . . mls.8h v12, v11, v1[0] + // [0,12] . . . . . DeE . . . . . . . . . . . . . sub.8h v11, v10, v12 + // [0,13] . . . . . DeE . . . . . 
. . . . . . . . add.8h v10, v10, v12 + // [0,14] . . . . . .DE . . . . . . . . . . . . . str q8, [x0], #64 + // [0,15] . . . . . . DE . . . . . . . . . . . . . stur q9, [x0, #-48] + // [0,16] . . . . . . DE. . . . . . . . . . . . . stur q10, [x0, #-32] + // [0,17] . . . . . . DE . . . . . . . . . . . . stur q11, [x0, #-16] + // [1,0] . . . . . . DeeE . . . . . . . . . . . . ldr q8, [x0] + // [1,1] . . . . . . DeeE . . . . . . . . . . . . ldr q9, [x0, #16] + // [1,2] . . . . . . .DeeE. . . . . . . . . . . . ldr q10, [x0, #32] + // [1,3] . . . . . . . DeeE . . . . . . . . . . . ldr q11, [x0, #48] + // [1,4] . . . . . . . DeeeE . . . . . . . . . . . mul.8h v12, v9, v0[0] + // [1,5] . . . . . . . DeeeE . . . . . . . . . . . sqrdmulh.8h v9, v9, v0[1] + // [1,6] . . . . . . . . DeeeE . . . . . . . . . . mls.8h v12, v9, v1[0] + // [1,7] . . . . . . . . . DeE. . . . . . . . . . sub.8h v9, v8, v12 + // [1,8] . . . . . . . . . DeE . . . . . . . . . add.8h v8, v8, v12 + // [1,9] . . . . . . . . . DeeeE . . . . . . . . . mul.8h v12, v11, v0[0] + // [1,10] . . . . . . . . . DeeeE. . . . . . . . . sqrdmulh.8h v11, v11, v0[1] + // [1,11] . . . . . . . . . . DeeeE . . . . . . . . mls.8h v12, v11, v1[0] + // [1,12] . . . . . . . . . . . DeE . . . . . . . sub.8h v11, v10, v12 + // [1,13] . . . . . . . . . . . DeE . . . . . . . add.8h v10, v10, v12 + // [1,14] . . . . . . . . . . . DE . . . . . . . str q8, [x0], #64 + // [1,15] . . . . . . . . . . . .DE . . . . . . . stur q9, [x0, #-48] + // [1,16] . . . . . . . . . . . . DE . . . . . . . stur q10, [x0, #-32] + // [1,17] . . . . . . . . . . . . DE. . . . . . . stur q11, [x0, #-16] + // [2,0] . . . . . . . . . . . . DeeE . . . . . . ldr q8, [x0] + // [2,1] . . . . . . . . . . . . DeeE . . . . . . ldr q9, [x0, #16] + // [2,2] . . . . . . . . . . . . DeeE . . . . . . ldr q10, [x0, #32] + // [2,3] . . . . . . . . . . . . .DeeE. . . . . . ldr q11, [x0, #48] + // [2,4] . . . . . . . . . . . . . DeeeE . . . . . 
mul.8h v12, v9, v0[0] + // [2,5] . . . . . . . . . . . . . DeeeE . . . . . sqrdmulh.8h v9, v9, v0[1] + // [2,6] . . . . . . . . . . . . . . DeeeE . . . . mls.8h v12, v9, v1[0] + // [2,7] . . . . . . . . . . . . . . .DeE . . . . sub.8h v9, v8, v12 + // [2,8] . . . . . . . . . . . . . . . DeE. . . . add.8h v8, v8, v12 + // [2,9] . . . . . . . . . . . . . . . DeeeE . . . mul.8h v12, v11, v0[0] + // [2,10] . . . . . . . . . . . . . . . DeeeE . . . sqrdmulh.8h v11, v11, v0[1] + // [2,11] . . . . . . . . . . . . . . . . DeeeE . . mls.8h v12, v11, v1[0] + // [2,12] . . . . . . . . . . . . . . . . . DeE. . sub.8h v11, v10, v12 + // [2,13] . . . . . . . . . . . . . . . . . DeE . add.8h v10, v10, v12 + // [2,14] . . . . . . . . . . . . . . . . . DE . str q8, [x0], #64 + // [2,15] . . . . . . . . . . . . . . . . . DE . stur q9, [x0, #-48] + // [2,16] . . . . . . . . . . . . . . . . . .DE. stur q10, [x0, #-32] + // [2,17] . . . . . . . . . . . . . . . . . . DE stur q11, [x0, #-16] + // + // + // Average Wait times (based on the timeline view): + // [0]: Executions + // [1]: Average time spent waiting in a scheduler's queue + // [2]: Average time spent waiting in a scheduler's queue while ready + // [3]: Average time elapsed from WB until retire stage + // + // [0] [1] [2] [3] + // 0. 3 0.0 0.0 0.0 ldr q8, [x0] + // 1. 3 0.0 0.0 0.0 ldr q9, [x0, #16] + // 2. 3 0.0 0.0 0.0 ldr q10, [x0, #32] + // 3. 3 0.0 0.0 0.0 ldr q11, [x0, #48] + // 4. 3 0.0 0.0 0.0 mul.8h v12, v9, v0[0] + // 5. 3 0.0 0.0 0.0 sqrdmulh.8h v9, v9, v0[1] + // 6. 3 0.0 0.0 0.0 mls.8h v12, v9, v1[0] + // 7. 3 0.0 0.0 0.0 sub.8h v9, v8, v12 + // 8. 3 0.0 0.0 0.0 add.8h v8, v8, v12 + // 9. 3 0.0 0.0 0.0 mul.8h v12, v11, v0[0] + // 10. 3 0.0 0.0 0.0 sqrdmulh.8h v11, v11, v0[1] + // 11. 3 0.0 0.0 0.0 mls.8h v12, v11, v1[0] + // 12. 3 0.0 0.0 0.0 sub.8h v11, v10, v12 + // 13. 3 0.0 0.0 0.0 add.8h v10, v10, v12 + // 14. 3 0.0 0.0 0.0 str q8, [x0], #64 + // 15. 3 0.0 0.0 0.0 stur q9, [x0, #-48] + // 16. 
3 0.0 0.0 0.0 stur q10, [x0, #-32] + // 17. 3 0.0 0.0 0.0 stur q11, [x0, #-16] + // 3 0.0 0.0 0.0 + // + // + // ORIGINAL LLVM MCA STATISTICS (ORIGINAL) END + // + // + // LLVM MCA STATISTICS (OPTIMIZED) BEGIN + // + // + // [0] Code Region + // + // Iterations: 100 + // Instructions: 1800 + // Total Cycles: 1803 + // Total uOps: 1900 + // + // Dispatch Width: 2 + // uOps Per Cycle: 1.05 + // IPC: 1.00 + // Block RThroughput: 10.0 + // + // + // Resources: + // [0.0] - CortexA55UnitALU + // [0.1] - CortexA55UnitALU + // [1] - CortexA55UnitB + // [2] - CortexA55UnitDiv + // [3.0] - CortexA55UnitFPALU + // [3.1] - CortexA55UnitFPALU + // [4] - CortexA55UnitFPDIV + // [5.0] - CortexA55UnitFPMAC + // [5.1] - CortexA55UnitFPMAC + // [6] - CortexA55UnitLd + // [7] - CortexA55UnitMAC + // [8] - CortexA55UnitSt + // + // + // Resource pressure per iteration: + // [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] + // - - - - 10.00 10.00 - - - 4.00 - 4.00 + // + // Resource pressure by instruction: + // [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] Instructions: + // - - - - - - - - - 1.00 - - ldr q25, [x0, #48] + // - - - - - 2.00 - - - - - - sqrdmulh.8h v11, v31, v0[1] + // - - - - - - - - - 1.00 - - ldr q12, [x0] + // - - - - 2.00 - - - - - - - mul.8h v3, v25, v0[0] + // - - - - - 2.00 - - - - - - sqrdmulh.8h v31, v25, v0[1] + // - - - - 2.00 - - - - - - - mls.8h v4, v11, v1[0] + // - - - - - - - - - 1.00 - - ldr q25, [x0, #32] + // - - - - - 2.00 - - - - - - mls.8h v3, v31, v1[0] + // - - - - 2.00 - - - - - - - sub.8h v23, v12, v4 + // - - - - - - - - - 1.00 - - ldr q31, [x0, #80] + // - - - - - 2.00 - - - - - - sub.8h v19, v25, v3 + // - - - - - - - - - - - 1.00 str q23, [x0, #16] + // - - - - 2.00 - - - - - - - add.8h v3, v25, v3 + // - - - - - - - - - - - 1.00 str q19, [x0, #48] + // - - - - - 2.00 - - - - - - add.8h v25, v12, v4 + // - - - - - - - - - - - 1.00 str q3, [x0, #32] + // - - - - 2.00 - - - - - - - mul.8h v4, v31, v0[0] + // - - - 
- - - - - - - - 1.00 str q25, [x0], #64 + // + // + // Timeline view: + // 0123456789 0123456789 0123456 + // Index 0123456789 0123456789 0123456789 + // + // [0,0] DeeE . . . . . . . . . . .. ldr q25, [x0, #48] + // [0,1] .DeeeE . . . . . . . . . .. sqrdmulh.8h v11, v31, v0[1] + // [0,2] . DeeE . . . . . . . . . .. ldr q12, [x0] + // [0,3] . DeeeE . . . . . . . . . .. mul.8h v3, v25, v0[0] + // [0,4] . DeeeE . . . . . . . . . .. sqrdmulh.8h v31, v25, v0[1] + // [0,5] . DeeeE. . . . . . . . . .. mls.8h v4, v11, v1[0] + // [0,6] . .DeeE. . . . . . . . . .. ldr q25, [x0, #32] + // [0,7] . . DeeeE . . . . . . . . .. mls.8h v3, v31, v1[0] + // [0,8] . . DeE . . . . . . . . .. sub.8h v23, v12, v4 + // [0,9] . . DeeE . . . . . . . . .. ldr q31, [x0, #80] + // [0,10] . . . DeE. . . . . . . . .. sub.8h v19, v25, v3 + // [0,11] . . . DE . . . . . . . . .. str q23, [x0, #16] + // [0,12] . . . DeE . . . . . . . .. add.8h v3, v25, v3 + // [0,13] . . . DE . . . . . . . .. str q19, [x0, #48] + // [0,14] . . . DeE . . . . . . . .. add.8h v25, v12, v4 + // [0,15] . . . DE . . . . . . . .. str q3, [x0, #32] + // [0,16] . . . .DeeeE . . . . . . .. mul.8h v4, v31, v0[0] + // [0,17] . . . . DE . . . . . . . .. str q25, [x0], #64 + // [1,0] . . . . DeeE . . . . . . .. ldr q25, [x0, #48] + // [1,1] . . . . DeeeE . . . . . . .. sqrdmulh.8h v11, v31, v0[1] + // [1,2] . . . . DeeE . . . . . . .. ldr q12, [x0] + // [1,3] . . . . .DeeeE . . . . . .. mul.8h v3, v25, v0[0] + // [1,4] . . . . . DeeeE . . . . . .. sqrdmulh.8h v31, v25, v0[1] + // [1,5] . . . . . DeeeE . . . . . .. mls.8h v4, v11, v1[0] + // [1,6] . . . . . DeeE . . . . . .. ldr q25, [x0, #32] + // [1,7] . . . . . .DeeeE . . . . .. mls.8h v3, v31, v1[0] + // [1,8] . . . . . . DeE . . . . .. sub.8h v23, v12, v4 + // [1,9] . . . . . . DeeE . . . . .. ldr q31, [x0, #80] + // [1,10] . . . . . . DeE . . . . .. sub.8h v19, v25, v3 + // [1,11] . . . . . . DE . . . . .. str q23, [x0, #16] + // [1,12] . . . . . . .DeE . . . . .. 
add.8h v3, v25, v3 + // [1,13] . . . . . . . DE . . . . .. str q19, [x0, #48] + // [1,14] . . . . . . . DeE . . . .. add.8h v25, v12, v4 + // [1,15] . . . . . . . DE. . . . .. str q3, [x0, #32] + // [1,16] . . . . . . . DeeeE . . . .. mul.8h v4, v31, v0[0] + // [1,17] . . . . . . . DE . . . .. str q25, [x0], #64 + // [2,0] . . . . . . . .DeeE. . . .. ldr q25, [x0, #48] + // [2,1] . . . . . . . . DeeeE . . .. sqrdmulh.8h v11, v31, v0[1] + // [2,2] . . . . . . . . DeeE . . .. ldr q12, [x0] + // [2,3] . . . . . . . . DeeeE . . .. mul.8h v3, v25, v0[0] + // [2,4] . . . . . . . . DeeeE. . .. sqrdmulh.8h v31, v25, v0[1] + // [2,5] . . . . . . . . .DeeeE . .. mls.8h v4, v11, v1[0] + // [2,6] . . . . . . . . . DeeE . .. ldr q25, [x0, #32] + // [2,7] . . . . . . . . . DeeeE . .. mls.8h v3, v31, v1[0] + // [2,8] . . . . . . . . . .DeE . .. sub.8h v23, v12, v4 + // [2,9] . . . . . . . . . .DeeE. .. ldr q31, [x0, #80] + // [2,10] . . . . . . . . . . DeE .. sub.8h v19, v25, v3 + // [2,11] . . . . . . . . . . DE. .. str q23, [x0, #16] + // [2,12] . . . . . . . . . . DeE .. add.8h v3, v25, v3 + // [2,13] . . . . . . . . . . DE .. str q19, [x0, #48] + // [2,14] . . . . . . . . . . .DeE .. add.8h v25, v12, v4 + // [2,15] . . . . . . . . . . .DE .. str q3, [x0, #32] + // [2,16] . . . . . . . . . . . DeeeE mul.8h v4, v31, v0[0] + // [2,17] . . . . . . . . . . . DE.. str q25, [x0], #64 + // + // + // Average Wait times (based on the timeline view): + // [0]: Executions + // [1]: Average time spent waiting in a scheduler's queue + // [2]: Average time spent waiting in a scheduler's queue while ready + // [3]: Average time elapsed from WB until retire stage + // + // [0] [1] [2] [3] + // 0. 3 0.0 0.0 0.0 ldr q25, [x0, #48] + // 1. 3 0.0 0.0 0.0 sqrdmulh.8h v11, v31, v0[1] + // 2. 3 0.0 0.0 0.0 ldr q12, [x0] + // 3. 3 0.0 0.0 0.0 mul.8h v3, v25, v0[0] + // 4. 3 0.0 0.0 0.0 sqrdmulh.8h v31, v25, v0[1] + // 5. 3 0.0 0.0 0.0 mls.8h v4, v11, v1[0] + // 6. 
3 0.0 0.0 0.0 ldr q25, [x0, #32] + // 7. 3 0.0 0.0 0.0 mls.8h v3, v31, v1[0] + // 8. 3 0.0 0.0 0.0 sub.8h v23, v12, v4 + // 9. 3 0.0 0.0 0.0 ldr q31, [x0, #80] + // 10. 3 0.0 0.0 0.0 sub.8h v19, v25, v3 + // 11. 3 0.0 0.0 0.0 str q23, [x0, #16] + // 12. 3 0.0 0.0 0.0 add.8h v3, v25, v3 + // 13. 3 0.0 0.0 0.0 str q19, [x0, #48] + // 14. 3 0.0 0.0 0.0 add.8h v25, v12, v4 + // 15. 3 0.0 0.0 0.0 str q3, [x0, #32] + // 16. 3 0.0 0.0 0.0 mul.8h v4, v31, v0[0] + // 17. 3 0.0 0.0 0.0 str q25, [x0], #64 + // 3 0.0 0.0 0.0 + // + // + // ORIGINAL LLVM MCA STATISTICS (OPTIMIZED) END + // + sub count, count, #1 + cbnz count, start + ldr q25, [x0, #48] + sqrdmulh v11.8H, v31.8H, v0.H[1] + ldr q12, [x0, #0] + mul v3.8H, v25.8H, v0.H[0] + sqrdmulh v31.8H, v25.8H, v0.H[1] + mls v4.8H, v11.8H, v1.H[0] + ldr q25, [x0, #32] + mls v3.8H, v31.8H, v1.H[0] + sub v23.8H, v12.8H, v4.8H + sub v19.8H, v25.8H, v3.8H + str q23, [x0, #16] + add v3.8H, v25.8H, v3.8H + str q19, [x0, #48] + add v25.8H, v12.8H, v4.8H + str q3, [x0, #32] + str q25, [x0], #4*16 \ No newline at end of file diff --git a/tutorial_files/opt/aarch64_simple0_macros_opt_a55.s b/tutorial_files/opt/aarch64_simple0_macros_opt_a55.s new file mode 100644 index 00000000..136fb51c --- /dev/null +++ b/tutorial_files/opt/aarch64_simple0_macros_opt_a55.s @@ -0,0 +1,117 @@ +qdata0 .req q8 +qdata1 .req q9 +qdata2 .req q10 +qdata3 .req q11 + +qtwiddle .req q0 +qmodulus .req q1 + +data0 .req v8 +data1 .req v9 +data2 .req v10 +data3 .req v11 + +twiddle .req v0 +modulus .req v1 + +tmp .req v12 + +data_ptr .req x0 +twiddle_ptr .req x1 +modulus_ptr .req x2 + +.macro barmul out, in, twiddle, modulus + mul \out.8h, \in.8h, \twiddle.h[0] + sqrdmulh \in.8h, \in.8h, \twiddle.h[1] + mls \out.8h, \in.8h, \modulus.h[0] +.endm + +.macro butterfly data0, data1, tmp, twiddle, modulus + barmul \tmp, \data1, \twiddle, \modulus + sub \data1.8h, \data0.8h, \tmp.8h + add \data0.8h, \data0.8h, \tmp.8h +.endm + +count .req x2 + + start: + ldr q4, [x1, #0] // 
*................... + // gap // .................... + // gap // .................... + // gap // .................... + ldr q25, [x0, #16] // ...*................ + // gap // .................... + // gap // .................... + // gap // .................... + ldr q1, [x0, #48] // .....*.............. + // gap // .................... + // gap // .................... + // gap // .................... + mul v24.8H, v25.8H, v4.H[0] // ......*............. + // gap // .................... + sqrdmulh v25.8H, v25.8H, v4.H[1] // .......*............ + // gap // .................... + mul v8.8H, v1.8H, v4.H[0] // ...........*........ + // gap // .................... + sqrdmulh v4.8H, v1.8H, v4.H[1] // ............*....... + // gap // .................... + ldr q1, [x2, #0] // .*.................. + // gap // .................... + // gap // .................... + // gap // .................... + ldr q20, [x0, #0] // ..*................. + // gap // .................... + // gap // .................... + // gap // .................... + mls v24.8H, v25.8H, v1.H[0] // ........*........... + // gap // .................... + mls v8.8H, v4.8H, v1.H[0] // .............*...... + // gap // .................... + ldr q4, [x0, #32] // ....*............... + // gap // .................... + // gap // .................... + // gap // .................... + sub v25.8H, v20.8H, v24.8H // .........*.......... + // gap // .................... + add v1.8H, v20.8H, v24.8H // ..........*......... + // gap // .................... + sub v24.8H, v4.8H, v8.8H // ..............*..... + // gap // .................... + str q25, [x0, #16] // .................*.. + // gap // .................... + add v4.8H, v4.8H, v8.8H // ...............*.... + // gap // .................... + str q1, [x0], #4*16 // ................*... + // gap // .................... + // gap // .................... + // gap // .................... + str q4, [x0, #-32] // ..................*. + // gap // .................... 
+ // gap // .................... + // gap // .................... + str q24, [x0, #-16] // ...................* + // gap // .................... + + // original source code + // ldr q0, [x1, #0] // *................... + // ldr q1, [x2, #0] // .......*............ + // ldr q8, [x0, #0*16] // ........*........... + // ldr q9, [x0, #1*16] // .*.................. + // ldr q10, [x0, #2*16] // ...........*........ + // ldr q11, [x0, #3*16] // ..*................. + // mul v12.8h, v9.8h, v0.h[0] // ...*................ + // sqrdmulh v9.8h, v9.8h, v0.h[1] // ....*............... + // mls v12.8h, v9.8h, v1.h[0] // .........*.......... + // sub v9.8h, v8.8h, v12.8h // ............*....... + // add v8.8h, v8.8h, v12.8h // .............*...... + // mul v12.8h, v11.8h, v0.h[0] // .....*.............. + // sqrdmulh v11.8h, v11.8h, v0.h[1] // ......*............. + // mls v12.8h, v11.8h, v1.h[0] // ..........*......... + // sub v11.8h, v10.8h, v12.8h // ..............*..... + // add v10.8h, v10.8h, v12.8h // ................*... + // str q8, [x0], #4*16 // .................*.. + // str q9, [x0, #-3*16] // ...............*.... + // str q10, [x0, #-2*16] // ..................*. + // str q11, [x0, #-1*16] // ...................* + + end: diff --git a/tutorial_files/opt/aarch64_simple0_opt_a55.s b/tutorial_files/opt/aarch64_simple0_opt_a55.s new file mode 100644 index 00000000..3c1beca7 --- /dev/null +++ b/tutorial_files/opt/aarch64_simple0_opt_a55.s @@ -0,0 +1,78 @@ + ldr q7, [x1, #0] // *................... + // gap // .................... + // gap // .................... + // gap // .................... + ldr q31, [x0, #16] // ...*................ + // gap // .................... + // gap // .................... + // gap // .................... + ldr q24, [x0, #48] // .....*.............. + // gap // .................... + // gap // .................... + // gap // .................... + mul v29.8H, v31.8H, v7.H[0] // ......*............. + // gap // .................... 
+ sqrdmulh v31.8H, v31.8H, v7.H[1] // .......*............ + // gap // .................... + mul v16.8H, v24.8H, v7.H[0] // ...........*........ + // gap // .................... + sqrdmulh v7.8H, v24.8H, v7.H[1] // ............*....... + // gap // .................... + ldr q1, [x2, #0] // .*.................. + // gap // .................... + // gap // .................... + // gap // .................... + ldr q24, [x0] // ..*................. + // gap // .................... + // gap // .................... + // gap // .................... + mls v29.8H, v31.8H, v1.H[0] // ........*........... + // gap // .................... + mls v16.8H, v7.8H, v1.H[0] // .............*...... + // gap // .................... + ldr q7, [x0, #32] // ....*............... + // gap // .................... + // gap // .................... + // gap // .................... + sub v31.8H, v24.8H, v29.8H // .........*.......... + // gap // .................... + add v24.8H, v24.8H, v29.8H // ..........*......... + // gap // .................... + sub v29.8H, v7.8H, v16.8H // ..............*..... + // gap // .................... + str q31, [x0, #16] // .................*.. + // gap // .................... + add v7.8H, v7.8H, v16.8H // ...............*.... + // gap // .................... + str q24, [x0], #4*16 // ................*... + // gap // .................... + // gap // .................... + // gap // .................... + str q7, [x0, #-32] // ..................*. + // gap // .................... + // gap // .................... + // gap // .................... + str q29, [x0, #-16] // ...................* + // gap // .................... + + // original source code + // ldr q0, [x1, #0] // *................... + // ldr q1, [x2, #0] // .......*............ + // ldr q8, [x0] // ........*........... + // ldr q9, [x0, #1*16] // .*.................. + // ldr q10, [x0, #2*16] // ...........*........ + // ldr q11, [x0, #3*16] // ..*................. 
+ // mul v24.8h, v9.8h, v0.h[0] // ...*................ + // sqrdmulh v9.8h, v9.8h, v0.h[1] // ....*............... + // mls v24.8h, v9.8h, v1.h[0] // .........*.......... + // sub v9.8h, v8.8h, v24.8h // ............*....... + // add v8.8h, v8.8h, v24.8h // .............*...... + // mul v24.8h, v11.8h, v0.h[0] // .....*.............. + // sqrdmulh v11.8h, v11.8h, v0.h[1] // ......*............. + // mls v24.8h, v11.8h, v1.h[0] // ..........*......... + // sub v11.8h, v10.8h, v24.8h // ..............*..... + // add v10.8h, v10.8h, v24.8h // ................*... + // str q8, [x0], #4*16 // .................*.. + // str q9, [x0, #-3*16] // ...............*.... + // str q10, [x0, #-2*16] // ..................*. + // str q11, [x0, #-1*16] // ...................* diff --git a/tutorial_files/opt/ntt_kyber_123_4567_opt_a55.s b/tutorial_files/opt/ntt_kyber_123_4567_opt_a55.s new file mode 100644 index 00000000..b8f43f7e --- /dev/null +++ b/tutorial_files/opt/ntt_kyber_123_4567_opt_a55.s @@ -0,0 +1,1350 @@ +/// +/// Copyright (c) 2022 Arm Limited +/// Copyright (c) 2022 Hanno Becker +/// Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer +/// SPDX-License-Identifier: MIT +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to deal +/// in the Software without restriction, including without limitation the rights +/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +/// copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in all +/// copies or substantial portions of the Software. 
+/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +/// SOFTWARE. +/// + +// Needed to provide ASM_LOAD directive -- NOTE(review): the #include on the next line has no header name in this copy; confirm it was not stripped +#include + +// NOTE +// We use a lot of trivial macros to simplify the parsing burden for Slothy +// The macros are not unfolded by Slothy and thus interpreted as instructions, +// which are easier to parse due to e.g. the lack of size specifiers and simpler +// syntax for pre and post increment for loads and stores. + +// Eventually, NeLight should include a proper parser for AArch64, +// but for initial investigations, the below is enough. + +.macro ldr_vo vec, base, offset + ldr qform_\vec, [\base, #\offset] // load with immediate offset; base is left unchanged +.endm + +.macro ldr_vi vec, base, inc + ldr qform_\vec, [\base], #\inc // post-indexed load: base is advanced by inc after the access +.endm + +.macro str_vo vec, base, offset + str qform_\vec, [\base, #\offset] // store with immediate offset; base is left unchanged +.endm +.macro str_vi vec, base, inc + str qform_\vec, [\base], #\inc // post-indexed store: base is advanced by inc after the access +.endm + +.macro vqrdmulh d,a,b + sqrdmulh \d\().8h, \a\().8h, \b\().8h +.endm +.macro vmlsq d,a,b,i + mls \d\().8h, \a\().8h, \b\().h[\i] +.endm +.macro vqrdmulhq d,a,b,i + sqrdmulh \d\().8h, \a\().8h, \b\().h[\i] +.endm +.macro vqdmulhq d,a,b,i + sqdmulh \d\().8h, \a\().8h, \b\().h[\i] +.endm +.macro vmulq d,a,b,i + mul \d\().8h, \a\().8h, \b\().h[\i] +.endm + +.macro mulmodq dst, src, const, idx0, idx1 + vmulq \dst, \src, \const, \idx0 // dst = src * const[idx0] (mul keeps the low 16 bits of each lane) + vqrdmulhq \src, \src, \const, \idx1 // clobbers src: src = sqrdmulh(src, const[idx1]) + vmlsq \dst, \src, consts, 0 // dst -= src * consts[0]; consts is not a macro argument and is resolved outside this macro +.endm + +.macro mulmod dst, src, const, const_twisted + mul \dst\().8h, \src\().8h, \const\().8h // same sequence as mulmodq, but with full vector multipliers instead of indexed lanes + vqrdmulh \src, \src, \const_twisted // clobbers src + vmlsq \dst, \src, consts, 0 // dst -= src * consts[0] +.endm + +.macro ct_butterfly a, b,
root, idx0, idx1 + mulmodq tmp, \b, \root, \idx0, \idx1 // tmp = mulmodq(b, root[idx0], root[idx1]); b is clobbered by mulmodq + sub \b\().8h, \a\().8h, tmp.8h // b = a - tmp, computed before a is overwritten + add \a\().8h, \a\().8h, tmp.8h // a = a + tmp +.endm + +.macro mulmod_v dst, src, const, const_twisted + mul \dst\().8h, \src\().8h, \const\().8h // NOTE(review): this macro body is identical to mulmod + vqrdmulh \src, \src, \const_twisted + vmlsq \dst, \src, consts, 0 +.endm + +.macro ct_butterfly_v a, b, root, root_twisted + mulmod tmp, \b, \root, \root_twisted // NOTE(review): invokes mulmod rather than mulmod_v; both expand to the same instructions + sub \b\().8h, \a\().8h, tmp.8h // b = a - tmp, computed before a is overwritten + add \a\().8h, \a\().8h, tmp.8h // a = a + tmp +.endm + +.macro barrett_reduce a + vqdmulhq t0, \a, consts, 1 // t0 = sqdmulh(a, consts[1]) + srshr t0.8h, t0.8h, #11 // rounding shift right by 11 + vmlsq \a, t0, consts, 0 // a -= t0 * consts[0] +.endm + +.macro load_roots_123 + ldr_vi root0, r_ptr0, 32 // post-increments r_ptr0 by 32 + ldr_vo root1, r_ptr0, -16 // second 16 bytes of the 32 just stepped over +.endm + +.macro load_next_roots_45 + ldr_vi root0, r_ptr0, 16 +.endm + +.macro load_next_roots_67 + ldr_vi root0, r_ptr1, (6*16) // post-increments r_ptr1 by 96; the negative offsets below address the other five vectors of that 96-byte group + ldr_vo root0_tw, r_ptr1, (-6*16 + 1*16) + ldr_vo root1, r_ptr1, (-6*16 + 2*16) + ldr_vo root1_tw, r_ptr1, (-6*16 + 3*16) + ldr_vo root2, r_ptr1, (-6*16 + 4*16) + ldr_vo root2_tw, r_ptr1, (-6*16 + 5*16) +.endm + +.macro transpose4 data + trn1 t0.4s, \data\()0.4s, \data\()1.4s // step 1: interleave the 32-bit elements of rows 0/1 and 2/3 into t0-t3 + trn2 t1.4s, \data\()0.4s, \data\()1.4s + trn1 t2.4s, \data\()2.4s, \data\()3.4s + trn2 t3.4s, \data\()2.4s, \data\()3.4s + + trn2 \data\()2.2d, t0.2d, t2.2d // step 2: interleave 64-bit halves, completing a 4x4 transpose of 32-bit elements + trn2 \data\()3.2d, t1.2d, t3.2d + trn1 \data\()0.2d, t0.2d, t2.2d + trn1 \data\()1.2d, t1.2d, t3.2d +.endm + +.macro transpose_single data_out, data_in + trn1 \data_out\()0.4s, \data_in\()0.4s, \data_in\()1.4s // only the 32-bit interleave step; no 64-bit step follows in this macro + trn2 \data_out\()1.4s, \data_in\()0.4s, \data_in\()1.4s + trn1 \data_out\()2.4s, \data_in\()2.4s, \data_in\()3.4s + trn2 \data_out\()3.4s, \data_in\()2.4s, \data_in\()3.4s +.endm + +.macro save_gprs // @slothy:no-unfold + sub sp, sp, #(16*6) + stp x19, x20, [sp, #16*0] + stp x19, x20, [sp, #16*0] // NOTE(review): exact duplicate of the previous store; redundant but harmless + stp x21, x22, [sp, #16*1] + stp x23, x24, [sp, #16*2] + stp x25, x26, [sp, #16*3] + stp x27, x28, [sp, #16*4] + str x29, [sp, #16*5] // only x29 is stored in the last slot; x30 is not saved by this macro +.endm + +.macro restore_gprs // @slothy:no-unfold + ldp x19, x20, [sp, #16*0] + ldp x21, x22, [sp, #16*1] + ldp x23, x24, [sp,
#16*2] + ldp x25, x26, [sp, #16*3] + ldp x27, x28, [sp, #16*4] + ldr x29, [sp, #16*5] + add sp, sp, #(16*6) +.endm + +.macro save_vregs // @slothy:no-unfold + sub sp, sp, #(16*4) + stp d8, d9, [sp, #16*0] + stp d10, d11, [sp, #16*1] + stp d12, d13, [sp, #16*2] + stp d14, d15, [sp, #16*3] +.endm + +.macro restore_vregs // @slothy:no-unfold + ldp d8, d9, [sp, #16*0] + ldp d10, d11, [sp, #16*1] + ldp d12, d13, [sp, #16*2] + ldp d14, d15, [sp, #16*3] + add sp, sp, #(16*4) +.endm + +#define STACK_SIZE 16 +#define STACK0 0 + +.macro restore a, loc // @slothy:no-unfold + ldr \a, [sp, #\loc\()] +.endm +.macro save loc, a // @slothy:no-unfold + str \a, [sp, #\loc\()] +.endm +.macro push_stack // @slothy:no-unfold + save_gprs + save_vregs + sub sp, sp, #STACK_SIZE +.endm + +.macro pop_stack // @slothy:no-unfold + add sp, sp, #STACK_SIZE + restore_vregs + restore_gprs +.endm + +.data +.p2align 4 +roots: +#include "ntt_kyber_123_45_67_twiddles.s" +.text + + .global ntt_kyber_123_4567 + .global _ntt_kyber_123_4567 + +.p2align 4 +const_addr: .short 3329 + .short 20159 + .short 0 + .short 0 + .short 0 + .short 0 + .short 0 + .short 0 +ntt_kyber_123_4567: +_ntt_kyber_123_4567: + push_stack + + in .req x0 + inp .req x1 + count .req x2 + r_ptr0 .req x3 + r_ptr1 .req x4 + xtmp .req x5 + + qform_v0 .req q0 + qform_v1 .req q1 + qform_v2 .req q2 + qform_v3 .req q3 + qform_v4 .req q4 + qform_v5 .req q5 + qform_v6 .req q6 + qform_v7 .req q7 + qform_v8 .req q8 + qform_v9 .req q9 + qform_v10 .req q10 + qform_v11 .req q11 + qform_v12 .req q12 + qform_v13 .req q13 + qform_v14 .req q14 + qform_v15 .req q15 + qform_v16 .req q16 + qform_v17 .req q17 + qform_v18 .req q18 + qform_v19 .req q19 + qform_v20 .req q20 + qform_v21 .req q21 + qform_v22 .req q22 + qform_v23 .req q23 + qform_v24 .req q24 + qform_v25 .req q25 + qform_v26 .req q26 + qform_v27 .req q27 + qform_v28 .req q28 + qform_v29 .req q29 + qform_v30 .req q30 + qform_v31 .req q31 + + data0 .req v8 + data1 .req v9 + data2 .req v10 + data3 
.req v11 + data4 .req v12 + data5 .req v13 + data6 .req v14 + data7 .req v15 + + x_00 .req x10 + x_01 .req x11 + x_10 .req x12 + x_11 .req x13 + x_20 .req x14 + x_21 .req x15 + x_30 .req x16 + x_31 .req x17 + + xt_00 .req x_00 + xt_01 .req x_20 + xt_10 .req x_10 + xt_11 .req x_30 + xt_20 .req x_01 + xt_21 .req x_21 + xt_30 .req x_11 + xt_31 .req x_31 + + qform_data0 .req q8 + qform_data1 .req q9 + qform_data2 .req q10 + qform_data3 .req q11 + qform_data4 .req q12 + qform_data5 .req q13 + qform_data6 .req q14 + qform_data7 .req q15 + + root0 .req v0 + root1 .req v1 + root2 .req v2 + root0_tw .req v4 + root1_tw .req v5 + root2_tw .req v6 + + consts .req v7 + qform_consts .req q7 + + qform_root0 .req q0 + qform_root1 .req q1 + qform_root2 .req q2 + qform_root0_tw .req q4 + qform_root1_tw .req q5 + qform_root2_tw .req q6 + + tmp .req v24 + t0 .req v25 + t1 .req v26 + t2 .req v27 + t3 .req v28 + + ASM_LOAD(r_ptr0, roots) + ASM_LOAD(r_ptr1, roots_l56) + + ASM_LOAD(xtmp, const_addr) + ld1 {consts.8h}, [xtmp] + + save STACK0, in + mov count, #4 + + load_roots_123 + + .p2align 2 + ldr q4, [x0, #0] // *......... + // gap // .......... + // gap // .......... + // gap // .......... + ldr q16, [x0, #128] // ..*....... + // gap // .......... + // gap // .......... + // gap // .......... + ldr q5, [x0, #192] // ...*...... + // gap // .......... + // gap // .......... + // gap // .......... + ldr q26, [x0, #256] // ....*..... + // gap // .......... + // gap // .......... + // gap // .......... + ldr q14, [x0, #320] // .....*.... + // gap // .......... + // gap // .......... + // gap // .......... + sqrdmulh v9.8H, v26.8H, v0.H[1] // ......*... + // gap // .......... + ldr q15, [x0, #448] // .......*.. + // gap // .......... + // gap // .......... + // gap // .......... + ldr q29, [x0, #384] // ........*. + // gap // .......... + // gap // .......... + // gap // .......... + sqrdmulh v13.8H, v15.8H, v0.H[1] // .........* + // gap // .......... + ldr q17, [x0, #64] // .*........ 
+ // gap // .......... + + // original source code + // ldr q4, [x0, #0] // *......... + // ldr q17, [x0, #64] // .........* + // ldr q16, [x0, #128] // .*........ + // ldr q5, [x0, #192] // ..*....... + // ldr q26, [x0, #256] // ...*...... + // ldr q14, [x0, #320] // ....*..... + // sqrdmulh v9.8H, v26.8H, v0.H[1] // .....*.... + // ldr q15, [x0, #448] // ......*... + // ldr q29, [x0, #384] // .......*.. + // sqrdmulh v13.8H, v15.8H, v0.H[1] // ........*. + + sub count, count, #1 +layer123_start: + mul v26.8H, v26.8H, v0.H[0] // ........*................................................................... + // gap // ............................................................................ + mul v21.8H, v14.8H, v0.H[0] // .............*.............................................................. + // gap // ............................................................................ + sqrdmulh v22.8H, v14.8H, v0.H[1] // ..............*............................................................. + // gap // ............................................................................ + mul v12.8H, v29.8H, v0.H[0] // ..................*......................................................... + // gap // ............................................................................ + mls v26.8H, v9.8H, v7.H[0] // ..........*................................................................. + // gap // ............................................................................ + sqrdmulh v14.8H, v29.8H, v0.H[1] // ...................*........................................................ + // gap // ............................................................................ + mls v21.8H, v22.8H, v7.H[0] // ...............*............................................................ + // gap // ............................................................................ 
+ mul v22.8H, v15.8H, v0.H[0] // .......................*.................................................... + // gap // ............................................................................ + sub v9.8H, v4.8H, v26.8H // ...........*................................................................ + // gap // ............................................................................ + mls v12.8H, v14.8H, v7.H[0] // ....................*....................................................... + // gap // ............................................................................ + add v26.8H, v4.8H, v26.8H // ............*............................................................... + // gap // ............................................................................ + sub v14.8H, v17.8H, v21.8H // ................*........................................................... + // gap // ............................................................................ + add v21.8H, v17.8H, v21.8H // .................*.......................................................... + // gap // ............................................................................ + sub v15.8H, v16.8H, v12.8H // .....................*...................................................... + // gap // ............................................................................ + add v12.8H, v16.8H, v12.8H // ......................*..................................................... + // gap // ............................................................................ + mls v22.8H, v13.8H, v7.H[0] // .........................*.................................................. + // gap // ............................................................................ + mul v16.8H, v15.8H, v0.H[4] // ......................................*..................................... + // gap // ............................................................................ 
+ sqrdmulh v15.8H, v15.8H, v0.H[5] // .......................................*.................................... + // gap // ............................................................................ + mul v4.8H, v12.8H, v0.H[2] // ............................*............................................... + // gap // ............................................................................ + sub v17.8H, v5.8H, v22.8H // ..........................*................................................. + // gap // ............................................................................ + add v22.8H, v5.8H, v22.8H // ...........................*................................................ + // gap // ............................................................................ + mls v16.8H, v15.8H, v7.H[0] // ........................................*................................... + // gap // ............................................................................ + sqrdmulh v12.8H, v12.8H, v0.H[3] // .............................*.............................................. + // gap // ............................................................................ + mul v15.8H, v17.8H, v0.H[4] // ...........................................*................................ + // gap // ............................................................................ + sqrdmulh v17.8H, v17.8H, v0.H[5] // ............................................*............................... + // gap // ............................................................................ + sub v5.8H, v9.8H, v16.8H // .........................................*.................................. + // gap // ............................................................................ + add v9.8H, v9.8H, v16.8H // ..........................................*................................. + // gap // ............................................................................ 
+ mls v4.8H, v12.8H, v7.H[0] // ..............................*............................................. + // gap // ............................................................................ + mul v12.8H, v22.8H, v0.H[2] // .................................*.......................................... + // gap // ............................................................................ + mls v15.8H, v17.8H, v7.H[0] // .............................................*.............................. + // gap // ............................................................................ + sqrdmulh v22.8H, v22.8H, v0.H[3] // ..................................*......................................... + // gap // ............................................................................ + sub v16.8H, v26.8H, v4.8H // ...............................*............................................ + // gap // ............................................................................ + add v26.8H, v26.8H, v4.8H // ................................*........................................... + // gap // ............................................................................ + sub v4.8H, v14.8H, v15.8H // ..............................................*............................. + // gap // ............................................................................ + add v14.8H, v14.8H, v15.8H // ...............................................*............................ + // gap // ............................................................................ + mls v12.8H, v22.8H, v7.H[0] // ...................................*........................................ + // gap // ............................................................................ + mul v22.8H, v4.8H, v1.H[4] // ...............................................................*............ + // gap // ............................................................................ 
+ mul v15.8H, v14.8H, v1.H[2] // ..........................................................*................. + // gap // ............................................................................ + sqrdmulh v14.8H, v14.8H, v1.H[3] // ...........................................................*................ + // gap // ............................................................................ + sub v17.8H, v21.8H, v12.8H // ....................................*....................................... + // gap // ............................................................................ + add v21.8H, v21.8H, v12.8H // .....................................*...................................... + // gap // ............................................................................ + sqrdmulh v12.8H, v4.8H, v1.H[5] // ................................................................*........... + // gap // ............................................................................ + mul v4.8H, v17.8H, v1.H[0] // .....................................................*...................... + // gap // ............................................................................ + mul v29.8H, v21.8H, v0.H[6] // ................................................*........................... + // gap // ............................................................................ + sqrdmulh v21.8H, v21.8H, v0.H[7] // .................................................*.......................... + // gap // ............................................................................ + sqrdmulh v17.8H, v17.8H, v1.H[1] // ......................................................*..................... + // gap // ............................................................................ + mls v15.8H, v14.8H, v7.H[0] // ............................................................*............... 
+ // gap // ............................................................................ + mls v22.8H, v12.8H, v7.H[0] // .................................................................*.......... + // gap // ............................................................................ + mls v29.8H, v21.8H, v7.H[0] // ..................................................*......................... + // gap // ............................................................................ + mls v4.8H, v17.8H, v7.H[0] // .......................................................*.................... + // gap // ............................................................................ + sub v21.8H, v9.8H, v15.8H // .............................................................*.............. + // gap // ............................................................................ + sub v12.8H, v5.8H, v22.8H // ..................................................................*......... + // gap // ............................................................................ + add v22.8H, v5.8H, v22.8H // ...................................................................*........ + // gap // ............................................................................ + add v14.8H, v9.8H, v15.8H // ..............................................................*............. + // gap // ............................................................................ + sub v9.8H, v26.8H, v29.8H // ...................................................*........................ + // gap // ............................................................................ + add v26.8H, v26.8H, v29.8H // ....................................................*....................... + // gap // ............................................................................ + sub v15.8H, v16.8H, v4.8H // ........................................................*................... 
+ // gap // ............................................................................ + add v16.8H, v16.8H, v4.8H // .........................................................*.................. + // gap // ............................................................................ + str q26, [x0], #(16) // ....................................................................*....... + // gap // ............................................................................ + ldr q4, [x0, #0] // e........................................................................... + // gap // ............................................................................ + // gap // ............................................................................ + // gap // ............................................................................ + str q9, [x0, #48] // .....................................................................*...... + // gap // ............................................................................ + ldr q17, [x0, #64] // .e.......................................................................... + // gap // ............................................................................ + // gap // ............................................................................ + // gap // ............................................................................ + str q16, [x0, #112] // ......................................................................*..... + // gap // ............................................................................ + ldr q16, [x0, #128] // ..e......................................................................... + // gap // ............................................................................ + // gap // ............................................................................ + // gap // ............................................................................ 
+ str q15, [x0, #176] // .......................................................................*.... + // gap // ............................................................................ + ldr q5, [x0, #192] // ...e........................................................................ + // gap // ............................................................................ + // gap // ............................................................................ + // gap // ............................................................................ + str q14, [x0, #240] // ........................................................................*... + // gap // ............................................................................ + ldr q26, [x0, #256] // ....e....................................................................... + // gap // ............................................................................ + // gap // ............................................................................ + // gap // ............................................................................ + str q21, [x0, #304] // .........................................................................*.. + // gap // ............................................................................ + ldr q14, [x0, #320] // .....e...................................................................... + // gap // ............................................................................ + // gap // ............................................................................ + // gap // ............................................................................ + str q22, [x0, #368] // ..........................................................................*. + // gap // ............................................................................ + sqrdmulh v9.8H, v26.8H, v0.H[1] // .........e.................................................................. 
+ // gap // ............................................................................ + str q12, [x0, #432] // ...........................................................................* + // gap // ............................................................................ + ldr q15, [x0, #448] // .......e.................................................................... + // gap // ............................................................................ + // gap // ............................................................................ + // gap // ............................................................................ + ldr q29, [x0, #384] // ......e..................................................................... + // gap // ............................................................................ + // gap // ............................................................................ + // gap // ............................................................................ + sqrdmulh v13.8H, v15.8H, v0.H[1] // ........................e................................................... + // gap // ............................................................................ + + // original source code + // ldr q8, [x0, #0] // e................|..........................................................e............. + // ldr q9, [x0, #(1*(512/8))] // ..e..............|............................................................e........... + // ldr q10, [x0, #(2*(512/8))] // ....e............|..............................................................e......... + // ldr q11, [x0, #(3*(512/8))] // ......e..........|................................................................e....... + // ldr q12, [x0, #(4*(512/8))] // ........e........|..................................................................e..... + // ldr q13, [x0, #(5*(512/8))] // ..........e......|....................................................................e... 
+ // ldr q14, [x0, #(6*(512/8))] // ...............e.|........................................................................ + // ldr q15, [x0, #(7*(512/8))] // ..............e..|........................................................................ + // mul v24.8h, v12.8h, v0.h[0] // .................*........................................................................ + // sqrdmulh v12.8h, v12.8h, v0.h[1] // ............e....|......................................................................e. + // mls v24.8h, v12.8h, v7.h[0] // .................|...*.................................................................... + // sub v12.8h, v8.8h, v24.8h // .................|.......*................................................................ + // add v8.8h, v8.8h, v24.8h // .................|.........*.............................................................. + // mul v24.8h, v13.8h, v0.h[0] // .................|*....................................................................... + // sqrdmulh v13.8h, v13.8h, v0.h[1] // .................|.*...................................................................... + // mls v24.8h, v13.8h, v7.h[0] // .................|.....*.................................................................. + // sub v13.8h, v9.8h, v24.8h // .................|..........*............................................................. + // add v9.8h, v9.8h, v24.8h // .................|...........*............................................................ + // mul v24.8h, v14.8h, v0.h[0] // .................|..*..................................................................... + // sqrdmulh v14.8h, v14.8h, v0.h[1] // .................|....*................................................................... + // mls v24.8h, v14.8h, v7.h[0] // .................|........*............................................................... 
+ // sub v14.8h, v10.8h, v24.8h // .................|............*........................................................... + // add v10.8h, v10.8h, v24.8h // .................|.............*.......................................................... + // mul v24.8h, v15.8h, v0.h[0] // .................|......*................................................................. + // sqrdmulh v15.8h, v15.8h, v0.h[1] // ................e|........................................................................ + // mls v24.8h, v15.8h, v7.h[0] // .................|..............*......................................................... + // sub v15.8h, v11.8h, v24.8h // .................|..................*..................................................... + // add v11.8h, v11.8h, v24.8h // .................|...................*.................................................... + // mul v24.8h, v10.8h, v0.h[2] // .................|.................*...................................................... + // sqrdmulh v10.8h, v10.8h, v0.h[3] // .................|.....................*.................................................. + // mls v24.8h, v10.8h, v7.h[0] // .................|..........................*............................................. + // sub v10.8h, v8.8h, v24.8h // .................|..............................*......................................... + // add v8.8h, v8.8h, v24.8h // .................|...............................*........................................ + // mul v24.8h, v11.8h, v0.h[2] // .................|...........................*............................................ + // sqrdmulh v11.8h, v11.8h, v0.h[3] // .................|.............................*.......................................... + // mls v24.8h, v11.8h, v7.h[0] // .................|..................................*..................................... 
+ // sub v11.8h, v9.8h, v24.8h // .................|......................................*................................. + // add v9.8h, v9.8h, v24.8h // .................|.......................................*................................ + // mul v24.8h, v14.8h, v0.h[4] // .................|...............*........................................................ + // sqrdmulh v14.8h, v14.8h, v0.h[5] // .................|................*....................................................... + // mls v24.8h, v14.8h, v7.h[0] // .................|....................*................................................... + // sub v14.8h, v12.8h, v24.8h // .................|........................*............................................... + // add v12.8h, v12.8h, v24.8h // .................|.........................*.............................................. + // mul v24.8h, v15.8h, v0.h[4] // .................|......................*................................................. + // sqrdmulh v15.8h, v15.8h, v0.h[5] // .................|.......................*................................................ + // mls v24.8h, v15.8h, v7.h[0] // .................|............................*........................................... + // sub v15.8h, v13.8h, v24.8h // .................|................................*....................................... + // add v13.8h, v13.8h, v24.8h // .................|.................................*...................................... + // mul v24.8h, v9.8h, v0.h[6] // .................|..........................................*............................. + // sqrdmulh v9.8h, v9.8h, v0.h[7] // .................|...........................................*............................ + // mls v24.8h, v9.8h, v7.h[0] // .................|...............................................*........................ 
+ // sub v9.8h, v8.8h, v24.8h // .................|.....................................................*.................. + // add v8.8h, v8.8h, v24.8h // .................|......................................................*................. + // mul v24.8h, v11.8h, v1.h[0] // .................|.........................................*.............................. + // sqrdmulh v11.8h, v11.8h, v1.h[1] // .................|............................................*........................... + // mls v24.8h, v11.8h, v7.h[0] // .................|................................................*....................... + // sub v11.8h, v10.8h, v24.8h // .................|.......................................................*................ + // add v10.8h, v10.8h, v24.8h // .................|........................................................*............... + // mul v24.8h, v13.8h, v1.h[2] // .................|....................................*................................... + // sqrdmulh v13.8h, v13.8h, v1.h[3] // .................|.....................................*.................................. + // mls v24.8h, v13.8h, v7.h[0] // .................|.............................................*.......................... + // sub v13.8h, v12.8h, v24.8h // .................|.................................................*...................... + // add v12.8h, v12.8h, v24.8h // .................|....................................................*................... + // mul v24.8h, v15.8h, v1.h[4] // .................|...................................*.................................... + // sqrdmulh v15.8h, v15.8h, v1.h[5] // .................|........................................*............................... + // mls v24.8h, v15.8h, v7.h[0] // .................|..............................................*......................... 
+ // sub v15.8h, v14.8h, v24.8h // .................|..................................................*..................... + // add v14.8h, v14.8h, v24.8h // .................|...................................................*.................... + // str q8, [x0], #(16) // .................|.........................................................*.............. + // str q9, [x0, #(-16 + 1*(512/8))] // .*...............|...........................................................*............ + // str q10, [x0, #(-16 + 2*(512/8))] // ...*.............|.............................................................*.......... + // str q11, [x0, #(-16 + 3*(512/8))] // .....*...........|...............................................................*........ + // str q12, [x0, #(-16 + 4*(512/8))] // .......*.........|.................................................................*...... + // str q13, [x0, #(-16 + 5*(512/8))] // .........*.......|...................................................................*.... + // str q14, [x0, #(-16 + 6*(512/8))] // ...........*.....|.....................................................................*.. + // str q15, [x0, #(-16 + 7*(512/8))] // .............*...|.......................................................................* + + sub count, count, #1 + cbnz count, layer123_start + mul v12.8H, v15.8H, v0.H[0] // .......*.......................................................... + // gap // .................................................................. + sqrdmulh v10.8H, v14.8H, v0.H[1] // ..*............................................................... + // gap // .................................................................. + mul v14.8H, v14.8H, v0.H[0] // .*................................................................ + // gap // .................................................................. 
+ sqrdmulh v3.8H, v29.8H, v0.H[1] // .....*............................................................ + // gap // .................................................................. + mls v12.8H, v13.8H, v7.H[0] // ...............*.................................................. + // gap // .................................................................. + mul v8.8H, v29.8H, v0.H[0] // ...*.............................................................. + // gap // .................................................................. + mls v14.8H, v10.8H, v7.H[0] // ......*........................................................... + // gap // .................................................................. + mul v13.8H, v26.8H, v0.H[0] // *................................................................. + // gap // .................................................................. + add v21.8H, v5.8H, v12.8H // ....................*............................................. + // gap // .................................................................. + sub v22.8H, v5.8H, v12.8H // ...................*.............................................. + // gap // .................................................................. + mls v8.8H, v3.8H, v7.H[0] // .........*........................................................ + // gap // .................................................................. + mul v15.8H, v21.8H, v0.H[2] // ............................*..................................... + // gap // .................................................................. + sqrdmulh v26.8H, v21.8H, v0.H[3] // ..............................*................................... + // gap // .................................................................. + sqrdmulh v12.8H, v22.8H, v0.H[5] // ........................*......................................... + // gap // .................................................................. 
+ mul v29.8H, v22.8H, v0.H[4] // .......................*.......................................... + // gap // .................................................................. + sub v21.8H, v16.8H, v8.8H // .............*.................................................... + // gap // .................................................................. + mls v15.8H, v26.8H, v7.H[0] // ...................................*.............................. + // gap // .................................................................. + sub v11.8H, v17.8H, v14.8H // ...........*...................................................... + // gap // .................................................................. + mls v29.8H, v12.8H, v7.H[0] // .............................*.................................... + // gap // .................................................................. + mul v3.8H, v21.8H, v0.H[4] // ................*................................................. + // gap // .................................................................. + sqrdmulh v26.8H, v21.8H, v0.H[5] // .................*................................................ + // gap // .................................................................. + mls v13.8H, v9.8H, v7.H[0] // ....*............................................................. + // gap // .................................................................. + sub v21.8H, v11.8H, v29.8H // .................................*................................ + // gap // .................................................................. + add v12.8H, v17.8H, v14.8H // ............*..................................................... + // gap // .................................................................. + mls v3.8H, v26.8H, v7.H[0] // .....................*............................................ + // gap // .................................................................. 
+ sqrdmulh v26.8H, v21.8H, v1.H[5] // .........................................*........................ + // gap // .................................................................. + mul v21.8H, v21.8H, v1.H[4] // ....................................*............................. + // gap // .................................................................. + add v16.8H, v16.8H, v8.8H // ..............*................................................... + // gap // .................................................................. + sub v28.8H, v4.8H, v13.8H // ........*......................................................... + // gap // .................................................................. + sub v22.8H, v12.8H, v15.8H // .......................................*.......................... + // gap // .................................................................. + mls v21.8H, v26.8H, v7.H[0] // ...............................................*.................. + // gap // .................................................................. + sub v9.8H, v28.8H, v3.8H // .........................*........................................ + // gap // .................................................................. + add v5.8H, v28.8H, v3.8H // ..........................*....................................... + // gap // .................................................................. + add v10.8H, v4.8H, v13.8H // ..........*....................................................... + // gap // .................................................................. + add v26.8H, v9.8H, v21.8H // ....................................................*............. + // gap // .................................................................. + mul v14.8H, v22.8H, v1.H[0] // ..........................................*....................... + // gap // .................................................................. 
+ add v12.8H, v12.8H, v15.8H // ........................................*......................... + // gap // .................................................................. + sqrdmulh v22.8H, v22.8H, v1.H[1] // .............................................*.................... + // gap // .................................................................. + sub v25.8H, v9.8H, v21.8H // ...................................................*.............. + // gap // .................................................................. + add v21.8H, v11.8H, v29.8H // ..................................*............................... + // gap // .................................................................. + sqrdmulh v9.8H, v16.8H, v0.H[3] // ......................*........................................... + // gap // .................................................................. + mul v4.8H, v16.8H, v0.H[2] // ..................*............................................... + // gap // .................................................................. + mul v15.8H, v12.8H, v0.H[6] // ...........................................*...................... + // gap // .................................................................. + mls v14.8H, v22.8H, v7.H[0] // .................................................*................ + // gap // .................................................................. + str q25, [x0, #448] // .................................................................* + // gap // .................................................................. + mls v4.8H, v9.8H, v7.H[0] // ...........................*...................................... + // gap // .................................................................. + sqrdmulh v22.8H, v21.8H, v1.H[3] // ......................................*........................... + // gap // .................................................................. 
+ mul v16.8H, v21.8H, v1.H[2] // .....................................*............................ + // gap // .................................................................. + sqrdmulh v12.8H, v12.8H, v0.H[7] // ............................................*..................... + // gap // .................................................................. + sub v17.8H, v10.8H, v4.8H // ...............................*.................................. + // gap // .................................................................. + str q26, [x0, #384] // ................................................................*. + // gap // .................................................................. + mls v16.8H, v22.8H, v7.H[0] // ..............................................*................... + // gap // .................................................................. + sub v21.8H, v17.8H, v14.8H // ........................................................*......... + // gap // .................................................................. + add v26.8H, v17.8H, v14.8H // .........................................................*........ + // gap // .................................................................. + mls v15.8H, v12.8H, v7.H[0] // ................................................*................. + // gap // .................................................................. + add v27.8H, v10.8H, v4.8H // ................................*................................. + // gap // .................................................................. + str q21, [x0, #192] // .............................................................*.... + // gap // .................................................................. + sub v9.8H, v5.8H, v16.8H // ..................................................*............... + // gap // .................................................................. 
+ str q26, [x0, #128] // ............................................................*..... + // gap // .................................................................. + sub v12.8H, v27.8H, v15.8H // ......................................................*........... + // gap // .................................................................. + str q9, [x0, #320] // ...............................................................*.. + // gap // .................................................................. + add v14.8H, v5.8H, v16.8H // .....................................................*............ + // gap // .................................................................. + str q12, [x0, #64] // ...........................................................*...... + // gap // .................................................................. + add v22.8H, v27.8H, v15.8H // .......................................................*.......... + // gap // .................................................................. + str q14, [x0, #256] // ..............................................................*... + // gap // .................................................................. + // gap // .................................................................. + // gap // .................................................................. + str q22, [x0], #(16) // ..........................................................*....... + // gap // .................................................................. + + // original source code + // mul v26.8H, v26.8H, v0.H[0] // .......*.......................................................... + // mul v21.8H, v14.8H, v0.H[0] // ..*............................................................... + // sqrdmulh v22.8H, v14.8H, v0.H[1] // .*................................................................ + // mul v12.8H, v29.8H, v0.H[0] // .....*............................................................ 
+ // mls v26.8H, v9.8H, v7.H[0] // .....................*............................................ + // sqrdmulh v14.8H, v29.8H, v0.H[1] // ...*.............................................................. + // mls v21.8H, v22.8H, v7.H[0] // ......*........................................................... + // mul v22.8H, v15.8H, v0.H[0] // *................................................................. + // sub v9.8H, v4.8H, v26.8H // ............................*..................................... + // mls v12.8H, v14.8H, v7.H[0] // ..........*....................................................... + // add v26.8H, v4.8H, v26.8H // .................................*................................ + // sub v14.8H, v17.8H, v21.8H // .................*................................................ + // add v21.8H, v17.8H, v21.8H // .......................*.......................................... + // sub v15.8H, v16.8H, v12.8H // ...............*.................................................. + // add v12.8H, v16.8H, v12.8H // ...........................*...................................... + // mls v22.8H, v13.8H, v7.H[0] // ....*............................................................. + // mul v16.8H, v15.8H, v0.H[4] // ...................*.............................................. + // sqrdmulh v15.8H, v15.8H, v0.H[5] // ....................*............................................. + // mul v4.8H, v12.8H, v0.H[2] // .........................................*........................ + // sub v17.8H, v5.8H, v22.8H // .........*........................................................ + // add v22.8H, v5.8H, v22.8H // ........*......................................................... + // mls v16.8H, v15.8H, v7.H[0] // ........................*......................................... + // sqrdmulh v12.8H, v12.8H, v0.H[3] // ........................................*......................... 
+ // mul v15.8H, v17.8H, v0.H[4] // ..............*................................................... + // sqrdmulh v17.8H, v17.8H, v0.H[5] // .............*.................................................... + // sub v5.8H, v9.8H, v16.8H // ...............................*.................................. + // add v9.8H, v9.8H, v16.8H // ................................*................................. + // mls v4.8H, v12.8H, v7.H[0] // .............................................*.................... + // mul v12.8H, v22.8H, v0.H[2] // ...........*...................................................... + // mls v15.8H, v17.8H, v7.H[0] // ..................*............................................... + // sqrdmulh v22.8H, v22.8H, v0.H[3] // ............*..................................................... + // sub v16.8H, v26.8H, v4.8H // .................................................*................ + // add v26.8H, v26.8H, v4.8H // .......................................................*.......... + // sub v4.8H, v14.8H, v15.8H // ......................*........................................... + // add v14.8H, v14.8H, v15.8H // .......................................*.......................... + // mls v12.8H, v22.8H, v7.H[0] // ................*................................................. + // mul v22.8H, v4.8H, v1.H[4] // ..........................*....................................... + // mul v15.8H, v14.8H, v1.H[2] // ...............................................*.................. + // sqrdmulh v14.8H, v14.8H, v1.H[3] // ..............................................*................... + // sub v17.8H, v21.8H, v12.8H // .............................*.................................... + // add v21.8H, v21.8H, v12.8H // ....................................*............................. + // sqrdmulh v12.8H, v4.8H, v1.H[5] // .........................*........................................ 
+ // mul v4.8H, v17.8H, v1.H[0] // ...................................*.............................. + // mul v29.8H, v21.8H, v0.H[6] // ..........................................*....................... + // sqrdmulh v21.8H, v21.8H, v0.H[7] // ................................................*................. + // sqrdmulh v17.8H, v17.8H, v1.H[1] // .....................................*............................ + // mls v15.8H, v14.8H, v7.H[0] // ...................................................*.............. + // mls v22.8H, v12.8H, v7.H[0] // ..............................*................................... + // mls v29.8H, v21.8H, v7.H[0] // ......................................................*........... + // mls v4.8H, v17.8H, v7.H[0] // ...........................................*...................... + // sub v21.8H, v9.8H, v15.8H // .........................................................*........ + // sub v12.8H, v5.8H, v22.8H // ......................................*........................... + // add v22.8H, v5.8H, v22.8H // ..................................*............................... + // add v14.8H, v9.8H, v15.8H // .............................................................*.... + // sub v9.8H, v26.8H, v29.8H // ...........................................................*...... + // add v26.8H, v26.8H, v29.8H // ...............................................................*.. + // sub v15.8H, v16.8H, v4.8H // ....................................................*............. + // add v16.8H, v16.8H, v4.8H // .....................................................*............ + // str q26, [x0], #(16) // .................................................................* + // str q9, [x0, #48] // ..............................................................*... + // str q16, [x0, #112] // ..........................................................*....... 
+ // str q15, [x0, #176] // ........................................................*......... + // str q14, [x0, #240] // ................................................................*. + // str q21, [x0, #304] // ............................................................*..... + // str q22, [x0, #368] // ..................................................*............... + // str q12, [x0, #432] // ............................................*..................... + + + restore inp, STACK0 + mov count, #8 + + .p2align 2 + ldr q13, [x3], #16 // ..*....................................................... + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + ldr q6, [x1, #48] // *......................................................... + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + ldr q3, [x1, #32] // .*........................................................ + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + sqrdmulh v4.8H, v6.8H, v13.H[1] // ...*...................................................... + // gap // .......................................................... + mul v18.8H, v6.8H, v13.H[0] // .....*.................................................... + // gap // .......................................................... + sqrdmulh v0.8H, v3.8H, v13.H[1] // .........*................................................ + // gap // .......................................................... + ldr q22, [x1, #16] // .......*.................................................. 
+ // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + mls v18.8H, v4.8H, v7.H[0] // ........*................................................. + // gap // .......................................................... + mul v24.8H, v3.8H, v13.H[0] // ......*................................................... + // gap // .......................................................... + ldr q9, [x1, #0] // ..........*............................................... + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + sub v21.8H, v22.8H, v18.8H // .............*............................................ + // gap // .......................................................... + add v14.8H, v22.8H, v18.8H // ...........*.............................................. + // gap // .......................................................... + mls v24.8H, v0.8H, v7.H[0] // ............*............................................. + // gap // .......................................................... + mul v0.8H, v21.8H, v13.H[4] // .................*........................................ + // gap // .......................................................... + sqrdmulh v21.8H, v21.8H, v13.H[5] // ................*......................................... + // gap // .......................................................... + sqrdmulh v26.8H, v14.8H, v13.H[3] // ..............*........................................... + // gap // .......................................................... + mul v18.8H, v14.8H, v13.H[2] // ...............*.......................................... + // gap // .......................................................... 
+ ldr q11, [x4], #(6*16) // ....*..................................................... + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + ldr q5, [x4, #-80] // ..................*....................................... + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + mls v18.8H, v26.8H, v7.H[0] // ...................*...................................... + // gap // .......................................................... + add v26.8H, v9.8H, v24.8H // ....................*..................................... + // gap // .......................................................... + mls v0.8H, v21.8H, v7.H[0] // .....................*.................................... + // gap // .......................................................... + sub v22.8H, v9.8H, v24.8H // ......................*................................... + // gap // .......................................................... + add v12.8H, v26.8H, v18.8H // ........................*................................. + // gap // .......................................................... + sub v14.8H, v26.8H, v18.8H // .......................*.................................. + // gap // .......................................................... + sub v21.8H, v22.8H, v0.8H // .........................*................................ + // gap // .......................................................... + add v26.8H, v22.8H, v0.8H // ..........................*............................... + // gap // .......................................................... + trn2 v17.4S, v12.4S, v14.4S // ............................*............................. 
+ // gap // .......................................................... + trn1 v29.4S, v12.4S, v14.4S // ...........................*.............................. + // gap // .......................................................... + trn2 v4.4S, v26.4S, v21.4S // .............................*............................ + // gap // .......................................................... + trn1 v16.4S, v26.4S, v21.4S // ................................*......................... + // gap // .......................................................... + ldr q0, [x4, #-16] // ..................................................*....... + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + trn2 v21.2D, v29.2D, v16.2D // ...................................*...................... + // gap // .......................................................... + trn2 v22.2D, v17.2D, v4.2D // ...............................*.......................... + // gap // .......................................................... + sqrdmulh v26.8H, v21.8H, v5.8H // ..........................................*............... + // gap // .......................................................... + mul v15.8H, v22.8H, v11.8H // .................................*........................ + // gap // .......................................................... + sqrdmulh v22.8H, v22.8H, v5.8H // ..................................*....................... + // gap // .......................................................... + ldr q12, [x4, #-64] // ........................................*................. + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... 
+ trn1 v14.2D, v17.2D, v4.2D // ....................................*..................... + // gap // .......................................................... + mls v15.8H, v22.8H, v7.H[0] // ......................................*................... + // gap // .......................................................... + ldr q5, [x4, #-48] // ..............................*........................... + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + mul v9.8H, v21.8H, v11.8H // .....................................*.................... + // gap // .......................................................... + add v18.8H, v14.8H, v15.8H // .........................................*................ + // gap // .......................................................... + ldr q21, [x4, #-32] // ...............................................*.......... + // gap // .......................................................... + // gap // .......................................................... + // gap // .......................................................... + mul v12.8H, v18.8H, v12.8H // ............................................*............. + // gap // .......................................................... + sqrdmulh v22.8H, v18.8H, v5.8H // .............................................*............ + // gap // .......................................................... + mls v9.8H, v26.8H, v7.H[0] // ..............................................*........... + // gap // .......................................................... + trn1 v16.2D, v29.2D, v16.2D // .......................................*.................. + // gap // .......................................................... + sub v14.8H, v14.8H, v15.8H // ...........................................*.............. 
+ // gap // .......................................................... + mls v12.8H, v22.8H, v7.H[0] // ................................................*......... + // gap // .......................................................... + add v26.8H, v16.8H, v9.8H // .................................................*........ + // gap // .......................................................... + sqrdmulh v25.8H, v14.8H, v0.8H // .......................................................*.. + // gap // .......................................................... + mul v18.8H, v14.8H, v21.8H // .....................................................*.... + // gap // .......................................................... + add v27.8H, v26.8H, v12.8H // ....................................................*..... + // gap // .......................................................... + sub v28.8H, v26.8H, v12.8H // ...................................................*...... + // gap // .......................................................... + sub v8.8H, v16.8H, v9.8H // ........................................................*. + // gap // .......................................................... + sqdmulh v24.8H, v27.8H, v7.H[1] // .........................................................* + // gap // .......................................................... + sqdmulh v0.8H, v28.8H, v7.H[1] // ......................................................*... + // gap // .......................................................... + + // original source code + // ldr q0, [x1, #48] // .*........................................................ + // ldr q23, [x1, #32] // ..*....................................................... + // ldr q10, [x3], #16 // *......................................................... + // sqrdmulh v2.8H, v0.8H, v10.H[1] // ...*...................................................... 
+ // ldr q22, [x4], #(6*16) // .................*........................................ + // mul v0.8H, v0.8H, v10.H[0] // ....*..................................................... + // mul v26.8H, v23.8H, v10.H[0] // ........*................................................. + // ldr q6, [x1, #16] // ......*................................................... + // mls v0.8H, v2.8H, v7.H[0] // .......*.................................................. + // sqrdmulh v24.8H, v23.8H, v10.H[1] // .....*.................................................... + // ldr q18, [x1, #0] // .........*................................................ + // add v14.8H, v6.8H, v0.8H // ...........*.............................................. + // mls v26.8H, v24.8H, v7.H[0] // ............*............................................. + // sub v17.8H, v6.8H, v0.8H // ..........*............................................... + // sqrdmulh v21.8H, v14.8H, v10.H[3] // ...............*.......................................... + // mul v24.8H, v14.8H, v10.H[2] // ................*......................................... + // sqrdmulh v8.8H, v17.8H, v10.H[5] // ..............*........................................... + // mul v31.8H, v17.8H, v10.H[4] // .............*............................................ + // ldr q16, [x4, #-80] // ..................*....................................... + // mls v24.8H, v21.8H, v7.H[0] // ...................*...................................... + // add v6.8H, v18.8H, v26.8H // ....................*..................................... + // mls v31.8H, v8.8H, v7.H[0] // .....................*.................................... + // sub v0.8H, v18.8H, v26.8H // ......................*................................... + // sub v20.8H, v6.8H, v24.8H // ........................*................................. + // add v28.8H, v6.8H, v24.8H // .......................*.................................. 
+ // sub v23.8H, v0.8H, v31.8H // .........................*................................ + // add v18.8H, v0.8H, v31.8H // ..........................*............................... + // trn1 v19.4S, v28.4S, v20.4S // ............................*............................. + // trn2 v26.4S, v28.4S, v20.4S // ...........................*.............................. + // trn2 v9.4S, v18.4S, v23.4S // .............................*............................ + // ldr q12, [x4, #-48] // ........................................*................. + // trn2 v15.2D, v26.2D, v9.2D // .................................*........................ + // trn1 v0.4S, v18.4S, v23.4S // ..............................*........................... + // mul v24.8H, v15.8H, v22.8H // ...................................*...................... + // sqrdmulh v10.8H, v15.8H, v16.8H // ....................................*..................... + // trn2 v28.2D, v19.2D, v0.2D // ................................*......................... + // trn1 v21.2D, v26.2D, v9.2D // ......................................*................... + // mul v26.8H, v28.8H, v22.8H // .........................................*................ + // mls v24.8H, v10.8H, v7.H[0] // .......................................*.................. + // trn1 v4.2D, v19.2D, v0.2D // ...............................................*.......... + // ldr q13, [x4, #-64] // .....................................*.................... + // add v29.8H, v21.8H, v24.8H // ..........................................*............... + // sqrdmulh v31.8H, v28.8H, v16.8H // ..................................*....................... + // sub v14.8H, v21.8H, v24.8H // ................................................*......... + // mul v15.8H, v29.8H, v13.8H // ............................................*............. + // sqrdmulh v12.8H, v29.8H, v12.8H // .............................................*............ 
+ // mls v26.8H, v31.8H, v7.H[0] // ..............................................*........... + // ldr q31, [x4, #-32] // ...........................................*.............. + // mls v15.8H, v12.8H, v7.H[0] // .................................................*........ + // add v29.8H, v4.8H, v26.8H // ..................................................*....... + // ldr q8, [x4, #-16] // ...............................*.......................... + // sub v28.8H, v29.8H, v15.8H // ......................................................*... + // add v27.8H, v29.8H, v15.8H // .....................................................*.... + // mul v18.8H, v14.8H, v31.8H // ....................................................*..... + // sqdmulh v0.8H, v28.8H, v7.H[1] // .........................................................* + // sqrdmulh v25.8H, v14.8H, v8.8H // ...................................................*...... + // sub v8.8H, v4.8H, v26.8H // .......................................................*.. + // sqdmulh v24.8H, v27.8H, v7.H[1] // ........................................................*. + + sub count, count, #1 +layer4567_start: + srshr v6.8H, v0.8H, #11 // ...............................................................*........ + // gap // ........................................................................ + mls v18.8H, v25.8H, v7.H[0] // ........................................................*............... + // gap // ........................................................................ + ldr q0, [x1, #112] // ...e.................................................................... + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ 
+ mls v28.8H, v6.8H, v7.H[0] // ................................................................*....... + // gap // ........................................................................ + sub v30.8H, v8.8H, v18.8H // .........................................................*.............. + // gap // ........................................................................ + add v29.8H, v8.8H, v18.8H // ..........................................................*............. + // gap // ........................................................................ + srshr v19.8H, v24.8H, #11 // ............................................................*........... + // gap // ........................................................................ + sqdmulh v5.8H, v30.8H, v7.H[1] // ....................................................................*... + // gap // ........................................................................ + sqdmulh v24.8H, v29.8H, v7.H[1] // .................................................................*...... + // gap // ........................................................................ + ldr q23, [x1, #96] // ..e..................................................................... + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + srshr v12.8H, v5.8H, #11 // .....................................................................*.. + // gap // ........................................................................ + srshr v11.8H, v24.8H, #11 // ..................................................................*..... + // gap // ........................................................................ + ldr q10, [x3], #16 // ....e................................................................... 
+ // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + mls v30.8H, v12.8H, v7.H[0] // ......................................................................*. + // gap // ........................................................................ + mls v29.8H, v11.8H, v7.H[0] // ...................................................................*.... + // gap // ........................................................................ + mls v27.8H, v19.8H, v7.H[0] // .............................................................*.......... + // gap // ........................................................................ + sqrdmulh v2.8H, v0.8H, v10.H[1] // ...........e............................................................ + // gap // ........................................................................ + ldr q22, [x4], #(6*16) // .................................e...................................... + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + st4 {v27.4S, v28.4S, v29.4S, v30.4S}, [x1], #64 // .......................................................................* + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ 
+ // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + mul v0.8H, v0.8H, v10.H[0] // ..........e............................................................. + // gap // ........................................................................ + mul v26.8H, v23.8H, v10.H[0] // .....e.................................................................. + // gap // ........................................................................ + ldr q6, [x1, #16] // .e...................................................................... + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + mls v0.8H, v2.8H, v7.H[0] // ............e........................................................... + // gap // ........................................................................ + sqrdmulh v24.8H, v23.8H, v10.H[1] // ......e................................................................. + // gap // ........................................................................ + ldr q18, [x1, #0] // e....................................................................... + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + add v14.8H, v6.8H, v0.8H // ..............e......................................................... + // gap // ........................................................................ 
+ mls v26.8H, v24.8H, v7.H[0] // .......e................................................................ + // gap // ........................................................................ + sub v17.8H, v6.8H, v0.8H // .............e.......................................................... + // gap // ........................................................................ + sqrdmulh v21.8H, v14.8H, v10.H[3] // ................e....................................................... + // gap // ........................................................................ + mul v24.8H, v14.8H, v10.H[2] // ...............e........................................................ + // gap // ........................................................................ + sqrdmulh v8.8H, v17.8H, v10.H[5] // .....................e.................................................. + // gap // ........................................................................ + mul v31.8H, v17.8H, v10.H[4] // ....................e................................................... + // gap // ........................................................................ + ldr q16, [x4, #-80] // ..................................e..................................... + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + mls v24.8H, v21.8H, v7.H[0] // .................e...................................................... + // gap // ........................................................................ + add v6.8H, v18.8H, v26.8H // .........e.............................................................. + // gap // ........................................................................ + mls v31.8H, v8.8H, v7.H[0] // ......................e................................................. 
+ // gap // ........................................................................ + sub v0.8H, v18.8H, v26.8H // ........e............................................................... + // gap // ........................................................................ + sub v20.8H, v6.8H, v24.8H // ..................e..................................................... + // gap // ........................................................................ + add v28.8H, v6.8H, v24.8H // ...................e.................................................... + // gap // ........................................................................ + sub v23.8H, v0.8H, v31.8H // .......................e................................................ + // gap // ........................................................................ + add v18.8H, v0.8H, v31.8H // ........................e............................................... + // gap // ........................................................................ + trn1 v19.4S, v28.4S, v20.4S // .........................e.............................................. + // gap // ........................................................................ + trn2 v26.4S, v28.4S, v20.4S // ..........................e............................................. + // gap // ........................................................................ + trn2 v9.4S, v18.4S, v23.4S // ............................e........................................... + // gap // ........................................................................ + ldr q12, [x4, #-48] // ....................................e................................... + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ 
+ trn2 v15.2D, v26.2D, v9.2D // ..............................e......................................... + // gap // ........................................................................ + trn1 v0.4S, v18.4S, v23.4S // ...........................e............................................ + // gap // ........................................................................ + mul v24.8H, v15.8H, v22.8H // ............................................e........................... + // gap // ........................................................................ + sqrdmulh v10.8H, v15.8H, v16.8H // .............................................e.......................... + // gap // ........................................................................ + trn2 v28.2D, v19.2D, v0.2D // .............................e.......................................... + // gap // ........................................................................ + trn1 v21.2D, v26.2D, v9.2D // ................................e....................................... + // gap // ........................................................................ + mul v26.8H, v28.8H, v22.8H // .......................................e................................ + // gap // ........................................................................ + mls v24.8H, v10.8H, v7.H[0] // ..............................................e......................... + // gap // ........................................................................ + trn1 v4.2D, v19.2D, v0.2D // ...............................e........................................ + // gap // ........................................................................ + ldr q13, [x4, #-64] // ...................................e.................................... + // gap // ........................................................................ + // gap // ........................................................................ 
+ // gap // ........................................................................ + add v29.8H, v21.8H, v24.8H // ................................................e....................... + // gap // ........................................................................ + sqrdmulh v31.8H, v28.8H, v16.8H // ........................................e............................... + // gap // ........................................................................ + sub v14.8H, v21.8H, v24.8H // ...............................................e........................ + // gap // ........................................................................ + mul v15.8H, v29.8H, v13.8H // .................................................e...................... + // gap // ........................................................................ + sqrdmulh v12.8H, v29.8H, v12.8H // ..................................................e..................... + // gap // ........................................................................ + mls v26.8H, v31.8H, v7.H[0] // .........................................e.............................. + // gap // ........................................................................ + ldr q31, [x4, #-32] // .....................................e.................................. + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + mls v15.8H, v12.8H, v7.H[0] // ...................................................e.................... + // gap // ........................................................................ + add v29.8H, v4.8H, v26.8H // ...........................................e............................ + // gap // ........................................................................ 
+ ldr q8, [x4, #-16] // ......................................e................................. + // gap // ........................................................................ + // gap // ........................................................................ + // gap // ........................................................................ + sub v28.8H, v29.8H, v15.8H // ....................................................e................... + // gap // ........................................................................ + add v27.8H, v29.8H, v15.8H // .....................................................e.................. + // gap // ........................................................................ + mul v18.8H, v14.8H, v31.8H // ......................................................e................. + // gap // ........................................................................ + sqdmulh v0.8H, v28.8H, v7.H[1] // ..............................................................e......... + // gap // ........................................................................ + sqrdmulh v25.8H, v14.8H, v8.8H // .......................................................e................ + // gap // ........................................................................ + sub v8.8H, v4.8H, v26.8H // ..........................................e............................. + // gap // ........................................................................ + sqdmulh v24.8H, v27.8H, v7.H[1] // ...........................................................e............ + // gap // ........................................................................ + + // original source code + // ldr q8, [x1, #(16*0)] // ......................e...............................................|.................. + // ldr q9, [x1, #(16*1)] // ...................e..................................................|.................. 
+ // ldr q10, [x1, #(16*2)] // .......e..............................................................|........e......... + // ldr q11, [x1, #(16*3)] // e.....................................................................|.e................ + // ldr q0, [x3], #16 // ..........e...........................................................|...........e...... + // mul v24.8h, v10.8h, v0.h[0] // ..................e...................................................|.................. + // sqrdmulh v10.8h, v10.8h, v0.h[1] // .....................e................................................|.................. + // mls v24.8h, v10.8h, v7.h[0] // ........................e.............................................|.................. + // sub v10.8h, v8.8h, v24.8h // ..................................e...................................|.................. + // add v8.8h, v8.8h, v24.8h // ................................e.....................................|.................. + // mul v24.8h, v11.8h, v0.h[0] // .................e....................................................|.................. + // sqrdmulh v11.8h, v11.8h, v0.h[1] // ..............e.......................................................|...............e.. + // mls v24.8h, v11.8h, v7.h[0] // ....................e.................................................|.................. + // sub v11.8h, v9.8h, v24.8h // .........................e............................................|.................. + // add v9.8h, v9.8h, v24.8h // .......................e..............................................|.................. + // mul v24.8h, v9.8h, v0.h[2] // ...........................e..........................................|.................. + // sqrdmulh v9.8h, v9.8h, v0.h[3] // ..........................e...........................................|.................. + // mls v24.8h, v9.8h, v7.h[0] // ...............................e......................................|.................. 
+ // sub v9.8h, v8.8h, v24.8h // ...................................e..................................|.................. + // add v8.8h, v8.8h, v24.8h // ....................................e.................................|.................. + // mul v24.8h, v11.8h, v0.h[4] // .............................e........................................|.................. + // sqrdmulh v11.8h, v11.8h, v0.h[5] // ............................e.........................................|.................. + // mls v24.8h, v11.8h, v7.h[0] // .................................e....................................|.................. + // sub v11.8h, v10.8h, v24.8h // .....................................e................................|.................. + // add v10.8h, v10.8h, v24.8h // ......................................e...............................|.................. + // trn1 v25.4s, v8.4s, v9.4s // .......................................e..............................|.................. + // trn2 v26.4s, v8.4s, v9.4s // ........................................e.............................|.................. + // trn1 v27.4s, v10.4s, v11.4s // ............................................e.........................|.................. + // trn2 v28.4s, v10.4s, v11.4s // .........................................e............................|.................. + // trn2 v10.2d, v25.2d, v27.2d // ...............................................e......................|.................. + // trn2 v11.2d, v26.2d, v28.2d // ...........................................e..........................|.................. + // trn1 v8.2d, v25.2d, v27.2d // ...................................................e..................|.................. + // trn1 v9.2d, v26.2d, v28.2d // ................................................e.....................|.................. 
+ // ldr q0, [x4], #(6*16) // ...............e......................................................|................e. + // ldr q4, [x4, #(-6*16 + 1*16)] // ..............................e.......................................|.................. + // ldr q1, [x4, #(-6*16 + 2*16)] // ....................................................e.................|.................. + // ldr q5, [x4, #(-6*16 + 3*16)] // ..........................................e...........................|.................. + // ldr q2, [x4, #(-6*16 + 4*16)] // ...........................................................e..........|.................. + // ldr q6, [x4, #(-6*16 + 5*16)] // ..............................................................e.......|.................. + // mul v24.8h, v10.8h, v0.8h // .................................................e....................|.................. + // sqrdmulh v10.8h, v10.8h, v4.8h // ......................................................e...............|.................. + // mls v24.8h, v10.8h, v7.h[0] // ..........................................................e...........|.................. + // sub v10.8h, v8.8h, v24.8h // ....................................................................e.|.................. + // add v8.8h, v8.8h, v24.8h // .............................................................e........|.................. + // mul v24.8h, v11.8h, v0.8h // .............................................e........................|.................. + // sqrdmulh v11.8h, v11.8h, v4.8h // ..............................................e.......................|.................. + // mls v24.8h, v11.8h, v7.h[0] // ..................................................e...................|.................. + // sub v11.8h, v9.8h, v24.8h // .......................................................e..............|.................. 
+ // add v9.8h, v9.8h, v24.8h // .....................................................e................|.................. + // mul v24.8h, v9.8h, v1.8h // ........................................................e.............|.................. + // sqrdmulh v9.8h, v9.8h, v5.8h // .........................................................e............|.................. + // mls v24.8h, v9.8h, v7.h[0] // ............................................................e.........|.................. + // sub v9.8h, v8.8h, v24.8h // ...............................................................e......|.................. + // add v8.8h, v8.8h, v24.8h // ................................................................e.....|.................. + // mul v24.8h, v11.8h, v2.8h // .................................................................e....|.................. + // sqrdmulh v11.8h, v11.8h, v6.8h // ...................................................................e..|.................. + // mls v24.8h, v11.8h, v7.h[0] // ......................................................................|*................. + // sub v11.8h, v10.8h, v24.8h // ..*...................................................................|...*.............. + // add v10.8h, v10.8h, v24.8h // ...*..................................................................|....*............. + // sqdmulh v25.8h, v8.8h, v7.h[1] // .....................................................................e|.................. + // srshr v25.8h, v25.8h, #11 // ....*.................................................................|.....*............ + // mls v8.8h, v25.8h, v7.h[0] // .............*........................................................|..............*... + // sqdmulh v25.8h, v9.8h, v7.h[1] // ..................................................................e...|.................. 
+ // srshr v25.8h, v25.8h, #11 // ......................................................................*.................. + // mls v9.8h, v25.8h, v7.h[0] // .*....................................................................|..*............... + // sqdmulh v25.8h, v10.8h, v7.h[1] // ......*...............................................................|.......*.......... + // srshr v25.8h, v25.8h, #11 // .........*............................................................|..........*....... + // mls v10.8h, v25.8h, v7.h[0] // ............*.........................................................|.............*.... + // sqdmulh v25.8h, v11.8h, v7.h[1] // .....*................................................................|......*........... + // srshr v25.8h, v25.8h, #11 // ........*.............................................................|.........*........ + // mls v11.8h, v25.8h, v7.h[0] // ...........*..........................................................|............*..... + // st4 {v8.4S, v9.4S, v10.4S, v11.4S}, [x1], #64 // ................*.....................................................|.................* + + sub count, count, #1 + cbnz count, layer4567_start + mls v18.8H, v25.8H, v7.H[0] // .*............ + // gap // .............. + srshr v21.8H, v0.8H, #11 // *............. + // gap // .............. + srshr v0.8H, v24.8H, #11 // .....*........ + // gap // .............. + // gap // .............. + // gap // .............. + sub v30.8H, v8.8H, v18.8H // ...*.......... + // gap // .............. + add v29.8H, v8.8H, v18.8H // ....*......... + // gap // .............. + mls v27.8H, v0.8H, v7.H[0] // ............*. + // gap // .............. + sqdmulh v0.8H, v30.8H, v7.H[1] // ......*....... + // gap // .............. + sqdmulh v26.8H, v29.8H, v7.H[1] // .......*...... + // gap // .............. + mls v28.8H, v21.8H, v7.H[0] // ..*........... + // gap // .............. + // gap // .............. + // gap // .............. 
+ srshr v0.8H, v0.8H, #11 // ........*..... + // gap // .............. + srshr v25.8H, v26.8H, #11 // .........*.... + // gap // .............. + // gap // .............. + // gap // .............. + mls v30.8H, v0.8H, v7.H[0] // ..........*... + // gap // .............. + mls v29.8H, v25.8H, v7.H[0] // ...........*.. + // gap // .............. + // gap // .............. + // gap // .............. + // gap // .............. + // gap // .............. + // gap // .............. + // gap // .............. + st4 {v27.4S, v28.4S, v29.4S, v30.4S}, [x1], #64 // .............* + // gap // .............. + + // original source code + // srshr v6.8H, v0.8H, #11 // .*............ + // mls v18.8H, v25.8H, v7.H[0] // *............. + // mls v28.8H, v6.8H, v7.H[0] // ........*..... + // sub v30.8H, v8.8H, v18.8H // ...*.......... + // add v29.8H, v8.8H, v18.8H // ....*......... + // srshr v19.8H, v24.8H, #11 // ..*........... + // sqdmulh v5.8H, v30.8H, v7.H[1] // ......*....... + // sqdmulh v24.8H, v29.8H, v7.H[1] // .......*...... + // srshr v12.8H, v5.8H, #11 // .........*.... + // srshr v11.8H, v24.8H, #11 // ..........*... + // mls v30.8H, v12.8H, v7.H[0] // ...........*.. + // mls v29.8H, v11.8H, v7.H[0] // ............*. + // mls v27.8H, v19.8H, v7.H[0] // .....*........ 
+ // st4 {v27.4S, v28.4S, v29.4S, v30.4S}, [x1], #64 // .............* + + + pop_stack + ret \ No newline at end of file diff --git a/tutorial/tutorial-3a.py b/tutorial_files/tutorial-3a.py similarity index 100% rename from tutorial/tutorial-3a.py rename to tutorial_files/tutorial-3a.py diff --git a/tutorial/tutorial-3b.py b/tutorial_files/tutorial-3b.py similarity index 100% rename from tutorial/tutorial-3b.py rename to tutorial_files/tutorial-3b.py diff --git a/tutorial/tutorial-4.py b/tutorial_files/tutorial-4.py similarity index 100% rename from tutorial/tutorial-4.py rename to tutorial_files/tutorial-4.py diff --git a/tutorial/tutorial-5.py b/tutorial_files/tutorial-5.py similarity index 100% rename from tutorial/tutorial-5.py rename to tutorial_files/tutorial-5.py diff --git a/tutorial/tutorial-6.py b/tutorial_files/tutorial-6.py similarity index 100% rename from tutorial/tutorial-6.py rename to
tutorial_files/tutorial-6.py diff --git a/tutorial/tutorial-7.py b/tutorial_files/tutorial-7.py similarity index 100% rename from tutorial/tutorial-7.py rename to tutorial_files/tutorial-7.py diff --git a/tutorial/tutorial_all.sh b/tutorial_files/tutorial_all.sh similarity index 100% rename from tutorial/tutorial_all.sh rename to tutorial_files/tutorial_all.sh