diff --git a/.CMake/alg_support.cmake b/.CMake/alg_support.cmake
index b313206163..f2d2a85920 100644
--- a/.CMake/alg_support.cmake
+++ b/.CMake/alg_support.cmake
@@ -232,6 +232,32 @@ endif()
endif()
+option(OQS_ENABLE_KEM_ML_KEM "Enable ml_kem algorithm family" ON)
+cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512_ipd "" ON "OQS_ENABLE_KEM_ML_KEM" OFF)
+cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512 "" ON "OQS_ENABLE_KEM_ML_KEM" OFF)
+if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
+if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
+ cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_512_ipd" OFF)
+endif()
+endif()
+
+cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_ipd "" ON "OQS_ENABLE_KEM_ML_KEM" OFF)
+cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768 "" ON "OQS_ENABLE_KEM_ML_KEM" OFF)
+if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
+if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
+ cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_768_ipd" OFF)
+endif()
+endif()
+
+cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_ipd "" ON "OQS_ENABLE_KEM_ML_KEM" OFF)
+cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024 "" ON "OQS_ENABLE_KEM_ML_KEM" OFF)
+if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
+if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
+ cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_1024_ipd" OFF)
+endif()
+endif()
+
+
option(OQS_ENABLE_SIG_DILITHIUM "Enable dilithium algorithm family" ON)
cmake_dependent_option(OQS_ENABLE_SIG_dilithium_2 "" ON "OQS_ENABLE_SIG_DILITHIUM" OFF)
if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux")
@@ -273,6 +299,32 @@ endif()
endif()
+option(OQS_ENABLE_SIG_ML_DSA "Enable ml_dsa algorithm family" ON)
+cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_44_ipd "" ON "OQS_ENABLE_SIG_ML_DSA" OFF)
+cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_44 "" ON "OQS_ENABLE_SIG_ML_DSA" OFF)
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux")
+if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
+ cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_44_ipd_avx2 "" ON "OQS_ENABLE_SIG_ml_dsa_44_ipd" OFF)
+endif()
+endif()
+
+cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_65_ipd "" ON "OQS_ENABLE_SIG_ML_DSA" OFF)
+cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_65 "" ON "OQS_ENABLE_SIG_ML_DSA" OFF)
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux")
+if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
+ cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_65_ipd_avx2 "" ON "OQS_ENABLE_SIG_ml_dsa_65_ipd" OFF)
+endif()
+endif()
+
+cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_87_ipd "" ON "OQS_ENABLE_SIG_ML_DSA" OFF)
+cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_87 "" ON "OQS_ENABLE_SIG_ML_DSA" OFF)
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux")
+if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
+ cmake_dependent_option(OQS_ENABLE_SIG_ml_dsa_87_ipd_avx2 "" ON "OQS_ENABLE_SIG_ml_dsa_87_ipd" OFF)
+endif()
+endif()
+
+
option(OQS_ENABLE_SIG_FALCON "Enable falcon algorithm family" ON)
cmake_dependent_option(OQS_ENABLE_SIG_falcon_512 "" ON "OQS_ENABLE_SIG_FALCON" OFF)
if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS))
@@ -396,7 +448,7 @@ if(NOT ((OQS_MINIMAL_BUILD STREQUAL "") OR (OQS_MINIMAL_BUILD STREQUAL "OFF")))
filter_algs("${OQS_MINIMAL_BUILD}")
elseif (${OQS_ALGS_ENABLED} STREQUAL "STD")
##### OQS_COPY_FROM_UPSTREAM_FRAGMENT_LIST_STANDARDIZED_ALGS_START
- filter_algs("KEM_kyber_512;KEM_kyber_768;KEM_kyber_1024;SIG_dilithium_2;SIG_dilithium_3;SIG_dilithium_5;SIG_falcon_512;SIG_falcon_1024;SIG_sphincs_sha2_128f_simple;SIG_sphincs_sha2_128s_simple;SIG_sphincs_sha2_192f_simple;SIG_sphincs_sha2_192s_simple;SIG_sphincs_sha2_256f_simple;SIG_sphincs_sha2_256s_simple;SIG_sphincs_shake_128f_simple;SIG_sphincs_shake_128s_simple;SIG_sphincs_shake_192f_simple;SIG_sphincs_shake_192s_simple;SIG_sphincs_shake_256f_simple;SIG_sphincs_shake_256s_simple")
+ filter_algs("KEM_ml_kem_512_ipd;KEM_ml_kem_768_ipd;KEM_ml_kem_1024_ipd;SIG_ml_dsa_44_ipd;SIG_ml_dsa_65_ipd;SIG_ml_dsa_87_ipd;SIG_falcon_512;SIG_falcon_1024;SIG_sphincs_sha2_128f_simple;SIG_sphincs_sha2_128s_simple;SIG_sphincs_sha2_192f_simple;SIG_sphincs_sha2_192s_simple;SIG_sphincs_sha2_256f_simple;SIG_sphincs_sha2_256s_simple;SIG_sphincs_shake_128f_simple;SIG_sphincs_shake_128s_simple;SIG_sphincs_shake_192f_simple;SIG_sphincs_shake_192s_simple;SIG_sphincs_shake_256f_simple;SIG_sphincs_shake_256s_simple")
##### OQS_COPY_FROM_UPSTREAM_FRAGMENT_LIST_STANDARDIZED_ALGS_END
elseif(${OQS_ALGS_ENABLED} STREQUAL "NIST_R4")
filter_algs("KEM_classic_mceliece_348864;KEM_classic_mceliece_348864f;KEM_classic_mceliece_460896;KEM_classic_mceliece_460896f;KEM_classic_mceliece_6688128;KEM_classic_mceliece_6688128f;KEM_classic_mceliece_6960119;KEM_classic_mceliece_6960119f;KEM_classic_mceliece_8192128;KEM_classic_mceliece_8192128f;KEM_hqc_128;KEM_hqc_192;KEM_hqc_256;KEM_bike_l1;KEM_bike_l3")
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e1f070b3a7..06e5bd193c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -167,9 +167,15 @@ endif()
if(OQS_ENABLE_KEM_KYBER)
set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/kem/kyber/kem_kyber.h)
endif()
+if(OQS_ENABLE_KEM_ML_KEM)
+ set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/kem/ml_kem/kem_ml_kem.h)
+endif()
if(OQS_ENABLE_SIG_DILITHIUM)
set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/sig/dilithium/sig_dilithium.h)
endif()
+if(OQS_ENABLE_SIG_ML_DSA)
+ set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/sig/ml_dsa/sig_ml_dsa.h)
+endif()
if(OQS_ENABLE_SIG_FALCON)
set(PUBLIC_HEADERS ${PUBLIC_HEADERS} ${PROJECT_SOURCE_DIR}/src/sig/falcon/sig_falcon.h)
endif()
diff --git a/README.md b/README.md
index 69edc15e68..aebb3dbae0 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ The list below indicates all algorithms supported by liboqs, but not all those a
- **FrodoKEM**: FrodoKEM-640-AES, FrodoKEM-640-SHAKE, FrodoKEM-976-AES, FrodoKEM-976-SHAKE, FrodoKEM-1344-AES, FrodoKEM-1344-SHAKE
- **HQC**: HQC-128, HQC-192, HQC-256
- **Kyber**: Kyber512, Kyber768, Kyber1024
+- **ML-KEM**: ML-KEM-512-ipd (alias: ML-KEM-512), ML-KEM-768-ipd (alias: ML-KEM-768), ML-KEM-1024-ipd (alias: ML-KEM-1024)
- **NTRU-Prime**: sntrup761
@@ -54,6 +55,7 @@ The list below indicates all algorithms supported by liboqs, but not all those a
- **CRYSTALS-Dilithium**: Dilithium2, Dilithium3, Dilithium5
- **Falcon**: Falcon-512, Falcon-1024
+- **ML-DSA**: ML-DSA-44-ipd (alias: ML-DSA-44), ML-DSA-65-ipd (alias: ML-DSA-65), ML-DSA-87-ipd (alias: ML-DSA-87)
- **SPHINCS+-SHA2**: SPHINCS+-SHA2-128f-simple, SPHINCS+-SHA2-128s-simple, SPHINCS+-SHA2-192f-simple, SPHINCS+-SHA2-192s-simple, SPHINCS+-SHA2-256f-simple, SPHINCS+-SHA2-256s-simple
- **SPHINCS+-SHAKE**: SPHINCS+-SHAKE-128f-simple, SPHINCS+-SHAKE-128s-simple, SPHINCS+-SHAKE-192f-simple, SPHINCS+-SHAKE-192s-simple, SPHINCS+-SHAKE-256f-simple, SPHINCS+-SHAKE-256s-simple
@@ -176,8 +178,10 @@ liboqs includes some third party libraries or modules that are licensed differen
- `src/kem/classic_mceliece/pqclean_*`: public domain
- `src/kem/kyber/pqcrystals-*`: public domain (CC0) or Apache License v2.0
- `src/kem/kyber/pqclean_*`: public domain (CC0), and public domain (CC0) or Apache License v2.0, and public domain (CC0) or MIT, and MIT
+- `src/kem/ml_kem/pqcrystals-*`: public domain (CC0) or Apache License v2.0
- `src/sig/dilithium/pqcrystals-*`: public domain (CC0) or Apache License v2.0
- `src/sig/dilithium/pqclean_*`: public domain (CC0), and public domain (CC0) or Apache License v2.0, and public domain (CC0) or MIT, and MIT
+- `src/sig/ml_dsa/pqcrystals-*`: public domain (CC0) or Apache License v2.0
- `src/sig/sphincs/pqclean_*`: CC0 (public domain)
## Acknowledgements
diff --git a/docs/algorithms/kem/bike.md b/docs/algorithms/kem/bike.md
index 10741ad398..841993739c 100644
--- a/docs/algorithms/kem/bike.md
+++ b/docs/algorithms/kem/bike.md
@@ -13,11 +13,11 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
-|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
-| BIKE-L1 | IND-CPA | 1 | 1541 | 5223 | 1573 | 32 |
-| BIKE-L3 | IND-CPA | 3 | 3083 | 10105 | 3115 | 32 |
-| BIKE-L5 | IND-CPA | 5 | 5122 | 16494 | 5154 | 32 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
+|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
+| BIKE-L1 | NA | IND-CPA | 1 | 1541 | 5223 | 1573 | 32 |
+| BIKE-L3 | NA | IND-CPA | 3 | 3083 | 10105 | 3115 | 32 |
+| BIKE-L5 | NA | IND-CPA | 5 | 5122 | 16494 | 5154 | 32 |
## BIKE-L1 implementation characteristics
diff --git a/docs/algorithms/kem/classic_mceliece.md b/docs/algorithms/kem/classic_mceliece.md
index 29ba093075..68840c4b00 100644
--- a/docs/algorithms/kem/classic_mceliece.md
+++ b/docs/algorithms/kem/classic_mceliece.md
@@ -18,18 +18,18 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
-|:-------------------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
-| Classic-McEliece-348864 | IND-CCA2 | 1 | 261120 | 6492 | 96 | 32 |
-| Classic-McEliece-348864f | IND-CCA2 | 1 | 261120 | 6492 | 96 | 32 |
-| Classic-McEliece-460896 | IND-CCA2 | 3 | 524160 | 13608 | 156 | 32 |
-| Classic-McEliece-460896f | IND-CCA2 | 3 | 524160 | 13608 | 156 | 32 |
-| Classic-McEliece-6688128 | IND-CCA2 | 5 | 1044992 | 13932 | 208 | 32 |
-| Classic-McEliece-6688128f | IND-CCA2 | 5 | 1044992 | 13932 | 208 | 32 |
-| Classic-McEliece-6960119 | IND-CCA2 | 5 | 1047319 | 13948 | 194 | 32 |
-| Classic-McEliece-6960119f | IND-CCA2 | 5 | 1047319 | 13948 | 194 | 32 |
-| Classic-McEliece-8192128 | IND-CCA2 | 5 | 1357824 | 14120 | 208 | 32 |
-| Classic-McEliece-8192128f | IND-CCA2 | 5 | 1357824 | 14120 | 208 | 32 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
+|:-------------------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
+| Classic-McEliece-348864 | NA | IND-CCA2 | 1 | 261120 | 6492 | 96 | 32 |
+| Classic-McEliece-348864f | NA | IND-CCA2 | 1 | 261120 | 6492 | 96 | 32 |
+| Classic-McEliece-460896 | NA | IND-CCA2 | 3 | 524160 | 13608 | 156 | 32 |
+| Classic-McEliece-460896f | NA | IND-CCA2 | 3 | 524160 | 13608 | 156 | 32 |
+| Classic-McEliece-6688128 | NA | IND-CCA2 | 5 | 1044992 | 13932 | 208 | 32 |
+| Classic-McEliece-6688128f | NA | IND-CCA2 | 5 | 1044992 | 13932 | 208 | 32 |
+| Classic-McEliece-6960119 | NA | IND-CCA2 | 5 | 1047319 | 13948 | 194 | 32 |
+| Classic-McEliece-6960119f | NA | IND-CCA2 | 5 | 1047319 | 13948 | 194 | 32 |
+| Classic-McEliece-8192128 | NA | IND-CCA2 | 5 | 1357824 | 14120 | 208 | 32 |
+| Classic-McEliece-8192128f | NA | IND-CCA2 | 5 | 1357824 | 14120 | 208 | 32 |
## Classic-McEliece-348864 implementation characteristics
diff --git a/docs/algorithms/kem/frodokem.md b/docs/algorithms/kem/frodokem.md
index fbf5366b04..07f216a5a5 100644
--- a/docs/algorithms/kem/frodokem.md
+++ b/docs/algorithms/kem/frodokem.md
@@ -12,14 +12,14 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
-|:-------------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
-| FrodoKEM-640-AES | IND-CCA2 | 1 | 9616 | 19888 | 9720 | 16 |
-| FrodoKEM-640-SHAKE | IND-CCA2 | 1 | 9616 | 19888 | 9720 | 16 |
-| FrodoKEM-976-AES | IND-CCA2 | 3 | 15632 | 31296 | 15744 | 24 |
-| FrodoKEM-976-SHAKE | IND-CCA2 | 3 | 15632 | 31296 | 15744 | 24 |
-| FrodoKEM-1344-AES | IND-CCA2 | 5 | 21520 | 43088 | 21632 | 32 |
-| FrodoKEM-1344-SHAKE | IND-CCA2 | 5 | 21520 | 43088 | 21632 | 32 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
+|:-------------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
+| FrodoKEM-640-AES | NA | IND-CCA2 | 1 | 9616 | 19888 | 9720 | 16 |
+| FrodoKEM-640-SHAKE | NA | IND-CCA2 | 1 | 9616 | 19888 | 9720 | 16 |
+| FrodoKEM-976-AES | NA | IND-CCA2 | 3 | 15632 | 31296 | 15744 | 24 |
+| FrodoKEM-976-SHAKE | NA | IND-CCA2 | 3 | 15632 | 31296 | 15744 | 24 |
+| FrodoKEM-1344-AES | NA | IND-CCA2 | 5 | 21520 | 43088 | 21632 | 32 |
+| FrodoKEM-1344-SHAKE | NA | IND-CCA2 | 5 | 21520 | 43088 | 21632 | 32 |
## FrodoKEM-640-AES implementation characteristics
diff --git a/docs/algorithms/kem/hqc.md b/docs/algorithms/kem/hqc.md
index 84dab7f6c5..58d0834815 100644
--- a/docs/algorithms/kem/hqc.md
+++ b/docs/algorithms/kem/hqc.md
@@ -14,11 +14,11 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
-|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
-| HQC-128 | IND-CCA2 | 1 | 2249 | 2305 | 4433 | 64 |
-| HQC-192 | IND-CCA2 | 3 | 4522 | 4586 | 8978 | 64 |
-| HQC-256 | IND-CCA2 | 5 | 7245 | 7317 | 14421 | 64 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
+|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
+| HQC-128 | NA | IND-CCA2 | 1 | 2249 | 2305 | 4433 | 64 |
+| HQC-192 | NA | IND-CCA2 | 3 | 4522 | 4586 | 8978 | 64 |
+| HQC-256 | NA | IND-CCA2 | 5 | 7245 | 7317 | 14421 | 64 |
## HQC-128 implementation characteristics
diff --git a/docs/algorithms/kem/kyber.md b/docs/algorithms/kem/kyber.md
index 9f2ad1f85d..a75c144a2d 100644
--- a/docs/algorithms/kem/kyber.md
+++ b/docs/algorithms/kem/kyber.md
@@ -17,11 +17,11 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
-|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
-| Kyber512 | IND-CCA2 | 1 | 800 | 1632 | 768 | 32 |
-| Kyber768 | IND-CCA2 | 3 | 1184 | 2400 | 1088 | 32 |
-| Kyber1024 | IND-CCA2 | 5 | 1568 | 3168 | 1568 | 32 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
+|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
+| Kyber512 | NA | IND-CCA2 | 1 | 800 | 1632 | 768 | 32 |
+| Kyber768 | NA | IND-CCA2 | 3 | 1184 | 2400 | 1088 | 32 |
+| Kyber1024 | NA | IND-CCA2 | 5 | 1568 | 3168 | 1568 | 32 |
## Kyber512 implementation characteristics
diff --git a/docs/algorithms/kem/ml_kem.md b/docs/algorithms/kem/ml_kem.md
new file mode 100644
index 0000000000..92d1a5b4bd
--- /dev/null
+++ b/docs/algorithms/kem/ml_kem.md
@@ -0,0 +1,53 @@
+# ML-KEM
+
+- **Algorithm type**: Key encapsulation mechanism.
+- **Main cryptographic assumption**: Module LWE+R with base ring Z[x]/(3329, x^256+1).
+- **Principal submitters**: Peter Schwabe.
+- **Auxiliary submitters**: Roberto Avanzi, Joppe Bos, Léo Ducas, Eike Kiltz, Tancrède Lepoint, Vadim Lyubashevsky, John M. Schanck, Gregor Seiler, Damien Stehlé.
+- **Authors' website**: https://pq-crystals.org/kyber/ and https://csrc.nist.gov/pubs/fips/203/ipd
+- **Specification version**: ML-KEM-ipd.
+- **Primary Source**:
+ - **Source**: https://github.com/pq-crystals/kyber/commit/11d00ff1f20cfca1f72d819e5a45165c1e0a2816 with copy_from_upstream patches
+ - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
+
+
+## Parameter set summary
+
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
+|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
+| ML-KEM-512-ipd | ML-KEM-512 | IND-CCA2 | 1 | 800 | 1632 | 768 | 32 |
+| ML-KEM-768-ipd | ML-KEM-768 | IND-CCA2 | 3 | 1184 | 2400 | 1088 | 32 |
+| ML-KEM-1024-ipd | ML-KEM-1024 | IND-CCA2 | 5 | 1568 | 3168 | 1568 | 32 |
+
+## ML-KEM-512-ipd implementation characteristics
+
+| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ |
+|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------|
+| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+
+Are implementations chosen based on runtime CPU feature detection? **Yes**.
+
+ ‡For an explanation of what this denotes, consult the [Explanation of Terms](#explanation-of-terms) section at the end of this file.
+
+## ML-KEM-768-ipd implementation characteristics
+
+| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
+|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
+| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+
+Are implementations chosen based on runtime CPU feature detection? **Yes**.
+
+## ML-KEM-1024-ipd implementation characteristics
+
+| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
+|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
+| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
+
+Are implementations chosen based on runtime CPU feature detection? **Yes**.
+
+## Explanation of Terms
+
+- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments.
\ No newline at end of file
diff --git a/docs/algorithms/kem/ml_kem.yml b/docs/algorithms/kem/ml_kem.yml
new file mode 100644
index 0000000000..38b0a3ef24
--- /dev/null
+++ b/docs/algorithms/kem/ml_kem.yml
@@ -0,0 +1,125 @@
+name: ML-KEM
+type: kem
+principal-submitters:
+- Peter Schwabe
+auxiliary-submitters:
+- Roberto Avanzi
+- Joppe Bos
+- Léo Ducas
+- Eike Kiltz
+- Tancrède Lepoint
+- Vadim Lyubashevsky
+- John M. Schanck
+- Gregor Seiler
+- Damien Stehlé
+crypto-assumption: Module LWE+R with base ring Z[x]/(3329, x^256+1)
+website: https://pq-crystals.org/kyber/ and https://csrc.nist.gov/pubs/fips/203/ipd
+nist-round: ipd
+spec-version: ML-KEM-ipd
+primary-upstream:
+ source: https://github.com/pq-crystals/kyber/commit/11d00ff1f20cfca1f72d819e5a45165c1e0a2816
+ with copy_from_upstream patches
+ spdx-license-identifier: CC0-1.0 or Apache-2.0
+parameter-sets:
+- name: ML-KEM-512-ipd
+ alias: ML-KEM-512
+ claimed-nist-level: 1
+ claimed-security: IND-CCA2
+ length-public-key: 800
+ length-ciphertext: 768
+ length-secret-key: 1632
+ length-shared-secret: 32
+ implementations-switch-on-runtime-cpu-features: true
+ implementations:
+ - upstream: primary-upstream
+ upstream-id: ref
+ supported-platforms: all
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+ - upstream: primary-upstream
+ upstream-id: avx2
+ supported-platforms:
+ - architecture: x86_64
+ operating_systems:
+ - Linux
+ - Darwin
+ required_flags:
+ - avx2
+ - bmi2
+ - popcnt
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+- name: ML-KEM-768-ipd
+ alias: ML-KEM-768
+ claimed-nist-level: 3
+ claimed-security: IND-CCA2
+ length-public-key: 1184
+ length-ciphertext: 1088
+ length-secret-key: 2400
+ length-shared-secret: 32
+ implementations-switch-on-runtime-cpu-features: true
+ implementations:
+ - upstream: primary-upstream
+ upstream-id: ref
+ supported-platforms: all
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+ - upstream: primary-upstream
+ upstream-id: avx2
+ supported-platforms:
+ - architecture: x86_64
+ operating_systems:
+ - Linux
+ - Darwin
+ required_flags:
+ - avx2
+ - bmi2
+ - popcnt
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+- name: ML-KEM-1024-ipd
+ alias: ML-KEM-1024
+ claimed-nist-level: 5
+ claimed-security: IND-CCA2
+ length-public-key: 1568
+ length-ciphertext: 1568
+ length-secret-key: 3168
+ length-shared-secret: 32
+ implementations-switch-on-runtime-cpu-features: true
+ implementations:
+ - upstream: primary-upstream
+ upstream-id: ref
+ supported-platforms: all
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+ - upstream: primary-upstream
+ upstream-id: avx2
+ supported-platforms:
+ - architecture: x86_64
+ operating_systems:
+ - Linux
+ - Darwin
+ required_flags:
+ - avx2
+ - bmi2
+ - popcnt
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
diff --git a/docs/algorithms/kem/ntruprime.md b/docs/algorithms/kem/ntruprime.md
index 07a7ca899d..5ff56716ff 100644
--- a/docs/algorithms/kem/ntruprime.md
+++ b/docs/algorithms/kem/ntruprime.md
@@ -14,9 +14,9 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
-|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
-| sntrup761 | IND-CCA2 | 2 | 1158 | 1763 | 1039 | 32 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Ciphertext size (bytes) | Shared secret size (bytes) |
+|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|--------------------------:|-----------------------------:|
+| sntrup761 | NA | IND-CCA2 | 2 | 1158 | 1763 | 1039 | 32 |
## sntrup761 implementation characteristics
diff --git a/docs/algorithms/sig/dilithium.md b/docs/algorithms/sig/dilithium.md
index cd4ecb7336..d26daa2854 100644
--- a/docs/algorithms/sig/dilithium.md
+++ b/docs/algorithms/sig/dilithium.md
@@ -17,11 +17,11 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) |
-|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:|
-| Dilithium2 | EUF-CMA | 2 | 1312 | 2528 | 2420 |
-| Dilithium3 | EUF-CMA | 3 | 1952 | 4000 | 3293 |
-| Dilithium5 | EUF-CMA | 5 | 2592 | 4864 | 4595 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) |
+|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:|
+| Dilithium2 | NA | EUF-CMA | 2 | 1312 | 2528 | 2420 |
+| Dilithium3 | NA | EUF-CMA | 3 | 1952 | 4000 | 3293 |
+| Dilithium5 | NA | EUF-CMA | 5 | 2592 | 4864 | 4595 |
## Dilithium2 implementation characteristics
diff --git a/docs/algorithms/sig/falcon.md b/docs/algorithms/sig/falcon.md
index 1221110624..df05809687 100644
--- a/docs/algorithms/sig/falcon.md
+++ b/docs/algorithms/sig/falcon.md
@@ -13,10 +13,10 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) |
-|:---------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:|
-| Falcon-512 | EUF-CMA | 1 | 897 | 1281 | 666 |
-| Falcon-1024 | EUF-CMA | 5 | 1793 | 2305 | 1280 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) |
+|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:|
+| Falcon-512 | NA | EUF-CMA | 1 | 897 | 1281 | 666 |
+| Falcon-1024 | NA | EUF-CMA | 5 | 1793 | 2305 | 1280 |
## Falcon-512 implementation characteristics
diff --git a/docs/algorithms/sig/ml_dsa.md b/docs/algorithms/sig/ml_dsa.md
new file mode 100644
index 0000000000..ab2b43488e
--- /dev/null
+++ b/docs/algorithms/sig/ml_dsa.md
@@ -0,0 +1,53 @@
+# ML-DSA
+
+- **Algorithm type**: Digital signature scheme.
+- **Main cryptographic assumption**: hardness of lattice problems over module lattices.
+- **Principal submitters**: Vadim Lyubashevsky.
+- **Auxiliary submitters**: Shi Bai, Léo Ducas, Eike Kiltz, Tancrède Lepoint, Peter Schwabe, Gregor Seiler, Damien Stehlé.
+- **Authors' website**: https://pq-crystals.org/dilithium/ and https://csrc.nist.gov/pubs/fips/204/ipd
+- **Specification version**: ML-DSA-ipd.
+- **Primary Source**:
+ - **Source**: https://github.com/pq-crystals/dilithium/commit/e7bed6258b9a3703ce78d4ec38021c86382ce31c with copy_from_upstream patches
+ - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
+
+
+## Parameter set summary
+
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) |
+|:---------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:|
+| ML-DSA-44-ipd | ML-DSA-44 | EUF-CMA | 2 | 1312 | 2560 | 2420 |
+| ML-DSA-65-ipd | ML-DSA-65 | EUF-CMA | 3 | 1952 | 4032 | 3309 |
+| ML-DSA-87-ipd | ML-DSA-87 | EUF-CMA | 5 | 2592 | 4896 | 4627 |
+
+## ML-DSA-44-ipd implementation characteristics
+
+| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ |
+|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------|
+| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Darwin,Linux | AVX2,POPCNT | True | True | False |
+
+Are implementations chosen based on runtime CPU feature detection? **Yes**.
+
+ ‡For an explanation of what this denotes, consult the [Explanation of Terms](#explanation-of-terms) section at the end of this file.
+
+## ML-DSA-65-ipd implementation characteristics
+
+| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
+|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
+| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Darwin,Linux | AVX2,POPCNT | True | True | False |
+
+Are implementations chosen based on runtime CPU feature detection? **Yes**.
+
+## ML-DSA-87-ipd implementation characteristics
+
+| Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? |
+|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
+| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
+| [Primary Source](#primary-source) | avx2 | x86\_64 | Darwin,Linux | AVX2,POPCNT | True | True | False |
+
+Are implementations chosen based on runtime CPU feature detection? **Yes**.
+
+## Explanation of Terms
+
+- **Large Stack Usage**: Implementations identified as having such may cause failures when running in threads or in constrained environments.
\ No newline at end of file
diff --git a/docs/algorithms/sig/ml_dsa.yml b/docs/algorithms/sig/ml_dsa.yml
new file mode 100644
index 0000000000..c936883588
--- /dev/null
+++ b/docs/algorithms/sig/ml_dsa.yml
@@ -0,0 +1,117 @@
+name: ML-DSA
+type: signature
+principal-submitters:
+- Vadim Lyubashevsky
+auxiliary-submitters:
+- Shi Bai
+- Léo Ducas
+- Eike Kiltz
+- Tancrède Lepoint
+- Peter Schwabe
+- Gregor Seiler
+- Damien Stehlé
+crypto-assumption: hardness of lattice problems over module lattices
+website: https://pq-crystals.org/dilithium/ and https://csrc.nist.gov/pubs/fips/204/ipd
+nist-round: ipd
+spec-version: ML-DSA-ipd
+primary-upstream:
+ source: https://github.com/pq-crystals/dilithium/commit/e7bed6258b9a3703ce78d4ec38021c86382ce31c
+ with copy_from_upstream patches
+ spdx-license-identifier: CC0-1.0 or Apache-2.0
+parameter-sets:
+- name: ML-DSA-44-ipd
+ alias: ML-DSA-44
+ claimed-nist-level: 2
+ claimed-security: EUF-CMA
+ length-public-key: 1312
+ length-secret-key: 2560
+ length-signature: 2420
+ implementations-switch-on-runtime-cpu-features: true
+ implementations:
+ - upstream: primary-upstream
+ upstream-id: ref
+ supported-platforms: all
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+ - upstream: primary-upstream
+ upstream-id: avx2
+ supported-platforms:
+ - architecture: x86_64
+ operating_systems:
+ - Darwin
+ - Linux
+ required_flags:
+ - avx2
+ - popcnt
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+- name: ML-DSA-65-ipd
+ alias: ML-DSA-65
+ claimed-nist-level: 3
+ claimed-security: EUF-CMA
+ length-public-key: 1952
+ length-secret-key: 4032
+ length-signature: 3309
+ implementations-switch-on-runtime-cpu-features: true
+ implementations:
+ - upstream: primary-upstream
+ upstream-id: ref
+ supported-platforms: all
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+ - upstream: primary-upstream
+ upstream-id: avx2
+ supported-platforms:
+ - architecture: x86_64
+ operating_systems:
+ - Darwin
+ - Linux
+ required_flags:
+ - avx2
+ - popcnt
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+- name: ML-DSA-87-ipd
+ alias: ML-DSA-87
+ claimed-nist-level: 5
+ claimed-security: EUF-CMA
+ length-public-key: 2592
+ length-secret-key: 4896
+ length-signature: 4627
+ implementations-switch-on-runtime-cpu-features: true
+ implementations:
+ - upstream: primary-upstream
+ upstream-id: ref
+ supported-platforms: all
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
+ - upstream: primary-upstream
+ upstream-id: avx2
+ supported-platforms:
+ - architecture: x86_64
+ operating_systems:
+ - Darwin
+ - Linux
+ required_flags:
+ - avx2
+ - popcnt
+ common-crypto:
+ - SHA3: liboqs
+ no-secret-dependent-branching-claimed: true
+ no-secret-dependent-branching-checked-by-valgrind: true
+ large-stack-usage: false
diff --git a/docs/algorithms/sig/sphincs.md b/docs/algorithms/sig/sphincs.md
index 3ce2b0ae96..a1660e483d 100644
--- a/docs/algorithms/sig/sphincs.md
+++ b/docs/algorithms/sig/sphincs.md
@@ -17,20 +17,20 @@
## Parameter set summary
-| Parameter set | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) |
-|:--------------------------:|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:|
-| SPHINCS+-SHA2-128f-simple | EUF-CMA | 1 | 32 | 64 | 17088 |
-| SPHINCS+-SHA2-128s-simple | EUF-CMA | 1 | 32 | 64 | 7856 |
-| SPHINCS+-SHA2-192f-simple | EUF-CMA | 3 | 48 | 96 | 35664 |
-| SPHINCS+-SHA2-192s-simple | EUF-CMA | 3 | 48 | 96 | 16224 |
-| SPHINCS+-SHA2-256f-simple | EUF-CMA | 5 | 64 | 128 | 49856 |
-| SPHINCS+-SHA2-256s-simple | EUF-CMA | 5 | 64 | 128 | 29792 |
-| SPHINCS+-SHAKE-128f-simple | EUF-CMA | 1 | 32 | 64 | 17088 |
-| SPHINCS+-SHAKE-128s-simple | EUF-CMA | 1 | 32 | 64 | 7856 |
-| SPHINCS+-SHAKE-192f-simple | EUF-CMA | 3 | 48 | 96 | 35664 |
-| SPHINCS+-SHAKE-192s-simple | EUF-CMA | 3 | 48 | 96 | 16224 |
-| SPHINCS+-SHAKE-256f-simple | EUF-CMA | 5 | 64 | 128 | 49856 |
-| SPHINCS+-SHAKE-256s-simple | EUF-CMA | 5 | 64 | 128 | 29792 |
+| Parameter set | Parameter set alias | Security model | Claimed NIST Level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) |
+|:--------------------------:|:----------------------|:-----------------|---------------------:|--------------------------:|--------------------------:|-------------------------:|
+| SPHINCS+-SHA2-128f-simple | NA | EUF-CMA | 1 | 32 | 64 | 17088 |
+| SPHINCS+-SHA2-128s-simple | NA | EUF-CMA | 1 | 32 | 64 | 7856 |
+| SPHINCS+-SHA2-192f-simple | NA | EUF-CMA | 3 | 48 | 96 | 35664 |
+| SPHINCS+-SHA2-192s-simple | NA | EUF-CMA | 3 | 48 | 96 | 16224 |
+| SPHINCS+-SHA2-256f-simple | NA | EUF-CMA | 5 | 64 | 128 | 49856 |
+| SPHINCS+-SHA2-256s-simple | NA | EUF-CMA | 5 | 64 | 128 | 29792 |
+| SPHINCS+-SHAKE-128f-simple | NA | EUF-CMA | 1 | 32 | 64 | 17088 |
+| SPHINCS+-SHAKE-128s-simple | NA | EUF-CMA | 1 | 32 | 64 | 7856 |
+| SPHINCS+-SHAKE-192f-simple | NA | EUF-CMA | 3 | 48 | 96 | 35664 |
+| SPHINCS+-SHAKE-192s-simple | NA | EUF-CMA | 3 | 48 | 96 | 16224 |
+| SPHINCS+-SHAKE-256f-simple | NA | EUF-CMA | 5 | 64 | 128 | 49856 |
+| SPHINCS+-SHAKE-256s-simple | NA | EUF-CMA | 5 | 64 | 128 | 29792 |
## SPHINCS+-SHA2-128f-simple implementation characteristics
diff --git a/docs/cbom.json b/docs/cbom.json
index 87a7be3227..02d2d59ca4 100644
--- a/docs/cbom.json
+++ b/docs/cbom.json
@@ -1,23 +1,23 @@
{
"bomFormat": "CBOM",
"specVersion": "1.4-cbom-1.0",
- "serialNumber": "urn:uuid:043e6cd4-f2a6-4828-ae97-7cbdbd372414",
+ "serialNumber": "urn:uuid:c25dad99-ad00-48b6-aa9e-25d4f7c3c8c5",
"version": 1,
"metadata": {
- "timestamp": "2023-10-19T08:58:49.361520",
+ "timestamp": "2023-12-13T17:05:36.137517",
"component": {
"type": "library",
- "bom-ref": "pkg:github/open-quantum-safe/liboqs@4846f81a98232e6c90f08578e8f122146550be8d",
+ "bom-ref": "pkg:github/open-quantum-safe/liboqs@5f83324a6c464448b70b1e57b3cd161b6832e0e0",
"name": "liboqs",
- "version": "4846f81a98232e6c90f08578e8f122146550be8d"
+ "version": "5f83324a6c464448b70b1e57b3cd161b6832e0e0"
}
},
"components": [
{
"type": "library",
- "bom-ref": "pkg:github/open-quantum-safe/liboqs@4846f81a98232e6c90f08578e8f122146550be8d",
+ "bom-ref": "pkg:github/open-quantum-safe/liboqs@5f83324a6c464448b70b1e57b3cd161b6832e0e0",
"name": "liboqs",
- "version": "4846f81a98232e6c90f08578e8f122146550be8d"
+ "version": "5f83324a6c464448b70b1e57b3cd161b6832e0e0"
},
{
"type": "crypto-asset",
@@ -959,6 +959,126 @@
"nistQuantumSecurityLevel": 5
}
},
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-KEM-512-ipd:generic",
+ "name": "ML-KEM",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-KEM-512-ipd",
+ "primitive": "kem",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "encapsulate",
+ "decapsulate"
+ ],
+ "implementationPlatform": "generic"
+ },
+ "nistQuantumSecurityLevel": 1
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-KEM-512-ipd:x86_64",
+ "name": "ML-KEM",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-KEM-512-ipd",
+ "primitive": "kem",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "encapsulate",
+ "decapsulate"
+ ],
+ "implementationPlatform": "x86_64"
+ },
+ "nistQuantumSecurityLevel": 1
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-KEM-768-ipd:generic",
+ "name": "ML-KEM",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-KEM-768-ipd",
+ "primitive": "kem",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "encapsulate",
+ "decapsulate"
+ ],
+ "implementationPlatform": "generic"
+ },
+ "nistQuantumSecurityLevel": 3
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-KEM-768-ipd:x86_64",
+ "name": "ML-KEM",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-KEM-768-ipd",
+ "primitive": "kem",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "encapsulate",
+ "decapsulate"
+ ],
+ "implementationPlatform": "x86_64"
+ },
+ "nistQuantumSecurityLevel": 3
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-KEM-1024-ipd:generic",
+ "name": "ML-KEM",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-KEM-1024-ipd",
+ "primitive": "kem",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "encapsulate",
+ "decapsulate"
+ ],
+ "implementationPlatform": "generic"
+ },
+ "nistQuantumSecurityLevel": 5
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-KEM-1024-ipd:x86_64",
+ "name": "ML-KEM",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-KEM-1024-ipd",
+ "primitive": "kem",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "encapsulate",
+ "decapsulate"
+ ],
+ "implementationPlatform": "x86_64"
+ },
+ "nistQuantumSecurityLevel": 5
+ }
+ },
{
"type": "crypto-asset",
"bom-ref": "alg:sntrup761:generic",
@@ -1299,6 +1419,126 @@
"nistQuantumSecurityLevel": 5
}
},
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-DSA-44-ipd:generic",
+ "name": "ML-DSA",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-DSA-44-ipd",
+ "primitive": "signature",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "sign",
+ "verify"
+ ],
+ "implementationPlatform": "generic"
+ },
+ "nistQuantumSecurityLevel": 2
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-DSA-44-ipd:x86_64",
+ "name": "ML-DSA",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-DSA-44-ipd",
+ "primitive": "signature",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "sign",
+ "verify"
+ ],
+ "implementationPlatform": "x86_64"
+ },
+ "nistQuantumSecurityLevel": 2
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-DSA-65-ipd:generic",
+ "name": "ML-DSA",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-DSA-65-ipd",
+ "primitive": "signature",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "sign",
+ "verify"
+ ],
+ "implementationPlatform": "generic"
+ },
+ "nistQuantumSecurityLevel": 3
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-DSA-65-ipd:x86_64",
+ "name": "ML-DSA",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-DSA-65-ipd",
+ "primitive": "signature",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "sign",
+ "verify"
+ ],
+ "implementationPlatform": "x86_64"
+ },
+ "nistQuantumSecurityLevel": 3
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-DSA-87-ipd:generic",
+ "name": "ML-DSA",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-DSA-87-ipd",
+ "primitive": "signature",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "sign",
+ "verify"
+ ],
+ "implementationPlatform": "generic"
+ },
+ "nistQuantumSecurityLevel": 5
+ }
+ },
+ {
+ "type": "crypto-asset",
+ "bom-ref": "alg:ML-DSA-87-ipd:x86_64",
+ "name": "ML-DSA",
+ "cryptoProperties": {
+ "assetType": "algorithm",
+ "algorithmProperties": {
+ "variant": "ML-DSA-87-ipd",
+ "primitive": "signature",
+ "implementationLevel": "softwarePlainRam",
+ "cryptoFunctions": [
+ "keygen",
+ "sign",
+ "verify"
+ ],
+ "implementationPlatform": "x86_64"
+ },
+ "nistQuantumSecurityLevel": 5
+ }
+ },
{
"type": "crypto-asset",
"bom-ref": "alg:SPHINCS+-SHA2-128f-simple:generic",
@@ -1808,7 +2048,7 @@
],
"dependencies": [
{
- "ref": "pkg:github/open-quantum-safe/liboqs@4846f81a98232e6c90f08578e8f122146550be8d",
+ "ref": "pkg:github/open-quantum-safe/liboqs@5f83324a6c464448b70b1e57b3cd161b6832e0e0",
"dependsOn": [
"alg:BIKE-L1:x86_64",
"alg:BIKE-L3:x86_64",
@@ -1857,6 +2097,12 @@
"alg:Kyber1024:generic",
"alg:Kyber1024:x86_64",
"alg:Kyber1024:armv8-a",
+ "alg:ML-KEM-512-ipd:generic",
+ "alg:ML-KEM-512-ipd:x86_64",
+ "alg:ML-KEM-768-ipd:generic",
+ "alg:ML-KEM-768-ipd:x86_64",
+ "alg:ML-KEM-1024-ipd:generic",
+ "alg:ML-KEM-1024-ipd:x86_64",
"alg:sntrup761:generic",
"alg:sntrup761:x86_64",
"alg:Dilithium2:generic",
@@ -1874,6 +2120,12 @@
"alg:Falcon-1024:generic",
"alg:Falcon-1024:x86_64",
"alg:Falcon-1024:armv8-a",
+ "alg:ML-DSA-44-ipd:generic",
+ "alg:ML-DSA-44-ipd:x86_64",
+ "alg:ML-DSA-65-ipd:generic",
+ "alg:ML-DSA-65-ipd:x86_64",
+ "alg:ML-DSA-87-ipd:generic",
+ "alg:ML-DSA-87-ipd:x86_64",
"alg:SPHINCS+-SHA2-128f-simple:generic",
"alg:SPHINCS+-SHA2-128f-simple:x86_64",
"alg:SPHINCS+-SHA2-128s-simple:generic",
@@ -2262,6 +2514,48 @@
],
"dependencyType": "uses"
},
+ {
+ "ref": "alg:ML-KEM-512-ipd:generic",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-KEM-512-ipd:x86_64",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-KEM-768-ipd:generic",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-KEM-768-ipd:x86_64",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-KEM-1024-ipd:generic",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-KEM-1024-ipd:x86_64",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
{
"ref": "alg:sntrup761:generic",
"dependsOn": [
@@ -2381,6 +2675,48 @@
],
"dependencyType": "uses"
},
+ {
+ "ref": "alg:ML-DSA-44-ipd:generic",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-DSA-44-ipd:x86_64",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-DSA-65-ipd:generic",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-DSA-65-ipd:x86_64",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-DSA-87-ipd:generic",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
+ {
+ "ref": "alg:ML-DSA-87-ipd:x86_64",
+ "dependsOn": [
+ "alg:sha3"
+ ],
+ "dependencyType": "uses"
+ },
{
"ref": "alg:SPHINCS+-SHAKE-128f-simple:generic",
"dependsOn": [
diff --git a/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg.fragment b/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg.fragment
index b600b1e51f..cb3b7d15dd 100644
--- a/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg.fragment
+++ b/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg.fragment
@@ -2,6 +2,9 @@
option(OQS_ENABLE_KEM_{{ family['name']|upper }} "Enable {{ family['name'] }} algorithm family" ON)
{%- for scheme in family['schemes'] %}
cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }} "" ON "OQS_ENABLE_KEM_{{ family['name']|upper }}" OFF)
+{%- if 'alias_scheme' in scheme %}
+cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }} "" ON "OQS_ENABLE_KEM_{{ family['name']|upper }}" OFF)
+{%- endif -%}
{%- for impl in scheme['metadata']['implementations'] if impl['name'] != family['default_implementation'] and impl['supported_platforms'] -%}
{%- for platform in impl['supported_platforms'] if platform['architecture'] == 'x86_64' %}
{% if platform['operating_systems'] %}if(CMAKE_SYSTEM_NAME MATCHES "{{ platform['operating_systems']|join('|') }}")
@@ -37,6 +40,9 @@ endif()
option(OQS_ENABLE_SIG_{{ family['name']|upper }} "Enable {{ family['name'] }} algorithm family" ON)
{%- for scheme in family['schemes'] %}
cmake_dependent_option(OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['scheme'] }} "" ON "OQS_ENABLE_SIG_{{ family['name']|upper }}" OFF)
+{%- if 'alias_scheme' in scheme %}
+cmake_dependent_option(OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }} "" ON "OQS_ENABLE_SIG_{{ family['name']|upper }}" OFF)
+{%- endif -%}
{%- for impl in scheme['metadata']['implementations'] if impl['name'] != family['default_implementation'] and impl['supported_platforms'] -%}
{%- for platform in impl['supported_platforms'] if platform['architecture'] == 'x86_64' %}
{% if platform['operating_systems'] %}if(CMAKE_SYSTEM_NAME MATCHES "{{ platform['operating_systems']|join('|') }}")
diff --git a/scripts/copy_from_upstream/.CMake/alg_support.cmake/list_standardized_algs.fragment b/scripts/copy_from_upstream/.CMake/alg_support.cmake/list_standardized_algs.fragment
index fc5a4fecbc..d0bf2a61b2 100644
--- a/scripts/copy_from_upstream/.CMake/alg_support.cmake/list_standardized_algs.fragment
+++ b/scripts/copy_from_upstream/.CMake/alg_support.cmake/list_standardized_algs.fragment
@@ -1,6 +1,6 @@
filter_algs("
-{%- for family in instructions['kems'] if family['name'] == 'kyber' -%}
+{%- for family in instructions['kems'] if family['name'] in ['ml_kem'] -%}
{%- for scheme in family['schemes'] -%}
KEM_{{ family['name'] }}_{{ scheme['scheme'] }};
{%- endfor -%}
@@ -8,7 +8,7 @@
{%- for family in instructions['sigs'] -%}
{%- set outer_loop = loop -%}
{%- for scheme in family['schemes'] -%}
- SIG_{{ family['name'] }}_{{ scheme['scheme'] }}{%- if not (outer_loop.last and loop.last) -%};{%- endif -%}
+ {#- Emit an entry only for standardized signature families; other families must render nothing at all. -#}{%- if family['name'] in ['ml_dsa', 'falcon', 'sphincs'] -%}SIG_{{ family['name'] }}_{{ scheme['scheme'] }}{%- if not (outer_loop.last and loop.last) -%};{%- endif -%}{%- endif -%}
{%- endfor -%}
{%- endfor -%}
")
diff --git a/scripts/copy_from_upstream/copy_from_upstream.py b/scripts/copy_from_upstream/copy_from_upstream.py
index 1f3cdc4bed..32d897cdf8 100755
--- a/scripts/copy_from_upstream/copy_from_upstream.py
+++ b/scripts/copy_from_upstream/copy_from_upstream.py
@@ -550,6 +550,8 @@ def process_families(instructions, basedir, with_kat, with_generator):
print("Adding new KAT for %s" % (scheme['pretty_name_full']))
pass
kats['kem'][scheme['pretty_name_full']]['single'] = scheme['metadata']['nistkat-sha256']
+ if 'alias_pretty_name_full' in scheme:
+ kats['kem'][scheme['alias_pretty_name_full']]['single'] = scheme['metadata']['nistkat-sha256']
else:
try:
if kats['sig'][scheme['pretty_name_full']]['single'] != scheme['metadata']['nistkat-sha256']:
@@ -558,6 +560,8 @@ def process_families(instructions, basedir, with_kat, with_generator):
print("Adding new KAT for %s" % (scheme['pretty_name_full']))
pass
kats['sig'][scheme['pretty_name_full']]['single'] = scheme['metadata']['nistkat-sha256']
+ if 'alias_pretty_name_full' in scheme:
+ kats['sig'][scheme['alias_pretty_name_full']]['single'] = scheme['metadata']['nistkat-sha256']
if with_generator:
generator(
diff --git a/scripts/copy_from_upstream/copy_from_upstream.yml b/scripts/copy_from_upstream/copy_from_upstream.yml
index 8c1313ac1e..f55b8798ba 100644
--- a/scripts/copy_from_upstream/copy_from_upstream.yml
+++ b/scripts/copy_from_upstream/copy_from_upstream.yml
@@ -29,6 +29,14 @@ upstreams:
kem_meta_path: '{pretty_name_full}_META.yml'
kem_scheme_path: '.'
patches: [pqcrystals-kyber-yml.patch, pqcrystals-kyber-ref-shake-aes.patch, pqcrystals-kyber-avx2-shake-aes.patch]
+ -
+ name: pqcrystals-kyber-standard
+ git_url: https://github.com/pq-crystals/kyber.git
+ git_branch: standard
+ git_commit: 11d00ff1f20cfca1f72d819e5a45165c1e0a2816
+ kem_meta_path: '{pretty_name_full}_META.yml'
+ kem_scheme_path: '.'
+ patches: [pqcrystals-ml_kem_ipd.patch]
-
name: pqcrystals-dilithium
git_url: https://github.com/pq-crystals/dilithium.git
@@ -37,6 +45,14 @@ upstreams:
sig_meta_path: '{pretty_name_full}_META.yml'
sig_scheme_path: '.'
patches: [pqcrystals-dilithium-yml.patch, pqcrystals-dilithium-ref-shake-aes.patch, pqcrystals-dilithium-avx2-shake-aes.patch]
+ -
+ name: pqcrystals-dilithium-standard
+ git_url: https://github.com/pq-crystals/dilithium.git
+ git_branch: standard
+ git_commit: e7bed6258b9a3703ce78d4ec38021c86382ce31c
+ sig_meta_path: '{pretty_name_full}_META.yml'
+ sig_scheme_path: '.'
+ patches: [pqcrystals-ml_dsa_ipd.patch]
kems:
-
name: classic_mceliece
@@ -121,6 +137,29 @@ kems:
scheme: "1024"
pqclean_scheme: kyber1024
pretty_name_full: Kyber1024
+ -
+ name: ml_kem
+ default_implementation: ref
+ upstream_location: pqcrystals-kyber-standard
+ schemes:
+ -
+ scheme: "512_ipd"
+ pqclean_scheme: ml-kem-512-ipd
+ pretty_name_full: ML-KEM-512-ipd
+ alias_scheme: "512"
+ alias_pretty_name_full: ML-KEM-512
+ -
+ scheme: "768_ipd"
+ pqclean_scheme: ml-kem-768-ipd
+ pretty_name_full: ML-KEM-768-ipd
+ alias_scheme: "768"
+ alias_pretty_name_full: ML-KEM-768
+ -
+ scheme: "1024_ipd"
+ pqclean_scheme: ml-kem-1024-ipd
+ pretty_name_full: ML-KEM-1024-ipd
+ alias_scheme: "1024"
+ alias_pretty_name_full: ML-KEM-1024
sigs:
-
name: dilithium
@@ -146,6 +185,32 @@ sigs:
pqclean_scheme: dilithium5
pretty_name_full: Dilithium5
signed_msg_order: sig_then_msg
+ -
+ name: ml_dsa
+ default_implementation: ref
+ upstream_location: pqcrystals-dilithium-standard
+ schemes:
+ -
+ scheme: "44_ipd"
+ pqclean_scheme: ml-dsa-44-ipd
+ pretty_name_full: ML-DSA-44-ipd
+ signed_msg_order: sig_then_msg
+ alias_scheme: "44"
+ alias_pretty_name_full: ML-DSA-44
+ -
+ scheme: "65_ipd"
+ pqclean_scheme: ml-dsa-65-ipd
+ pretty_name_full: ML-DSA-65-ipd
+ signed_msg_order: sig_then_msg
+ alias_scheme: "65"
+ alias_pretty_name_full: ML-DSA-65
+ -
+ scheme: "87_ipd"
+ pqclean_scheme: ml-dsa-87-ipd
+ pretty_name_full: ML-DSA-87-ipd
+ signed_msg_order: sig_then_msg
+ alias_scheme: "87"
+ alias_pretty_name_full: ML-DSA-87
-
name: falcon
default_implementation: clean
diff --git a/scripts/copy_from_upstream/patches/pqcrystals-ml_dsa_ipd.patch b/scripts/copy_from_upstream/patches/pqcrystals-ml_dsa_ipd.patch
new file mode 100644
index 0000000000..58e1cf34de
--- /dev/null
+++ b/scripts/copy_from_upstream/patches/pqcrystals-ml_dsa_ipd.patch
@@ -0,0 +1,842 @@
+diff --git a/Dilithium2_META.yml b/ML-DSA-44-ipd_META.yml
+index 0e2e6fc..d99edb5 100644
+--- a/Dilithium2_META.yml
++++ b/ML-DSA-44-ipd_META.yml
+@@ -1,11 +1,11 @@
+-name: Dilithium2
++name: ML-DSA-44-ipd
+ type: signature
+ claimed-nist-level: 2
+ length-public-key: 1312
+-length-secret-key: 2528
++length-secret-key: 2560
+ length-signature: 2420
+-nistkat-sha256: 26ae9c1224171e957dbe38672942d31edb7dffbe700825e0cb52128cdb45280a
+-testvectors-sha256: b56155479f5643a3cb3d73260ba2b1fd7e772a49b6f4cebcf742cd860fbf6879
++nistkat-sha256: e6f3ec4dc0b02dd3bcbbc6b105190e1890ca0bb3f802e2b571f0d70f3993a2e1
++testvectors-sha256: aff4dbcb0c5ad52c840036907661efd2cafd6c1cba95ed052184f45adf30f365
+ principal-submitters:
+ - Vadim Lyubashevsky
+ auxiliary-submitters:
+@@ -18,22 +18,20 @@ auxiliary-submitters:
+ - Damien Stehlé
+ implementations:
+ - name: ref
+- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409
++ version: https://github.com/pq-crystals/dilithium/tree/standard
+ folder_name: ref
+- compile_opts: -DDILITHIUM_MODE=2 -DDILITHIUM_RANDOMIZED_SIGNING
+- signature_keypair: pqcrystals_dilithium2_ref_keypair
+- signature_signature: pqcrystals_dilithium2_ref_signature
+- signature_verify: pqcrystals_dilithium2_ref_verify
+- sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h fips202.h symmetric-shake.c
+- common_dep: common_ref
++ compile_opts: -DDILITHIUM_MODE=2
++ signature_keypair: pqcrystals_ml_dsa_44_ipd_ref_keypair
++ signature_signature: pqcrystals_ml_dsa_44_ipd_ref_signature
++ signature_verify: pqcrystals_ml_dsa_44_ipd_ref_verify
++ sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h symmetric-shake.c
+ - name: avx2
+- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409
+- compile_opts: -DDILITHIUM_MODE=2 -DDILITHIUM_RANDOMIZED_SIGNING
+- signature_keypair: pqcrystals_dilithium2_avx2_keypair
+- signature_signature: pqcrystals_dilithium2_avx2_signature
+- signature_verify: pqcrystals_dilithium2_avx2_verify
+- sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h fips202.h fips202x4.h symmetric-shake.c
+- common_dep: common_avx2
++ version: https://github.com/pq-crystals/dilithium/tree/standard
++ compile_opts: -DDILITHIUM_MODE=2
++ signature_keypair: pqcrystals_ml_dsa_44_ipd_avx2_keypair
++ signature_signature: pqcrystals_ml_dsa_44_ipd_avx2_signature
++ signature_verify: pqcrystals_ml_dsa_44_ipd_avx2_verify
++ sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h symmetric-shake.c
+ supported_platforms:
+ - architecture: x86_64
+ operating_systems:
+diff --git a/Dilithium3_META.yml b/ML-DSA-65-ipd_META.yml
+index d1bca64..72a43e7 100644
+--- a/Dilithium3_META.yml
++++ b/ML-DSA-65-ipd_META.yml
+@@ -1,11 +1,11 @@
+-name: Dilithium3
++name: ML-DSA-65-ipd
+ type: signature
+ claimed-nist-level: 3
+ length-public-key: 1952
+-length-secret-key: 4000
+-length-signature: 3293
+-nistkat-sha256: eea584803c3d6991a4acbf9f117147bbdd246faf822cfb1a17effe20b2052ba9
+-testvectors-sha256: a237032c7840a0d2f922951f806c2199f8f86b8a8947f6f6f1b856c925222958
++length-secret-key: 4032
++length-signature: 3309
++nistkat-sha256: 7225c4531086d88c9b7fa18101b0f78dda2d38df88812c65ddc1ae94fe3c01a7
++testvectors-sha256: e0a98c0a29137dcbeb12104ccaa6a0555a9bdb4dcfbc2b0fc9a959dd8b6c8699
+ principal-submitters:
+ - Vadim Lyubashevsky
+ auxiliary-submitters:
+@@ -18,22 +18,20 @@ auxiliary-submitters:
+ - Damien Stehlé
+ implementations:
+ - name: ref
+- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409
++ version: https://github.com/pq-crystals/dilithium/tree/standard
+ folder_name: ref
+- compile_opts: -DDILITHIUM_MODE=3 -DDILITHIUM_RANDOMIZED_SIGNING
+- signature_keypair: pqcrystals_dilithium3_ref_keypair
+- signature_signature: pqcrystals_dilithium3_ref_signature
+- signature_verify: pqcrystals_dilithium3_ref_verify
+- sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h fips202.h symmetric-shake.c
+- common_dep: common_ref
++ compile_opts: -DDILITHIUM_MODE=3
++ signature_keypair: pqcrystals_ml_dsa_65_ipd_ref_keypair
++ signature_signature: pqcrystals_ml_dsa_65_ipd_ref_signature
++ signature_verify: pqcrystals_ml_dsa_65_ipd_ref_verify
++ sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h symmetric-shake.c
+ - name: avx2
+- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409
+- compile_opts: -DDILITHIUM_MODE=3 -DDILITHIUM_RANDOMIZED_SIGNING
+- signature_keypair: pqcrystals_dilithium3_avx2_keypair
+- signature_signature: pqcrystals_dilithium3_avx2_signature
+- signature_verify: pqcrystals_dilithium3_avx2_verify
+- sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h fips202.h fips202x4.h symmetric-shake.c
+- common_dep: common_avx2
++ version: https://github.com/pq-crystals/dilithium/tree/standard
++ compile_opts: -DDILITHIUM_MODE=3
++ signature_keypair: pqcrystals_ml_dsa_65_ipd_avx2_keypair
++ signature_signature: pqcrystals_ml_dsa_65_ipd_avx2_signature
++ signature_verify: pqcrystals_ml_dsa_65_ipd_avx2_verify
++ sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h symmetric-shake.c
+ supported_platforms:
+ - architecture: x86_64
+ operating_systems:
+diff --git a/Dilithium5_META.yml b/ML-DSA-87-ipd_META.yml
+index a4dbdbf..bf68590 100644
+--- a/Dilithium5_META.yml
++++ b/ML-DSA-87-ipd_META.yml
+@@ -1,11 +1,11 @@
+-name: Dilithium5
++name: ML-DSA-87-ipd
+ type: signature
+ claimed-nist-level: 5
+ length-public-key: 2592
+-length-secret-key: 4864
+-length-signature: 4595
+-nistkat-sha256: 3f6e58603a38be57cf08d79b01fcfd0ccc1129a09e14a6122c6fe22c906ddc3b
+-testvectors-sha256: ddeb95f4a743562010bce527ea7c99fed4ce1234bafd5ed6f44eea0f065ba49c
++length-secret-key: 4896
++length-signature: 4627
++nistkat-sha256: f5cb5ed44a261a4118f9cfd5d55b4210939cb5b8531968a10c37060551a8927f
++testvectors-sha256: 9a1985c10b13efefee50067edf3432ed8ab48a62965743feb45a317485980883
+ principal-submitters:
+ - Vadim Lyubashevsky
+ auxiliary-submitters:
+@@ -18,22 +18,20 @@ auxiliary-submitters:
+ - Damien Stehlé
+ implementations:
+ - name: ref
+- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409
++ version: https://github.com/pq-crystals/dilithium/tree/standard
+ folder_name: ref
+- compile_opts: -DDILITHIUM_MODE=5 -DDILITHIUM_RANDOMIZED_SIGNING
+- signature_keypair: pqcrystals_dilithium5_ref_keypair
+- signature_signature: pqcrystals_dilithium5_ref_signature
+- signature_verify: pqcrystals_dilithium5_ref_verify
+- sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h fips202.h symmetric-shake.c
+- common_dep: common_ref
++ compile_opts: -DDILITHIUM_MODE=5
++ signature_keypair: pqcrystals_ml_dsa_87_ipd_ref_keypair
++ signature_signature: pqcrystals_ml_dsa_87_ipd_ref_signature
++ signature_verify: pqcrystals_ml_dsa_87_ipd_ref_verify
++ sources: ../LICENSE api.h config.h params.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.c ntt.h reduce.c reduce.h rounding.c rounding.h symmetric.h symmetric-shake.c
+ - name: avx2
+- version: https://github.com/pq-crystals/dilithium/commit/d9c885d3f2e11c05529eeeb7d70d808c972b8409
+- compile_opts: -DDILITHIUM_MODE=5 -DDILITHIUM_RANDOMIZED_SIGNING
+- signature_keypair: pqcrystals_dilithium5_avx2_keypair
+- signature_signature: pqcrystals_dilithium5_avx2_signature
+- signature_verify: pqcrystals_dilithium5_avx2_verify
+- sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h fips202.h fips202x4.h symmetric-shake.c
+- common_dep: common_avx2
++ version: https://github.com/pq-crystals/dilithium/tree/standard
++ compile_opts: -DDILITHIUM_MODE=5
++ signature_keypair: pqcrystals_ml_dsa_87_ipd_avx2_keypair
++ signature_signature: pqcrystals_ml_dsa_87_ipd_avx2_signature
++ signature_verify: pqcrystals_ml_dsa_87_ipd_avx2_verify
++ sources: ../LICENSE api.h config.h params.h align.h sign.c sign.h packing.c packing.h polyvec.c polyvec.h poly.c poly.h ntt.S invntt.S pointwise.S ntt.h shuffle.S shuffle.inc consts.c consts.h rejsample.c rejsample.h rounding.c rounding.h symmetric.h symmetric-shake.c
+ supported_platforms:
+ - architecture: x86_64
+ operating_systems:
+diff --git a/README.md b/README.md
+index 5a5d48d..d6b337a 100644
+--- a/README.md
++++ b/README.md
+@@ -18,9 +18,9 @@ brew install openssl
+ ```
+ Then, run
+ ```sh
+-export CFLAGS="-I/usr/local/opt/openssl@1.1/include"
+-export NISTFLAGS="-I/usr/local/opt/openssl@1.1/include"
+-export LDFLAGS="-L/usr/local/opt/openssl@1.1/lib"
++export CFLAGS="-I/opt/homebrew/opt/openssl@1.1/include"
++export NISTFLAGS="-I/opt/homebrew/opt/openssl@1.1/include"
++export LDFLAGS="-L/opt/homebrew/opt/openssl@1.1/lib"
+ ```
+ before compilation to add the OpenSSL header and library locations to the respective search paths.
+
+@@ -60,11 +60,11 @@ Our Dilithium implementations are contained in the [SUPERCOP](https://bench.cr.y
+
+ ## Randomized signing
+
+-By default our code implements Dilithium's deterministic signing mode. To change this to the randomized signing mode, define the `DILITHIUM_RANDOMIZED_SIGNING` preprocessor macro at compilation by either uncommenting the line
++By default our code implements Dilithium's randomized signing mode. To change this to the deterministic signing mode, undefine the `DILITHIUM_RANDOMIZED_SIGNING` preprocessor macro at compilation by commenting the line
+ ```sh
+-//#define DILITHIUM_RANDOMIZED_SIGNING
++#define DILITHIUM_RANDOMIZED_SIGNING
+ ```
+-in config.h, or adding `-DDILITHIUM_RANDOMIZED_SIGNING` to the compiler flags in the environment variable `CFLAGS`.
++in config.h.
+
+ ## Shared libraries
+
+diff --git a/avx2/api.h b/avx2/api.h
+index 1948a96..55b6376 100644
+--- a/avx2/api.h
++++ b/avx2/api.h
+@@ -5,7 +5,7 @@
+ #include
+
+ #define pqcrystals_dilithium2_PUBLICKEYBYTES 1312
+-#define pqcrystals_dilithium2_SECRETKEYBYTES 2528
++#define pqcrystals_dilithium2_SECRETKEYBYTES 2560
+ #define pqcrystals_dilithium2_BYTES 2420
+
+ #define pqcrystals_dilithium2_avx2_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES
+@@ -32,8 +32,8 @@ int pqcrystals_dilithium2_avx2_open(uint8_t *m, size_t *mlen,
+
+
+ #define pqcrystals_dilithium3_PUBLICKEYBYTES 1952
+-#define pqcrystals_dilithium3_SECRETKEYBYTES 4000
+-#define pqcrystals_dilithium3_BYTES 3293
++#define pqcrystals_dilithium3_SECRETKEYBYTES 4032
++#define pqcrystals_dilithium3_BYTES 3309
+
+ #define pqcrystals_dilithium3_avx2_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES
+ #define pqcrystals_dilithium3_avx2_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES
+@@ -59,8 +59,8 @@ int pqcrystals_dilithium3_avx2_open(uint8_t *m, size_t *mlen,
+
+
+ #define pqcrystals_dilithium5_PUBLICKEYBYTES 2592
+-#define pqcrystals_dilithium5_SECRETKEYBYTES 4864
+-#define pqcrystals_dilithium5_BYTES 4595
++#define pqcrystals_dilithium5_SECRETKEYBYTES 4896
++#define pqcrystals_dilithium5_BYTES 4627
+
+ #define pqcrystals_dilithium5_avx2_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES
+ #define pqcrystals_dilithium5_avx2_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES
+diff --git a/avx2/config.h b/avx2/config.h
+index ba5caa8..e59f81a 100644
+--- a/avx2/config.h
++++ b/avx2/config.h
+@@ -2,7 +2,7 @@
+ #define CONFIG_H
+
+ //#define DILITHIUM_MODE 2
+-//#define DILITHIUM_RANDOMIZED_SIGNING
++#define DILITHIUM_RANDOMIZED_SIGNING
+ //#define USE_RDPMC
+ //#define DBENCH
+
+@@ -11,17 +11,17 @@
+ #endif
+
+ #if DILITHIUM_MODE == 2
+-#define CRYPTO_ALGNAME "Dilithium2"
+-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium2_avx2
+-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium2_avx2_##s
++#define CRYPTO_ALGNAME "ML-DSA-44-ipd"
++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_avx2
++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_avx2_##s
+ #elif DILITHIUM_MODE == 3
+-#define CRYPTO_ALGNAME "Dilithium3"
+-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium3_avx2
+-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium3_avx2_##s
++#define CRYPTO_ALGNAME "ML-DSA-65-ipd"
++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_avx2
++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_avx2_##s
+ #elif DILITHIUM_MODE == 5
+-#define CRYPTO_ALGNAME "Dilithium5"
+-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium5_avx2
+-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium5_avx2_##s
++#define CRYPTO_ALGNAME "ML-DSA-87-ipd"
++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_avx2
++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_avx2_##s
+ #endif
+
+ #endif
+diff --git a/avx2/poly.c b/avx2/poly.c
+index c1b21c1..25d3682 100644
+--- a/avx2/poly.c
++++ b/avx2/poly.c
+@@ -401,6 +401,7 @@ void poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce)
+ stream128_state state;
+ stream128_init(&state, seed, nonce);
+ poly_uniform_preinit(a, &state);
++ stream128_release(&state);
+ }
+
+ void poly_uniform_4x(poly *a0,
+@@ -415,7 +416,7 @@ void poly_uniform_4x(poly *a0,
+ {
+ unsigned int ctr0, ctr1, ctr2, ctr3;
+ ALIGNED_UINT8(REJ_UNIFORM_BUFLEN+8) buf[4];
+- keccakx4_state state;
++ shake128x4incctx state;
+ __m256i f;
+
+ f = _mm256_loadu_si256((__m256i *)seed);
+@@ -433,6 +434,7 @@ void poly_uniform_4x(poly *a0,
+ buf[3].coeffs[SEEDBYTES+0] = nonce3;
+ buf[3].coeffs[SEEDBYTES+1] = nonce3 >> 8;
+
++ shake128x4_inc_init(&state);
+ shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2);
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_NBLOCKS, &state);
+
+@@ -449,6 +451,7 @@ void poly_uniform_4x(poly *a0,
+ ctr2 += rej_uniform(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE);
+ ctr3 += rej_uniform(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE);
+ }
++ shake128x4_inc_ctx_release(&state);
+ }
+
+ /*************************************************
+@@ -530,6 +533,7 @@ void poly_uniform_eta(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce)
+ stream256_state state;
+ stream256_init(&state, seed, nonce);
+ poly_uniform_eta_preinit(a, &state);
++ stream256_release(&state);
+ }
+
+ void poly_uniform_eta_4x(poly *a0,
+@@ -546,7 +550,7 @@ void poly_uniform_eta_4x(poly *a0,
+ ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf[4];
+
+ __m256i f;
+- keccakx4_state state;
++ shake256x4incctx state;
+
+ f = _mm256_loadu_si256((__m256i *)&seed[0]);
+ _mm256_store_si256(&buf[0].vec[0],f);
+@@ -568,6 +572,7 @@ void poly_uniform_eta_4x(poly *a0,
+ buf[3].coeffs[64] = nonce3;
+ buf[3].coeffs[65] = nonce3 >> 8;
+
++ shake256x4_inc_init(&state);
+ shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66);
+ shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_ETA_NBLOCKS, &state);
+
+@@ -584,6 +589,7 @@ void poly_uniform_eta_4x(poly *a0,
+ ctr2 += rej_eta(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE256_RATE);
+ ctr3 += rej_eta(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE256_RATE);
+ }
++ shake256x4_inc_ctx_release(&state);
+ }
+
+ /*************************************************
+@@ -611,6 +617,7 @@ void poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce)
+ stream256_state state;
+ stream256_init(&state, seed, nonce);
+ poly_uniform_gamma1_preinit(a, &state);
++ stream256_release(&state);
+ }
+
+ void poly_uniform_gamma1_4x(poly *a0,
+@@ -624,7 +631,7 @@ void poly_uniform_gamma1_4x(poly *a0,
+ uint16_t nonce3)
+ {
+ ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS*STREAM256_BLOCKBYTES+14) buf[4];
+- keccakx4_state state;
++ shake256x4incctx state;
+ __m256i f;
+
+ f = _mm256_loadu_si256((__m256i *)&seed[0]);
+@@ -647,8 +654,10 @@ void poly_uniform_gamma1_4x(poly *a0,
+ buf[3].coeffs[64] = nonce3;
+ buf[3].coeffs[65] = nonce3 >> 8;
+
++ shake256x4_inc_init(&state);
+ shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 66);
+ shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, &state);
++ shake256x4_inc_ctx_release(&state);
+
+ polyz_unpack(a0, buf[0].coeffs);
+ polyz_unpack(a1, buf[1].coeffs);
+@@ -670,12 +679,12 @@ void poly_challenge(poly * restrict c, const uint8_t seed[SEEDBYTES]) {
+ unsigned int i, b, pos;
+ uint64_t signs;
+ ALIGNED_UINT8(SHAKE256_RATE) buf;
+- keccak_state state;
++ shake256incctx state;
+
+- shake256_init(&state);
+- shake256_absorb(&state, seed, SEEDBYTES);
+- shake256_finalize(&state);
+- shake256_squeezeblocks(buf.coeffs, 1, &state);
++ shake256_inc_init(&state);
++ shake256_inc_absorb(&state, seed, SEEDBYTES);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state);
+
+ memcpy(&signs, buf.coeffs, 8);
+ pos = 8;
+@@ -695,6 +704,7 @@ void poly_challenge(poly * restrict c, const uint8_t seed[SEEDBYTES]) {
+ c->coeffs[b] = 1 - 2*(signs & 1);
+ signs >>= 1;
+ }
++ shake256_inc_ctx_release(&state);
+ }
+
+ /*************************************************
+diff --git a/avx2/sign.c b/avx2/sign.c
+index c8f2398..a39f851 100644
+--- a/avx2/sign.c
++++ b/avx2/sign.c
+@@ -161,7 +161,7 @@ int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t
+ polyvecl y;
+ polyveck w0;
+ } tmpv;
+- keccak_state state;
++ shake256incctx state;
+
+ rho = seedbuf;
+ tr = rho + SEEDBYTES;
+@@ -172,11 +172,11 @@ int crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t
+ unpack_sk(rho, tr, key, &t0, &s1, &s2, sk);
+
+ /* Compute CRH(tr, msg) */
+- shake256_init(&state);
+- shake256_absorb(&state, tr, TRBYTES);
+- shake256_absorb(&state, m, mlen);
+- shake256_finalize(&state);
+- shake256_squeeze(mu, CRHBYTES, &state);
++ shake256_inc_init(&state);
++ shake256_inc_absorb(&state, tr, TRBYTES);
++ shake256_inc_absorb(&state, m, mlen);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(mu, CRHBYTES, &state);
+
+ #ifdef DILITHIUM_RANDOMIZED_SIGNING
+ randombytes(rnd, RNDBYTES);
+@@ -223,11 +223,11 @@ rej:
+ polyveck_decompose(&w1, &tmpv.w0, &w1);
+ polyveck_pack_w1(sig, &w1);
+
+- shake256_init(&state);
+- shake256_absorb(&state, mu, CRHBYTES);
+- shake256_absorb(&state, sig, K*POLYW1_PACKEDBYTES);
+- shake256_finalize(&state);
+- shake256_squeeze(sig, CTILDEBYTES, &state);
++ shake256_inc_ctx_reset(&state);
++ shake256_inc_absorb(&state, mu, CRHBYTES);
++ shake256_inc_absorb(&state, sig, K*POLYW1_PACKEDBYTES);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(sig, CTILDEBYTES, &state);
+ poly_challenge(&c, sig);
+ poly_ntt(&c);
+
+@@ -272,6 +272,7 @@ rej:
+ hint[OMEGA + i] = pos = pos + n;
+ }
+
++ shake256_inc_ctx_release(&state);
+ /* Pack z into signature */
+ for(i = 0; i < L; i++)
+ polyz_pack(sig + CTILDEBYTES + i*POLYZ_PACKEDBYTES, &z.vec[i]);
+@@ -329,18 +330,19 @@ int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size
+ polyvecl *row = rowbuf;
+ polyvecl z;
+ poly c, w1, h;
+- keccak_state state;
++ shake256incctx state;
+
+ if(siglen != CRYPTO_BYTES)
+ return -1;
+
+ /* Compute CRH(H(rho, t1), msg) */
+ shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES);
+- shake256_init(&state);
+- shake256_absorb(&state, mu, CRHBYTES);
+- shake256_absorb(&state, m, mlen);
+- shake256_finalize(&state);
+- shake256_squeeze(mu, CRHBYTES, &state);
++ shake256_inc_init(&state);
++ shake256_inc_absorb(&state, mu, CRHBYTES);
++ shake256_inc_absorb(&state, m, mlen);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(mu, CRHBYTES, &state);
++ shake256_inc_ctx_release(&state);
+
+ /* Expand challenge */
+ poly_challenge(&c, sig);
+@@ -390,11 +392,12 @@ int crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size
+ if(hint[j]) return -1;
+
+ /* Call random oracle and verify challenge */
+- shake256_init(&state);
+- shake256_absorb(&state, mu, CRHBYTES);
+- shake256_absorb(&state, buf.coeffs, K*POLYW1_PACKEDBYTES);
+- shake256_finalize(&state);
+- shake256_squeeze(buf.coeffs, CTILDEBYTES, &state);
++ shake256_inc_init(&state);
++ shake256_inc_absorb(&state, mu, CRHBYTES);
++ shake256_inc_absorb(&state, buf.coeffs, K*POLYW1_PACKEDBYTES);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(buf.coeffs, CTILDEBYTES, &state);
++ shake256_inc_ctx_release(&state);
+ for(i = 0; i < CTILDEBYTES; ++i)
+ if(buf.coeffs[i] != sig[i])
+ return -1;
+diff --git a/avx2/symmetric.h b/avx2/symmetric.h
+index 8f3c3c5..fa49963 100644
+--- a/avx2/symmetric.h
++++ b/avx2/symmetric.h
+@@ -6,21 +6,23 @@
+
+ #include "fips202.h"
+
+-typedef keccak_state stream128_state;
+-typedef keccak_state stream256_state;
++typedef shake128incctx stream128_state;
++typedef shake256incctx stream256_state;
+
+ #define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init)
+-void dilithium_shake128_stream_init(keccak_state *state, const uint8_t seed[SEEDBYTES], uint16_t nonce);
++void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce);
+
+ #define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init)
+-void dilithium_shake256_stream_init(keccak_state *state, const uint8_t seed[CRHBYTES], uint16_t nonce);
++void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce);
+
+ #define STREAM128_BLOCKBYTES SHAKE128_RATE
+ #define STREAM256_BLOCKBYTES SHAKE256_RATE
+
+ #define stream128_init(STATE, SEED, NONCE) dilithium_shake128_stream_init(STATE, SEED, NONCE)
+ #define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE)
++#define stream128_release(STATE) shake128_inc_ctx_release(STATE)
+ #define stream256_init(STATE, SEED, NONCE) dilithium_shake256_stream_init(STATE, SEED, NONCE)
+ #define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) shake256_squeezeblocks(OUT, OUTBLOCKS, STATE)
++#define stream256_release(STATE) shake256_inc_ctx_release(STATE)
+
+ #endif
+diff --git a/ref/api.h b/ref/api.h
+index cc5c6fe..78caa5c 100644
+--- a/ref/api.h
++++ b/ref/api.h
+@@ -33,7 +33,7 @@ int pqcrystals_dilithium2_ref_open(uint8_t *m, size_t *mlen,
+
+ #define pqcrystals_dilithium3_PUBLICKEYBYTES 1952
+ #define pqcrystals_dilithium3_SECRETKEYBYTES 4032
+-#define pqcrystals_dilithium3_BYTES 3293
++#define pqcrystals_dilithium3_BYTES 3309
+
+ #define pqcrystals_dilithium3_ref_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES
+ #define pqcrystals_dilithium3_ref_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES
+@@ -60,7 +60,7 @@ int pqcrystals_dilithium3_ref_open(uint8_t *m, size_t *mlen,
+
+ #define pqcrystals_dilithium5_PUBLICKEYBYTES 2592
+ #define pqcrystals_dilithium5_SECRETKEYBYTES 4896
+-#define pqcrystals_dilithium5_BYTES 4595
++#define pqcrystals_dilithium5_BYTES 4627
+
+ #define pqcrystals_dilithium5_ref_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES
+ #define pqcrystals_dilithium5_ref_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES
+diff --git a/ref/config.h b/ref/config.h
+index 5ddcd8c..eddf13f 100644
+--- a/ref/config.h
++++ b/ref/config.h
+@@ -2,7 +2,7 @@
+ #define CONFIG_H
+
+ //#define DILITHIUM_MODE 2
+-//#define DILITHIUM_RANDOMIZED_SIGNING
++#define DILITHIUM_RANDOMIZED_SIGNING
+ //#define USE_RDPMC
+ //#define DBENCH
+
+@@ -11,17 +11,17 @@
+ #endif
+
+ #if DILITHIUM_MODE == 2
+-#define CRYPTO_ALGNAME "Dilithium2"
+-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium2_ref
+-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium2_ref_##s
++#define CRYPTO_ALGNAME "ML-DSA-44-ipd"
++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_44_ipd_ref
++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_44_ipd_ref_##s
+ #elif DILITHIUM_MODE == 3
+-#define CRYPTO_ALGNAME "Dilithium3"
+-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium3_ref
+-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium3_ref_##s
++#define CRYPTO_ALGNAME "ML-DSA-65-ipd"
++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_65_ipd_ref
++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_65_ipd_ref_##s
+ #elif DILITHIUM_MODE == 5
+-#define CRYPTO_ALGNAME "Dilithium5"
+-#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium5_ref
+-#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium5_ref_##s
++#define CRYPTO_ALGNAME "ML-DSA-87-ipd"
++#define DILITHIUM_NAMESPACETOP pqcrystals_ml_dsa_87_ipd_ref
++#define DILITHIUM_NAMESPACE(s) pqcrystals_ml_dsa_87_ipd_ref_##s
+ #endif
+
+ #endif
+diff --git a/ref/packing.h b/ref/packing.h
+index 1e8e9e7..8e47728 100644
+--- a/ref/packing.h
++++ b/ref/packing.h
+@@ -18,7 +18,7 @@ void pack_sk(uint8_t sk[CRYPTO_SECRETKEYBYTES],
+ const polyveck *s2);
+
+ #define pack_sig DILITHIUM_NAMESPACE(pack_sig)
+-void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[SEEDBYTES], const polyvecl *z, const polyveck *h);
++void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const polyvecl *z, const polyveck *h);
+
+ #define unpack_pk DILITHIUM_NAMESPACE(unpack_pk)
+ void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[CRYPTO_PUBLICKEYBYTES]);
+@@ -33,6 +33,6 @@ void unpack_sk(uint8_t rho[SEEDBYTES],
+ const uint8_t sk[CRYPTO_SECRETKEYBYTES]);
+
+ #define unpack_sig DILITHIUM_NAMESPACE(unpack_sig)
+-int unpack_sig(uint8_t c[SEEDBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]);
++int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]);
+
+ #endif
+diff --git a/ref/poly.c b/ref/poly.c
+index fe3b787..7983aac 100644
+--- a/ref/poly.c
++++ b/ref/poly.c
+@@ -365,6 +365,7 @@ void poly_uniform(poly *a,
+ buflen = STREAM128_BLOCKBYTES + off;
+ ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf, buflen);
+ }
++ stream128_release(&state);
+ }
+
+ /*************************************************
+@@ -450,6 +451,7 @@ void poly_uniform_eta(poly *a,
+ stream256_squeezeblocks(buf, 1, &state);
+ ctr += rej_eta(a->coeffs + ctr, N - ctr, buf, STREAM256_BLOCKBYTES);
+ }
++ stream256_release(&state);
+ }
+
+ /*************************************************
+@@ -473,6 +475,7 @@ void poly_uniform_gamma1(poly *a,
+
+ stream256_init(&state, seed, nonce);
+ stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1_NBLOCKS, &state);
++ stream256_release(&state);
+ polyz_unpack(a, buf);
+ }
+
+@@ -490,11 +493,11 @@ void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) {
+ unsigned int i, b, pos;
+ uint64_t signs;
+ uint8_t buf[SHAKE256_RATE];
+- keccak_state state;
++ shake256incctx state;
+
+- shake256_init(&state);
+- shake256_absorb(&state, seed, SEEDBYTES);
+- shake256_finalize(&state);
++ shake256_inc_init(&state);
++ shake256_inc_absorb(&state, seed, SEEDBYTES);
++ shake256_inc_finalize(&state);
+ shake256_squeezeblocks(buf, 1, &state);
+
+ signs = 0;
+@@ -518,6 +521,7 @@ void poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) {
+ c->coeffs[b] = 1 - 2*(signs & 1);
+ signs >>= 1;
+ }
++ shake256_inc_ctx_release(&state);
+ }
+
+ /*************************************************
+diff --git a/ref/sign.c b/ref/sign.c
+index d25a399..9298ad2 100644
+--- a/ref/sign.c
++++ b/ref/sign.c
+@@ -90,7 +90,7 @@ int crypto_sign_signature(uint8_t *sig,
+ polyvecl mat[K], s1, y, z;
+ polyveck t0, s2, w1, w0, h;
+ poly cp;
+- keccak_state state;
++ shake256incctx state;
+
+ rho = seedbuf;
+ tr = rho + SEEDBYTES;
+@@ -102,11 +102,11 @@ int crypto_sign_signature(uint8_t *sig,
+
+
+ /* Compute mu = CRH(tr, msg) */
+- shake256_init(&state);
+- shake256_absorb(&state, tr, TRBYTES);
+- shake256_absorb(&state, m, mlen);
+- shake256_finalize(&state);
+- shake256_squeeze(mu, CRHBYTES, &state);
++ shake256_inc_init(&state);
++ shake256_inc_absorb(&state, tr, TRBYTES);
++ shake256_inc_absorb(&state, m, mlen);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(mu, CRHBYTES, &state);
+
+ #ifdef DILITHIUM_RANDOMIZED_SIGNING
+ randombytes(rnd, RNDBYTES);
+@@ -138,11 +138,11 @@ rej:
+ polyveck_decompose(&w1, &w0, &w1);
+ polyveck_pack_w1(sig, &w1);
+
+- shake256_init(&state);
+- shake256_absorb(&state, mu, CRHBYTES);
+- shake256_absorb(&state, sig, K*POLYW1_PACKEDBYTES);
+- shake256_finalize(&state);
+- shake256_squeeze(sig, CTILDEBYTES, &state);
++ shake256_inc_ctx_reset(&state);
++ shake256_inc_absorb(&state, mu, CRHBYTES);
++ shake256_inc_absorb(&state, sig, K*POLYW1_PACKEDBYTES);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(sig, CTILDEBYTES, &state);
+ poly_challenge(&cp, sig); /* uses only the first SEEDBYTES bytes of sig */
+ poly_ntt(&cp);
+
+@@ -175,6 +175,8 @@ rej:
+ if(n > OMEGA)
+ goto rej;
+
++ shake256_inc_ctx_release(&state);
++
+ /* Write signature */
+ pack_sig(sig, sig, &z, &h);
+ *siglen = CRYPTO_BYTES;
+@@ -240,7 +242,7 @@ int crypto_sign_verify(const uint8_t *sig,
+ poly cp;
+ polyvecl mat[K], z;
+ polyveck t1, w1, h;
+- keccak_state state;
++ shake256incctx state;
+
+ if(siglen != CRYPTO_BYTES)
+ return -1;
+@@ -253,11 +255,11 @@ int crypto_sign_verify(const uint8_t *sig,
+
+ /* Compute CRH(H(rho, t1), msg) */
+ shake256(mu, CRHBYTES, pk, CRYPTO_PUBLICKEYBYTES);
+- shake256_init(&state);
+- shake256_absorb(&state, mu, CRHBYTES);
+- shake256_absorb(&state, m, mlen);
+- shake256_finalize(&state);
+- shake256_squeeze(mu, CRHBYTES, &state);
++ shake256_inc_init(&state);
++ shake256_inc_absorb(&state, mu, CRHBYTES);
++ shake256_inc_absorb(&state, m, mlen);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(mu, CRHBYTES, &state);
+
+ /* Matrix-vector multiplication; compute Az - c2^dt1 */
+ poly_challenge(&cp, c); /* uses only the first SEEDBYTES bytes of c */
+@@ -281,11 +283,12 @@ int crypto_sign_verify(const uint8_t *sig,
+ polyveck_pack_w1(buf, &w1);
+
+ /* Call random oracle and verify challenge */
+- shake256_init(&state);
+- shake256_absorb(&state, mu, CRHBYTES);
+- shake256_absorb(&state, buf, K*POLYW1_PACKEDBYTES);
+- shake256_finalize(&state);
+- shake256_squeeze(c2, CTILDEBYTES, &state);
++ shake256_inc_ctx_reset(&state);
++ shake256_inc_absorb(&state, mu, CRHBYTES);
++ shake256_inc_absorb(&state, buf, K*POLYW1_PACKEDBYTES);
++ shake256_inc_finalize(&state);
++ shake256_inc_squeeze(c2, CTILDEBYTES, &state);
++ shake256_inc_ctx_release(&state);
+ for(i = 0; i < CTILDEBYTES; ++i)
+ if(c[i] != c2[i])
+ return -1;
+diff --git a/ref/symmetric-shake.c b/ref/symmetric-shake.c
+index 11ec09c..963f649 100644
+--- a/ref/symmetric-shake.c
++++ b/ref/symmetric-shake.c
+@@ -3,26 +3,26 @@
+ #include "symmetric.h"
+ #include "fips202.h"
+
+-void dilithium_shake128_stream_init(keccak_state *state, const uint8_t seed[SEEDBYTES], uint16_t nonce)
++void dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce)
+ {
+ uint8_t t[2];
+ t[0] = nonce;
+ t[1] = nonce >> 8;
+
+- shake128_init(state);
+- shake128_absorb(state, seed, SEEDBYTES);
+- shake128_absorb(state, t, 2);
+- shake128_finalize(state);
++ shake128_inc_init(state);
++ shake128_inc_absorb(state, seed, SEEDBYTES);
++ shake128_inc_absorb(state, t, 2);
++ shake128_inc_finalize(state);
+ }
+
+-void dilithium_shake256_stream_init(keccak_state *state, const uint8_t seed[CRHBYTES], uint16_t nonce)
++void dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce)
+ {
+ uint8_t t[2];
+ t[0] = nonce;
+ t[1] = nonce >> 8;
+
+- shake256_init(state);
+- shake256_absorb(state, seed, CRHBYTES);
+- shake256_absorb(state, t, 2);
+- shake256_finalize(state);
++ shake256_inc_init(state);
++ shake256_inc_absorb(state, seed, CRHBYTES);
++ shake256_inc_absorb(state, t, 2);
++ shake256_inc_finalize(state);
+ }
+diff --git a/ref/symmetric.h b/ref/symmetric.h
+index cba12d1..211de3b 100644
+--- a/ref/symmetric.h
++++ b/ref/symmetric.h
+@@ -6,16 +6,16 @@
+
+ #include "fips202.h"
+
+-typedef keccak_state stream128_state;
+-typedef keccak_state stream256_state;
++typedef shake128incctx stream128_state;
++typedef shake256incctx stream256_state;
+
+ #define dilithium_shake128_stream_init DILITHIUM_NAMESPACE(dilithium_shake128_stream_init)
+-void dilithium_shake128_stream_init(keccak_state *state,
++void dilithium_shake128_stream_init(shake128incctx *state,
+ const uint8_t seed[SEEDBYTES],
+ uint16_t nonce);
+
+ #define dilithium_shake256_stream_init DILITHIUM_NAMESPACE(dilithium_shake256_stream_init)
+-void dilithium_shake256_stream_init(keccak_state *state,
++void dilithium_shake256_stream_init(shake256incctx *state,
+ const uint8_t seed[CRHBYTES],
+ uint16_t nonce);
+
+@@ -26,9 +26,11 @@ void dilithium_shake256_stream_init(keccak_state *state,
+ dilithium_shake128_stream_init(STATE, SEED, NONCE)
+ #define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \
+ shake128_squeezeblocks(OUT, OUTBLOCKS, STATE)
++#define stream128_release(STATE) shake128_inc_ctx_release(STATE)
+ #define stream256_init(STATE, SEED, NONCE) \
+ dilithium_shake256_stream_init(STATE, SEED, NONCE)
+ #define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \
+ shake256_squeezeblocks(OUT, OUTBLOCKS, STATE)
++#define stream256_release(STATE) shake256_inc_ctx_release(STATE)
+
+ #endif
diff --git a/scripts/copy_from_upstream/patches/pqcrystals-ml_kem_ipd.patch b/scripts/copy_from_upstream/patches/pqcrystals-ml_kem_ipd.patch
new file mode 100644
index 0000000000..ba138bf3cd
--- /dev/null
+++ b/scripts/copy_from_upstream/patches/pqcrystals-ml_kem_ipd.patch
@@ -0,0 +1,448 @@
+diff --git a/Kyber1024_META.yml b/ML-KEM-1024-ipd_META.yml
+index baa5ca3..ffafcf0 100644
+--- a/Kyber1024_META.yml
++++ b/ML-KEM-1024-ipd_META.yml
+@@ -1,4 +1,4 @@
+-name: Kyber1024
++name: ML-KEM-1024-ipd
+ type: kem
+ claimed-nist-level: 5
+ claimed-security: IND-CCA2
+@@ -6,8 +6,8 @@ length-public-key: 1568
+ length-ciphertext: 1568
+ length-secret-key: 3168
+ length-shared-secret: 32
+-nistkat-sha256: 5afcf2a568ad32d49b55105b032af1850f03f3888ff9e2a72f4059c58e968f60
+-testvectors-sha256: ff1a854b9b6761a70c65ccae85246fe0596a949e72eae0866a8a2a2d4ea54b10
++nistkat-sha256: 03d6494b74c45d010e61b0328c1ab318c4df3b7f9dbd04d0e35b3468848584b7
++testvectors-sha256: 85ab251d6e749e6b27507a8a6ec473ba2e8419c1aef87d0cd5ec9903c1bb92df
+ principal-submitters:
+ - Peter Schwabe
+ auxiliary-submitters:
+@@ -22,22 +22,20 @@ auxiliary-submitters:
+ - Damien Stehlé
+ implementations:
+ - name: ref
+- version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff
++ version: https://github.com/pq-crystals/kyber/tree/standard
+ folder_name: ref
+ compile_opts: -DKYBER_K=4
+- signature_keypair: pqcrystals_kyber1024_ref_keypair
+- signature_enc: pqcrystals_kyber1024_ref_enc
+- signature_dec: pqcrystals_kyber1024_ref_dec
+- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c
+- common_dep: common_ref
++ signature_keypair: pqcrystals_ml_kem_1024_ipd_ref_keypair
++ signature_enc: pqcrystals_ml_kem_1024_ipd_ref_enc
++ signature_dec: pqcrystals_ml_kem_1024_ipd_ref_dec
++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h symmetric-shake.c
+ - name: avx2
+- version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff
++ version: https://github.com/pq-crystals/kyber/tree/standard
+ compile_opts: -DKYBER_K=4
+- signature_keypair: pqcrystals_kyber1024_avx2_keypair
+- signature_enc: pqcrystals_kyber1024_avx2_enc
+- signature_dec: pqcrystals_kyber1024_avx2_dec
+- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h symmetric-shake.c
+- common_dep: common_avx2 common_keccak4x_avx2
++ signature_keypair: pqcrystals_ml_kem_1024_ipd_avx2_keypair
++ signature_enc: pqcrystals_ml_kem_1024_ipd_avx2_enc
++ signature_dec: pqcrystals_ml_kem_1024_ipd_avx2_dec
++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h symmetric-shake.c
+ supported_platforms:
+ - architecture: x86_64
+ operating_systems:
+diff --git a/Kyber512_META.yml b/ML-KEM-512-ipd_META.yml
+index b251701..d20f0b1 100644
+--- a/Kyber512_META.yml
++++ b/ML-KEM-512-ipd_META.yml
+@@ -1,4 +1,4 @@
+-name: Kyber512
++name: ML-KEM-512-ipd
+ type: kem
+ claimed-nist-level: 1
+ claimed-security: IND-CCA2
+@@ -6,8 +6,8 @@ length-public-key: 800
+ length-ciphertext: 768
+ length-secret-key: 1632
+ length-shared-secret: 32
+-nistkat-sha256: bb0481d3325d828817900b709d23917cefbc10026fc857f098979451f67bb0ca
+-testvectors-sha256: 6730bb552c22d9d2176ffb5568e48eb30952cf1f065073ec5f9724f6a3c6ea85
++nistkat-sha256: 76aae1fa3f8367522700b22da635a5bc4ced4298edb0eb9947aa3ba60d62676f
++testvectors-sha256: e1ac6fb45e2511f4170a3527c0c50dcd61336f47113df7a299a61ef8394bd669
+ principal-submitters:
+ - Peter Schwabe
+ auxiliary-submitters:
+@@ -22,22 +22,20 @@ auxiliary-submitters:
+ - Damien Stehlé
+ implementations:
+ - name: ref
+- version: https://github.com/pq-crystals/kyber/commit/74cad307858b61e434490c75f812cb9b9ef7279b
++ version: https://github.com/pq-crystals/kyber/tree/standard
+ folder_name: ref
+ compile_opts: -DKYBER_K=2
+- signature_keypair: pqcrystals_kyber512_ref_keypair
+- signature_enc: pqcrystals_kyber512_ref_enc
+- signature_dec: pqcrystals_kyber512_ref_dec
+- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c
+- common_dep: common_ref
++ signature_keypair: pqcrystals_ml_kem_512_ipd_ref_keypair
++ signature_enc: pqcrystals_ml_kem_512_ipd_ref_enc
++ signature_dec: pqcrystals_ml_kem_512_ipd_ref_dec
++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h symmetric-shake.c
+ - name: avx2
+- version: https://github.com/pq-crystals/kyber/commit/36414d64fc1890ed58d1ca8b1e0cab23635d1ac2
++ version: https://github.com/pq-crystals/kyber/tree/standard
+ compile_opts: -DKYBER_K=2
+- signature_keypair: pqcrystals_kyber512_avx2_keypair
+- signature_enc: pqcrystals_kyber512_avx2_enc
+- signature_dec: pqcrystals_kyber512_avx2_dec
+- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h symmetric-shake.c
+- common_dep: common_avx2 common_keccak4x_avx2
++ signature_keypair: pqcrystals_ml_kem_512_ipd_avx2_keypair
++ signature_enc: pqcrystals_ml_kem_512_ipd_avx2_enc
++ signature_dec: pqcrystals_ml_kem_512_ipd_avx2_dec
++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h symmetric-shake.c
+ supported_platforms:
+ - architecture: x86_64
+ operating_systems:
+diff --git a/Kyber768_META.yml b/ML-KEM-768-ipd_META.yml
+index 7a0cc3d..e768cd5 100644
+--- a/Kyber768_META.yml
++++ b/ML-KEM-768-ipd_META.yml
+@@ -1,4 +1,4 @@
+-name: Kyber768
++name: ML-KEM-768-ipd
+ type: kem
+ claimed-nist-level: 3
+ claimed-security: IND-CCA2
+@@ -6,8 +6,8 @@ length-public-key: 1184
+ length-ciphertext: 1088
+ length-secret-key: 2400
+ length-shared-secret: 32
+-nistkat-sha256: 89e82a5bf2d4ddb2c6444e10409e6d9ca65dafbca67d1a0db2c9b54920a29172
+-testvectors-sha256: 667c8ca2ca93729c0df6ff24588460bad1bbdbfb64ece0fe8563852a7ff348c6
++nistkat-sha256: c7e76b4b30c786b5b70c152a446e7832c1cb42b3816ec048dbeaf7041211b310
++testvectors-sha256: 2586721a714c439f6fef26e29ee1c4c67c6207186f810617f278e6ce3e67ea0d
+ principal-submitters:
+ - Peter Schwabe
+ auxiliary-submitters:
+@@ -22,22 +22,20 @@ auxiliary-submitters:
+ - Damien Stehlé
+ implementations:
+ - name: ref
+- version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff
++ version: https://github.com/pq-crystals/kyber/tree/standard
+ folder_name: ref
+ compile_opts: -DKYBER_K=3
+- signature_keypair: pqcrystals_kyber768_ref_keypair
+- signature_enc: pqcrystals_kyber768_ref_enc
+- signature_dec: pqcrystals_kyber768_ref_dec
+- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c
+- common_dep: common_ref
++ signature_keypair: pqcrystals_ml_kem_768_ipd_ref_keypair
++ signature_enc: pqcrystals_ml_kem_768_ipd_ref_enc
++ signature_dec: pqcrystals_ml_kem_768_ipd_ref_dec
++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h symmetric-shake.c
+ - name: avx2
+- version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff
++ version: https://github.com/pq-crystals/kyber/tree/standard
+ compile_opts: -DKYBER_K=3
+- signature_keypair: pqcrystals_kyber768_avx2_keypair
+- signature_enc: pqcrystals_kyber768_avx2_enc
+- signature_dec: pqcrystals_kyber768_avx2_dec
+- sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h symmetric-shake.c
+- common_dep: common_avx2 common_keccak4x_avx2
++ signature_keypair: pqcrystals_ml_kem_768_ipd_avx2_keypair
++ signature_enc: pqcrystals_ml_kem_768_ipd_avx2_enc
++ signature_dec: pqcrystals_ml_kem_768_ipd_avx2_dec
++ sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h symmetric-shake.c
+ supported_platforms:
+ - architecture: x86_64
+ operating_systems:
+diff --git a/avx2/indcpa.c b/avx2/indcpa.c
+index 4f3b782..572ce49 100644
+--- a/avx2/indcpa.c
++++ b/avx2/indcpa.c
+@@ -175,7 +175,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ unsigned int ctr0, ctr1, ctr2, ctr3;
+ ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+ __m256i f;
+- keccakx4_state state;
++ shake128x4incctx state;
+
+ f = _mm256_loadu_si256((__m256i *)seed);
+ _mm256_store_si256(buf[0].vec, f);
+@@ -204,6 +204,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ buf[3].coeffs[33] = 1;
+ }
+
++ shake128x4_inc_init(&state);
+ shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34);
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+@@ -225,6 +226,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ poly_nttunpack(&a[0].vec[1]);
+ poly_nttunpack(&a[1].vec[0]);
+ poly_nttunpack(&a[1].vec[1]);
++ shake128x4_inc_ctx_release(&state);
+ }
+ #elif KYBER_K == 3
+ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+@@ -232,8 +234,8 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ unsigned int ctr0, ctr1, ctr2, ctr3;
+ ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+ __m256i f;
+- keccakx4_state state;
+- keccak_state state1x;
++ shake128x4incctx state;
++ shake128incctx state1x;
+
+ f = _mm256_loadu_si256((__m256i *)seed);
+ _mm256_store_si256(buf[0].vec, f);
+@@ -262,6 +264,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ buf[3].coeffs[33] = 1;
+ }
+
++ shake128x4_inc_init(&state);
+ shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34);
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+@@ -327,6 +330,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE);
+ ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE);
+ }
++ shake128x4_inc_ctx_release(&state);
+
+ poly_nttunpack(&a[1].vec[1]);
+ poly_nttunpack(&a[1].vec[2]);
+@@ -337,6 +341,8 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ _mm256_store_si256(buf[0].vec, f);
+ buf[0].coeffs[32] = 2;
+ buf[0].coeffs[33] = 2;
++
++ shake128_inc_init(&state1x);
+ shake128_absorb_once(&state1x, buf[0].coeffs, 34);
+ shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x);
+ ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs);
+@@ -344,6 +350,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ shake128_squeezeblocks(buf[0].coeffs, 1, &state1x);
+ ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE);
+ }
++ shake128_inc_ctx_release(&state1x);
+
+ poly_nttunpack(&a[2].vec[2]);
+ }
+@@ -353,7 +360,8 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ unsigned int i, ctr0, ctr1, ctr2, ctr3;
+ ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+ __m256i f;
+- keccakx4_state state;
++ shake128x4incctx state;
++ shake128x4_inc_init(&state);
+
+ for(i=0;i<4;i++) {
+ f = _mm256_loadu_si256((__m256i *)seed);
+@@ -405,6 +413,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+ poly_nttunpack(&a[i].vec[2]);
+ poly_nttunpack(&a[i].vec[3]);
+ }
++ shake128x4_inc_ctx_release(&state);
+ }
+ #endif
+
+diff --git a/avx2/params.h b/avx2/params.h
+index bc70ebf..fdc688e 100644
+--- a/avx2/params.h
++++ b/avx2/params.h
+@@ -12,19 +12,19 @@
+ #ifdef KYBER_90S
+ #define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s
+ #else
+-#define KYBER_NAMESPACE(s) pqcrystals_kyber512_avx2_##s
++#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_avx2_##s
+ #endif
+ #elif (KYBER_K == 3)
+ #ifdef KYBER_90S
+ #define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s
+ #else
+-#define KYBER_NAMESPACE(s) pqcrystals_kyber768_avx2_##s
++#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_avx2_##s
+ #endif
+ #elif (KYBER_K == 4)
+ #ifdef KYBER_90S
+ #define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s
+ #else
+-#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_avx2_##s
++#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_avx2_##s
+ #endif
+ #else
+ #error "KYBER_K must be in {2,3,4}"
+diff --git a/avx2/poly.c b/avx2/poly.c
+index ab148a2..96bad86 100644
+--- a/avx2/poly.c
++++ b/avx2/poly.c
+@@ -2,6 +2,7 @@
+ #include <immintrin.h>
+ #include <string.h>
+ #include "align.h"
++#include "fips202x4.h"
+ #include "params.h"
+ #include "poly.h"
+ #include "ntt.h"
+@@ -412,7 +413,7 @@ void poly_getnoise_eta1_4x(poly *r0,
+ {
+ ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4];
+ __m256i f;
+- keccakx4_state state;
++ shake256x4incctx state;
+
+ f = _mm256_loadu_si256((__m256i *)seed);
+ _mm256_store_si256(buf[0].vec, f);
+@@ -425,8 +426,10 @@ void poly_getnoise_eta1_4x(poly *r0,
+ buf[2].coeffs[32] = nonce2;
+ buf[3].coeffs[32] = nonce3;
+
++ shake256x4_inc_init(&state);
+ shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33);
+ shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state);
++ shake256x4_inc_ctx_release(&state);
+
+ poly_cbd_eta1(r0, buf[0].vec);
+ poly_cbd_eta1(r1, buf[1].vec);
+@@ -447,7 +450,7 @@ void poly_getnoise_eta1122_4x(poly *r0,
+ {
+ ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4];
+ __m256i f;
+- keccakx4_state state;
++ shake256x4incctx state;
+
+ f = _mm256_loadu_si256((__m256i *)seed);
+ _mm256_store_si256(buf[0].vec, f);
+@@ -460,8 +463,10 @@ void poly_getnoise_eta1122_4x(poly *r0,
+ buf[2].coeffs[32] = nonce2;
+ buf[3].coeffs[32] = nonce3;
+
++ shake256x4_inc_init(&state);
+ shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33);
+ shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state);
++ shake256x4_inc_ctx_release(&state);
+
+ poly_cbd_eta1(r0, buf[0].vec);
+ poly_cbd_eta1(r1, buf[1].vec);
+diff --git a/avx2/symmetric.h b/avx2/symmetric.h
+index 627b891..e4941f7 100644
+--- a/avx2/symmetric.h
++++ b/avx2/symmetric.h
+@@ -8,10 +8,10 @@
+ #include "fips202.h"
+ #include "fips202x4.h"
+
+-typedef keccak_state xof_state;
++typedef shake128incctx xof_state;
+
+ #define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb)
+-void kyber_shake128_absorb(keccak_state *s,
++void kyber_shake128_absorb(shake128incctx *s,
+ const uint8_t seed[KYBER_SYMBYTES],
+ uint8_t x,
+ uint8_t y);
+diff --git a/ref/indcpa.c b/ref/indcpa.c
+index 5d74518..4a8b4c8 100644
+--- a/ref/indcpa.c
++++ b/ref/indcpa.c
+@@ -164,6 +164,7 @@ void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
+ unsigned int buflen, off;
+ uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2];
+ xof_state state;
++ xof_init(&state, seed);
+
+ for(i=0;i
{% for scheme in schemes -%}
-#ifdef OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}
+#if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}){%- endif %}
#define OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_public_key {{ scheme['metadata']['length-public-key'] }}
#define OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_secret_key {{ scheme['metadata']['length-secret-key'] }}
#define OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_ciphertext {{ scheme['metadata']['length-ciphertext'] }}
@@ -15,6 +15,16 @@ OQS_KEM *OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_new(void);
OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_keypair(uint8_t *public_key, uint8_t *secret_key);
OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key);
OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
+{% if 'alias_scheme' in scheme %}
+#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_public_key OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_public_key
+#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_secret_key OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_secret_key
+#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_ciphertext OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_ciphertext
+#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_shared_secret OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_length_shared_secret
+OQS_KEM *OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_new(void);
+#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_keypair OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_keypair
+#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_encaps OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_encaps
+#define OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_decaps OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_decaps
+{% endif -%}
#endif
{% endfor -%}
diff --git a/scripts/copy_from_upstream/src/kem/family/kem_scheme.c b/scripts/copy_from_upstream/src/kem/family/kem_scheme.c
index 027a88e76f..058d829e8b 100644
--- a/scripts/copy_from_upstream/src/kem/family/kem_scheme.c
+++ b/scripts/copy_from_upstream/src/kem/family/kem_scheme.c
@@ -31,6 +31,34 @@ OQS_KEM *OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_new(void) {
return kem;
}
+{%- if 'alias_scheme' in scheme %}
+
+/** Alias */
+OQS_KEM *OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_new(void) {
+
+ OQS_KEM *kem = malloc(sizeof(OQS_KEM));
+ if (kem == NULL) {
+ return NULL;
+ }
+ kem->method_name = OQS_KEM_alg_{{ family }}_{{ scheme['alias_scheme'] }};
+ kem->alg_version = "{{ scheme['metadata']['implementations'][0]['version'] }}";
+
+ kem->claimed_nist_level = {{ scheme['metadata']['claimed-nist-level'] }};
+ kem->ind_cca = {{ scheme['metadata']['ind_cca'] }};
+
+ kem->length_public_key = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_public_key;
+ kem->length_secret_key = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_secret_key;
+ kem->length_ciphertext = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_ciphertext;
+ kem->length_shared_secret = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_length_shared_secret;
+
+ kem->keypair = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_keypair;
+ kem->encaps = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_encaps;
+ kem->decaps = OQS_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_decaps;
+
+ return kem;
+}
+{%- endif -%}
+
{%- for impl in scheme['metadata']['implementations'] if impl['name'] == scheme['default_implementation'] %}
{%- if impl['signature_keypair'] %}
diff --git a/scripts/copy_from_upstream/src/kem/kem.c/alg_identifier.fragment b/scripts/copy_from_upstream/src/kem/kem.c/alg_identifier.fragment
index b11c404684..c6729cb653 100644
--- a/scripts/copy_from_upstream/src/kem/kem.c/alg_identifier.fragment
+++ b/scripts/copy_from_upstream/src/kem/kem.c/alg_identifier.fragment
@@ -1,3 +1,7 @@
{% for family in instructions['kems'] %}{% for scheme in family['schemes'] %}
- OQS_KEM_alg_{{ family['name'] }}_{{ scheme['scheme'] }},{% endfor %}{% endfor %}
+ OQS_KEM_alg_{{ family['name'] }}_{{ scheme['scheme'] }},
+{%- if 'alias_scheme' in scheme %}
+ OQS_KEM_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }},
+{%- endif -%}
+{% endfor %}{% endfor %}
\ No newline at end of file
diff --git a/scripts/copy_from_upstream/src/kem/kem.c/enabled_case.fragment b/scripts/copy_from_upstream/src/kem/kem.c/enabled_case.fragment
index da1ff1d885..ba58364ee5 100644
--- a/scripts/copy_from_upstream/src/kem/kem.c/enabled_case.fragment
+++ b/scripts/copy_from_upstream/src/kem/kem.c/enabled_case.fragment
@@ -4,5 +4,14 @@
return 1;
#else
return 0;
-#endif{% endfor %}{% endfor %}
+#endif
+{% if 'alias_scheme' in scheme %}
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }})) {
+#ifdef OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}
+ return 1;
+#else
+ return 0;
+#endif
+{% endif -%}
+{% endfor %}{% endfor %}
diff --git a/scripts/copy_from_upstream/src/kem/kem.c/new_case.fragment b/scripts/copy_from_upstream/src/kem/kem.c/new_case.fragment
index af0b40b04d..63db6d636a 100644
--- a/scripts/copy_from_upstream/src/kem/kem.c/new_case.fragment
+++ b/scripts/copy_from_upstream/src/kem/kem.c/new_case.fragment
@@ -4,5 +4,14 @@
return OQS_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}_new();
#else
return NULL;
-#endif{% endfor %}{% endfor %}
+#endif
+{% if 'alias_scheme' in scheme %}
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }})) {
+#ifdef OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}
+ return OQS_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}_new();
+#else
+ return NULL;
+#endif
+{% endif -%}
+{% endfor %}{% endfor %}
diff --git a/scripts/copy_from_upstream/src/kem/kem.h/alg_identifier.fragment b/scripts/copy_from_upstream/src/kem/kem.h/alg_identifier.fragment
index 635a7d6622..e3166e7ccb 100644
--- a/scripts/copy_from_upstream/src/kem/kem.h/alg_identifier.fragment
+++ b/scripts/copy_from_upstream/src/kem/kem.h/alg_identifier.fragment
@@ -1,4 +1,9 @@
{% for family in instructions['kems'] %}{% for scheme in family['schemes'] %}
/** Algorithm identifier for {{ scheme['pretty_name_full'] }} KEM. */
-#define OQS_KEM_alg_{{ family['name'] }}_{{ scheme['scheme'] }} "{{ scheme['pretty_name_full'] }}"{% endfor %}{% endfor %}
+#define OQS_KEM_alg_{{ family['name'] }}_{{ scheme['scheme'] }} "{{ scheme['pretty_name_full'] }}"
+{%- if 'alias_scheme' in scheme %}
+/** Algorithm identifier for {{ scheme['alias_pretty_name_full'] }} KEM. */
+#define OQS_KEM_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }} "{{ scheme['alias_pretty_name_full'] }}"
+{%- endif -%}
+{% endfor %}{% endfor %}
diff --git a/scripts/copy_from_upstream/src/kem/kem.h/algs_length.fragment b/scripts/copy_from_upstream/src/kem/kem.h/algs_length.fragment
index e86a2a0faf..79e87d08b7 100644
--- a/scripts/copy_from_upstream/src/kem/kem.h/algs_length.fragment
+++ b/scripts/copy_from_upstream/src/kem/kem.h/algs_length.fragment
@@ -1,4 +1,5 @@
{% set unary %}{% for family in instructions['kems'] %}{% for scheme in family['schemes'] %}1{% endfor %}{% endfor %}{% endset %}
+{% set unary_alias %}{% for family in instructions['kems'] %}{% for scheme in family['schemes'] if 'alias_scheme' in scheme %}2{% endfor %}{% endfor %}{% endset %}
/** Number of algorithm identifiers above. */
-#define OQS_KEM_algs_length {{ unary|length + non_upstream_kems }}
+#define OQS_KEM_algs_length {{ unary|length + unary_alias|length + non_upstream_kems }}
diff --git a/scripts/copy_from_upstream/src/oqsconfig.h.cmake/add_alg_enable_defines.fragment b/scripts/copy_from_upstream/src/oqsconfig.h.cmake/add_alg_enable_defines.fragment
index 82d5d9b8df..2bc517ac22 100644
--- a/scripts/copy_from_upstream/src/oqsconfig.h.cmake/add_alg_enable_defines.fragment
+++ b/scripts/copy_from_upstream/src/oqsconfig.h.cmake/add_alg_enable_defines.fragment
@@ -3,6 +3,9 @@
#cmakedefine OQS_ENABLE_KEM_{{ family['name']|upper }} 1
{%- for scheme in family['schemes'] %}
#cmakedefine OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }} 1
+{%- if 'alias_scheme' in scheme %}
+#cmakedefine OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }} 1
+{%- endif -%}
{%- for impl in scheme['metadata']['implementations'] if impl['name'] != family['default_implementation'] %}
#cmakedefine OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}_{{ impl['name'] }} 1
{%- endfor -%}
@@ -14,6 +17,9 @@
#cmakedefine OQS_ENABLE_SIG_{{ family['name']|upper }} 1
{%- for scheme in family['schemes'] %}
#cmakedefine OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['scheme'] }} 1
+{%- if 'alias_scheme' in scheme %}
+#cmakedefine OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }} 1
+{%- endif -%}
{%- for impl in scheme['metadata']['implementations'] if impl['name'] != family['default_implementation'] %}
#cmakedefine OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['scheme'] }}_{{ impl['name'] }} 1
{%- endfor -%}
diff --git a/scripts/copy_from_upstream/src/sig/family/sig_family.h b/scripts/copy_from_upstream/src/sig/family/sig_family.h
index 2af20417ca..b17d621635 100644
--- a/scripts/copy_from_upstream/src/sig/family/sig_family.h
+++ b/scripts/copy_from_upstream/src/sig/family/sig_family.h
@@ -6,7 +6,7 @@
#include <oqs/oqs.h>
{% for scheme in schemes -%}
-#ifdef OQS_ENABLE_SIG_{{ family }}_{{ scheme['scheme'] }}
+#if defined(OQS_ENABLE_SIG_{{ family }}_{{ scheme['scheme'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_SIG_{{ family }}_{{ scheme['alias_scheme'] }}){%- endif %}
#define OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_public_key {{ scheme['metadata']['length-public-key'] }}
#define OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_secret_key {{ scheme['metadata']['length-secret-key'] }}
#define OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_signature {{ scheme['metadata']['length-signature'] }}
@@ -15,6 +15,15 @@ OQS_SIG *OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_new(void);
OQS_API OQS_STATUS OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_keypair(uint8_t *public_key, uint8_t *secret_key);
OQS_API OQS_STATUS OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_sign(uint8_t *signature, size_t *signature_len, const uint8_t *message, size_t message_len, const uint8_t *secret_key);
OQS_API OQS_STATUS OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_verify(const uint8_t *message, size_t message_len, const uint8_t *signature, size_t signature_len, const uint8_t *public_key);
+{% if 'alias_scheme' in scheme %}
+#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_length_public_key OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_public_key
+#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_length_secret_key OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_secret_key
+#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_length_signature OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_signature
+OQS_SIG *OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_new(void);
+#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_keypair OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_keypair
+#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_sign OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_sign
+#define OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_verify OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_verify
+{% endif -%}
#endif
{% endfor -%}
diff --git a/scripts/copy_from_upstream/src/sig/family/sig_scheme.c b/scripts/copy_from_upstream/src/sig/family/sig_scheme.c
index 5b8927a83c..928ef3d65f 100644
--- a/scripts/copy_from_upstream/src/sig/family/sig_scheme.c
+++ b/scripts/copy_from_upstream/src/sig/family/sig_scheme.c
@@ -30,6 +30,33 @@ OQS_SIG *OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_new(void) {
return sig;
}
+{%- if 'alias_scheme' in scheme %}
+
+/** Alias */
+OQS_SIG *OQS_SIG_{{ family }}_{{ scheme['alias_scheme'] }}_new(void) {
+
+ OQS_SIG *sig = malloc(sizeof(OQS_SIG));
+ if (sig == NULL) {
+ return NULL;
+ }
+ sig->method_name = OQS_SIG_alg_{{ family }}_{{ scheme['alias_scheme'] }};
+ sig->alg_version = "{{ scheme['metadata']['implementations'][0]['version'] }}";
+
+ sig->claimed_nist_level = {{ scheme['metadata']['claimed-nist-level'] }};
+ sig->euf_cma = {{ scheme['metadata']['euf_cma'] }};
+
+ sig->length_public_key = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_public_key;
+ sig->length_secret_key = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_secret_key;
+ sig->length_signature = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_length_signature;
+
+ sig->keypair = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_keypair;
+ sig->sign = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_sign;
+ sig->verify = OQS_SIG_{{ family }}_{{ scheme['scheme'] }}_verify;
+
+ return sig;
+}
+{%- endif -%}
+
{%- for impl in scheme['metadata']['implementations'] if impl['name'] == scheme['default_implementation'] %}
{%- if impl['signature_keypair'] %}
diff --git a/scripts/copy_from_upstream/src/sig/sig.c/alg_identifier.fragment b/scripts/copy_from_upstream/src/sig/sig.c/alg_identifier.fragment
index 3b4e54ad29..87d9c6fa5f 100644
--- a/scripts/copy_from_upstream/src/sig/sig.c/alg_identifier.fragment
+++ b/scripts/copy_from_upstream/src/sig/sig.c/alg_identifier.fragment
@@ -1,3 +1,6 @@
{% for family in instructions['sigs'] %}{% for scheme in family['schemes'] %}
- OQS_SIG_alg_{{ family['name'] }}_{{ scheme['scheme'] }},{% endfor %}{% endfor %}
-
\ No newline at end of file
+ OQS_SIG_alg_{{ family['name'] }}_{{ scheme['scheme'] }},
+{%- if 'alias_scheme' in scheme %}
+ OQS_SIG_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }},
+{%- endif -%}
+{% endfor %}{% endfor %}
diff --git a/scripts/copy_from_upstream/src/sig/sig.c/enabled_case.fragment b/scripts/copy_from_upstream/src/sig/sig.c/enabled_case.fragment
index c85d08adff..8ff702c3f9 100644
--- a/scripts/copy_from_upstream/src/sig/sig.c/enabled_case.fragment
+++ b/scripts/copy_from_upstream/src/sig/sig.c/enabled_case.fragment
@@ -4,5 +4,13 @@
return 1;
#else
return 0;
-#endif{% endfor %}{% endfor %}
-
+#endif
+{% if 'alias_scheme' in scheme %}
+ } else if (0 == strcasecmp(method_name, OQS_SIG_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }})) {
+#ifdef OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }}
+ return 1;
+#else
+ return 0;
+#endif
+{% endif -%}
+{% endfor %}{% endfor %}
\ No newline at end of file
diff --git a/scripts/copy_from_upstream/src/sig/sig.c/new_case.fragment b/scripts/copy_from_upstream/src/sig/sig.c/new_case.fragment
index e874f8e14f..2600f9b35d 100644
--- a/scripts/copy_from_upstream/src/sig/sig.c/new_case.fragment
+++ b/scripts/copy_from_upstream/src/sig/sig.c/new_case.fragment
@@ -4,5 +4,13 @@
return OQS_SIG_{{ family['name'] }}_{{ scheme['scheme'] }}_new();
#else
return NULL;
-#endif{% endfor %}{% endfor %}
-
+#endif
+{% if 'alias_scheme' in scheme %}
+ } else if (0 == strcasecmp(method_name, OQS_SIG_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }})) {
+#ifdef OQS_ENABLE_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }}
+ return OQS_SIG_{{ family['name'] }}_{{ scheme['alias_scheme'] }}_new();
+#else
+ return NULL;
+#endif
+{% endif -%}
+{% endfor %}{% endfor %}
diff --git a/scripts/copy_from_upstream/src/sig/sig.h/alg_identifier.fragment b/scripts/copy_from_upstream/src/sig/sig.h/alg_identifier.fragment
index dc940087b3..9de830f9ab 100644
--- a/scripts/copy_from_upstream/src/sig/sig.h/alg_identifier.fragment
+++ b/scripts/copy_from_upstream/src/sig/sig.h/alg_identifier.fragment
@@ -1,4 +1,9 @@
{% for family in instructions['sigs'] %}{% for scheme in family['schemes'] %}
/** Algorithm identifier for {{ scheme['pretty_name_full'] }} */
-#define OQS_SIG_alg_{{ family['name'] }}_{{ scheme['scheme'] }} "{{ scheme['pretty_name_full'] }}"{% endfor %}{% endfor %}
+#define OQS_SIG_alg_{{ family['name'] }}_{{ scheme['scheme'] }} "{{ scheme['pretty_name_full'] }}"
+{%- if 'alias_scheme' in scheme %}
+/** Algorithm identifier for {{ scheme['alias_pretty_name_full'] }} SIG. */
+#define OQS_SIG_alg_{{ family['name'] }}_{{ scheme['alias_scheme'] }} "{{ scheme['alias_pretty_name_full'] }}"
+{%- endif -%}
+{% endfor %}{% endfor %}
diff --git a/scripts/copy_from_upstream/src/sig/sig.h/algs_length.fragment b/scripts/copy_from_upstream/src/sig/sig.h/algs_length.fragment
index ea35d7ab55..0ac7133145 100644
--- a/scripts/copy_from_upstream/src/sig/sig.h/algs_length.fragment
+++ b/scripts/copy_from_upstream/src/sig/sig.h/algs_length.fragment
@@ -1,4 +1,5 @@
{% set unary %}{% for family in instructions['sigs'] %}{% for scheme in family['schemes'] %}1{% endfor %}{% endfor %}{% endset %}
+{% set unary_alias %}{% for family in instructions['sigs'] %}{% for scheme in family['schemes'] if 'alias_scheme' in scheme %}2{% endfor %}{% endfor %}{% endset %}
/** Number of algorithm identifiers above. */
-#define OQS_SIG_algs_length {{ unary|length }}
+#define OQS_SIG_algs_length {{ unary|length + unary_alias|length }}
diff --git a/scripts/copy_from_upstream/tests/kat_sig.c/combine_message_signature.fragment b/scripts/copy_from_upstream/tests/kat_sig.c/combine_message_signature.fragment
index c76d13512a..25609f254e 100644
--- a/scripts/copy_from_upstream/tests/kat_sig.c/combine_message_signature.fragment
+++ b/scripts/copy_from_upstream/tests/kat_sig.c/combine_message_signature.fragment
@@ -1,5 +1,5 @@
{% for family in instructions['sigs'] %}{% for scheme in family['schemes'] %}
- } else if (0 == strcmp(sig->method_name, "{{ scheme['pretty_name_full'] }}")) {
+ } else if (0 == strcmp(sig->method_name, "{{ scheme['pretty_name_full'] }}"){%- if 'alias_scheme' in scheme %} || 0 == strcmp(sig->method_name, "{{ scheme['alias_pretty_name_full'] }}"){%- endif -%}) {
{%- if scheme['signed_msg_order'] == 'sig_then_msg' %}
// signed_msg = signature || msg
*signed_msg_len = signature_len + msg_len;
diff --git a/scripts/update_docs_from_yaml.py b/scripts/update_docs_from_yaml.py
index 96a4b1887c..ef152d376a 100644
--- a/scripts/update_docs_from_yaml.py
+++ b/scripts/update_docs_from_yaml.py
@@ -62,6 +62,7 @@ def do_it(liboqs_root):
out_md.write('\n## Parameter set summary\n\n')
table = [['Parameter set',
+ 'Parameter set alias',
'Security model',
'Claimed NIST Level',
'Public key size (bytes)',
@@ -70,6 +71,7 @@ def do_it(liboqs_root):
'Shared secret size (bytes)']]
for parameter_set in kem_yaml['parameter-sets']:
table.append([parameter_set['name'],
+ parameter_set['alias'] if 'alias' in parameter_set else "NA",
parameter_set['claimed-security'],
parameter_set['claimed-nist-level'],
parameter_set['length-public-key'],
@@ -186,6 +188,7 @@ def do_it(liboqs_root):
out_md.write('\n## Parameter set summary\n\n')
table = [['Parameter set',
+ 'Parameter set alias',
'Security model',
'Claimed NIST Level',
'Public key size (bytes)',
@@ -193,6 +196,7 @@ def do_it(liboqs_root):
'Signature size (bytes)']]
for parameter_set in sig_yaml['parameter-sets']:
table.append([parameter_set['name'].replace('_', '\_'),
+ parameter_set['alias'] if 'alias' in parameter_set else "NA",
parameter_set['claimed-security'],
parameter_set['claimed-nist-level'],
parameter_set['length-public-key'],
@@ -291,13 +295,21 @@ def do_it(liboqs_root):
parameter_sets = kem_yaml['parameter-sets']
if any(impl['large-stack-usage'] for impl in parameter_sets[0]['implementations']):
readme.write('- **{}**: {}†'.format(kem_yaml['name'], parameter_sets[0]['name']))
+ if 'alias' in parameter_sets[0]:
+ readme.write(' (alias: {})'.format(parameter_sets[0]['alias']))
else:
readme.write('- **{}**: {}'.format(kem_yaml['name'], parameter_sets[0]['name']))
+ if 'alias' in parameter_sets[0]:
+ readme.write(' (alias: {})'.format(parameter_sets[0]['alias']))
for parameter_set in parameter_sets[1:]:
if any(impl['large-stack-usage'] for impl in parameter_set['implementations']):
readme.write(', {}†'.format(parameter_set['name']))
+ if 'alias' in parameter_set:
+ readme.write(' (alias: {})'.format(parameter_set['alias']))
else:
readme.write(', {}'.format(parameter_set['name']))
+ if 'alias' in parameter_set:
+ readme.write(' (alias: {})'.format(parameter_set['alias']))
readme.write('\n')
readme.write(postamble)
@@ -318,13 +330,21 @@ def do_it(liboqs_root):
parameter_sets = sig_yaml['parameter-sets']
if any(impl['large-stack-usage'] for impl in parameter_sets[0]['implementations']):
readme.write('- **{}**: {}†'.format(sig_yaml['name'], parameter_sets[0]['name'].replace('_','\_')))
+ if 'alias' in parameter_sets[0]:
+ readme.write(' (alias: {})'.format(parameter_sets[0]['alias']).replace('_','\_'))
else:
readme.write('- **{}**: {}'.format(sig_yaml['name'], parameter_sets[0]['name'].replace('_','\_')))
+ if 'alias' in parameter_sets[0]:
+ readme.write(' (alias: {})'.format(parameter_sets[0]['alias']).replace('_','\_'))
for parameter_set in parameter_sets[1:]:
if any(impl['large-stack-usage'] for impl in parameter_set['implementations']):
readme.write(', {}†'.format(parameter_set['name'].replace('_', '\_')))
+ if 'alias' in parameter_set:
+ readme.write(' (alias: {})'.format(parameter_set['alias']).replace('_','\_'))
else:
readme.write(', {}'.format(parameter_set['name'].replace('_', '\_')))
+ if 'alias' in parameter_set:
+ readme.write(' (alias: {})'.format(parameter_set['alias']).replace('_','\_'))
readme.write('\n')
sphincs_yml = sig_yamls[-1]
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ca5adf4070..1f9ed06e5c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -34,10 +34,18 @@ if(OQS_ENABLE_KEM_KYBER)
add_subdirectory(kem/kyber)
set(KEM_OBJS ${KEM_OBJS} ${KYBER_OBJS})
endif()
+if(OQS_ENABLE_KEM_ML_KEM)
+ add_subdirectory(kem/ml_kem)
+ set(KEM_OBJS ${KEM_OBJS} ${ML_KEM_OBJS})
+endif()
if(OQS_ENABLE_SIG_DILITHIUM)
add_subdirectory(sig/dilithium)
set(SIG_OBJS ${SIG_OBJS} ${DILITHIUM_OBJS})
endif()
+if(OQS_ENABLE_SIG_ML_DSA)
+ add_subdirectory(sig/ml_dsa)
+ set(SIG_OBJS ${SIG_OBJS} ${ML_DSA_OBJS})
+endif()
if(OQS_ENABLE_SIG_FALCON)
add_subdirectory(sig/falcon)
set(SIG_OBJS ${SIG_OBJS} ${FALCON_OBJS})
diff --git a/src/kem/classic_mceliece/kem_classic_mceliece.h b/src/kem/classic_mceliece/kem_classic_mceliece.h
index 766b751c59..2bbd969820 100644
--- a/src/kem/classic_mceliece/kem_classic_mceliece.h
+++ b/src/kem/classic_mceliece/kem_classic_mceliece.h
@@ -5,7 +5,7 @@
#include <oqs/oqs.h>
-#ifdef OQS_ENABLE_KEM_classic_mceliece_348864
+#if defined(OQS_ENABLE_KEM_classic_mceliece_348864)
#define OQS_KEM_classic_mceliece_348864_length_public_key 261120
#define OQS_KEM_classic_mceliece_348864_length_secret_key 6492
#define OQS_KEM_classic_mceliece_348864_length_ciphertext 96
@@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_348864_encaps(uint8_t *ciphertext, u
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_348864_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_348864f
+#if defined(OQS_ENABLE_KEM_classic_mceliece_348864f)
#define OQS_KEM_classic_mceliece_348864f_length_public_key 261120
#define OQS_KEM_classic_mceliece_348864f_length_secret_key 6492
#define OQS_KEM_classic_mceliece_348864f_length_ciphertext 96
@@ -27,7 +27,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_348864f_encaps(uint8_t *ciphertext,
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_348864f_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_460896
+#if defined(OQS_ENABLE_KEM_classic_mceliece_460896)
#define OQS_KEM_classic_mceliece_460896_length_public_key 524160
#define OQS_KEM_classic_mceliece_460896_length_secret_key 13608
#define OQS_KEM_classic_mceliece_460896_length_ciphertext 156
@@ -38,7 +38,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_460896_encaps(uint8_t *ciphertext, u
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_460896_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_460896f
+#if defined(OQS_ENABLE_KEM_classic_mceliece_460896f)
#define OQS_KEM_classic_mceliece_460896f_length_public_key 524160
#define OQS_KEM_classic_mceliece_460896f_length_secret_key 13608
#define OQS_KEM_classic_mceliece_460896f_length_ciphertext 156
@@ -49,7 +49,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_460896f_encaps(uint8_t *ciphertext,
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_460896f_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_6688128
+#if defined(OQS_ENABLE_KEM_classic_mceliece_6688128)
#define OQS_KEM_classic_mceliece_6688128_length_public_key 1044992
#define OQS_KEM_classic_mceliece_6688128_length_secret_key 13932
#define OQS_KEM_classic_mceliece_6688128_length_ciphertext 208
@@ -60,7 +60,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6688128_encaps(uint8_t *ciphertext,
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6688128_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_6688128f
+#if defined(OQS_ENABLE_KEM_classic_mceliece_6688128f)
#define OQS_KEM_classic_mceliece_6688128f_length_public_key 1044992
#define OQS_KEM_classic_mceliece_6688128f_length_secret_key 13932
#define OQS_KEM_classic_mceliece_6688128f_length_ciphertext 208
@@ -71,7 +71,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6688128f_encaps(uint8_t *ciphertext,
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6688128f_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_6960119
+#if defined(OQS_ENABLE_KEM_classic_mceliece_6960119)
#define OQS_KEM_classic_mceliece_6960119_length_public_key 1047319
#define OQS_KEM_classic_mceliece_6960119_length_secret_key 13948
#define OQS_KEM_classic_mceliece_6960119_length_ciphertext 194
@@ -82,7 +82,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6960119_encaps(uint8_t *ciphertext,
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6960119_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_6960119f
+#if defined(OQS_ENABLE_KEM_classic_mceliece_6960119f)
#define OQS_KEM_classic_mceliece_6960119f_length_public_key 1047319
#define OQS_KEM_classic_mceliece_6960119f_length_secret_key 13948
#define OQS_KEM_classic_mceliece_6960119f_length_ciphertext 194
@@ -93,7 +93,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6960119f_encaps(uint8_t *ciphertext,
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_6960119f_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_8192128
+#if defined(OQS_ENABLE_KEM_classic_mceliece_8192128)
#define OQS_KEM_classic_mceliece_8192128_length_public_key 1357824
#define OQS_KEM_classic_mceliece_8192128_length_secret_key 14120
#define OQS_KEM_classic_mceliece_8192128_length_ciphertext 208
@@ -104,7 +104,7 @@ OQS_API OQS_STATUS OQS_KEM_classic_mceliece_8192128_encaps(uint8_t *ciphertext,
OQS_API OQS_STATUS OQS_KEM_classic_mceliece_8192128_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_classic_mceliece_8192128f
+#if defined(OQS_ENABLE_KEM_classic_mceliece_8192128f)
#define OQS_KEM_classic_mceliece_8192128f_length_public_key 1357824
#define OQS_KEM_classic_mceliece_8192128f_length_secret_key 14120
#define OQS_KEM_classic_mceliece_8192128f_length_ciphertext 208
diff --git a/src/kem/hqc/kem_hqc.h b/src/kem/hqc/kem_hqc.h
index 1df06e1c11..b1f022374d 100644
--- a/src/kem/hqc/kem_hqc.h
+++ b/src/kem/hqc/kem_hqc.h
@@ -5,7 +5,7 @@
#include <oqs/oqs.h>
-#ifdef OQS_ENABLE_KEM_hqc_128
+#if defined(OQS_ENABLE_KEM_hqc_128)
#define OQS_KEM_hqc_128_length_public_key 2249
#define OQS_KEM_hqc_128_length_secret_key 2305
#define OQS_KEM_hqc_128_length_ciphertext 4433
@@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_KEM_hqc_128_encaps(uint8_t *ciphertext, uint8_t *shared_s
OQS_API OQS_STATUS OQS_KEM_hqc_128_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_hqc_192
+#if defined(OQS_ENABLE_KEM_hqc_192)
#define OQS_KEM_hqc_192_length_public_key 4522
#define OQS_KEM_hqc_192_length_secret_key 4586
#define OQS_KEM_hqc_192_length_ciphertext 8978
@@ -27,7 +27,7 @@ OQS_API OQS_STATUS OQS_KEM_hqc_192_encaps(uint8_t *ciphertext, uint8_t *shared_s
OQS_API OQS_STATUS OQS_KEM_hqc_192_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_hqc_256
+#if defined(OQS_ENABLE_KEM_hqc_256)
#define OQS_KEM_hqc_256_length_public_key 7245
#define OQS_KEM_hqc_256_length_secret_key 7317
#define OQS_KEM_hqc_256_length_ciphertext 14421
diff --git a/src/kem/kem.c b/src/kem/kem.c
index 01448af121..0a340e4f38 100644
--- a/src/kem/kem.c
+++ b/src/kem/kem.c
@@ -34,6 +34,12 @@ OQS_API const char *OQS_KEM_alg_identifier(size_t i) {
OQS_KEM_alg_kyber_512,
OQS_KEM_alg_kyber_768,
OQS_KEM_alg_kyber_1024,
+ OQS_KEM_alg_ml_kem_512_ipd,
+ OQS_KEM_alg_ml_kem_512,
+ OQS_KEM_alg_ml_kem_768_ipd,
+ OQS_KEM_alg_ml_kem_768,
+ OQS_KEM_alg_ml_kem_1024_ipd,
+ OQS_KEM_alg_ml_kem_1024,
///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALG_IDENTIFIER_END
OQS_KEM_alg_ntruprime_sntrup761,
OQS_KEM_alg_frodokem_640_aes,
@@ -82,96 +88,154 @@ OQS_API int OQS_KEM_alg_is_enabled(const char *method_name) {
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_348864f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_348864f
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_460896)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_460896
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_460896f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_460896f
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6688128)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_6688128
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6688128f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_6688128f
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6960119)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_6960119
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6960119f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_6960119f
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_8192128)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_8192128
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_8192128f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_8192128f
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_128)) {
#ifdef OQS_ENABLE_KEM_hqc_128
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_192)) {
#ifdef OQS_ENABLE_KEM_hqc_192
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_256)) {
#ifdef OQS_ENABLE_KEM_hqc_256
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_512)) {
#ifdef OQS_ENABLE_KEM_kyber_512
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_768)) {
#ifdef OQS_ENABLE_KEM_kyber_768
return 1;
#else
return 0;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_1024)) {
#ifdef OQS_ENABLE_KEM_kyber_1024
return 1;
#else
return 0;
#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_512_ipd)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_512_ipd
+ return 1;
+#else
+ return 0;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_512)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_512
+ return 1;
+#else
+ return 0;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_768_ipd)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_768_ipd
+ return 1;
+#else
+ return 0;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_768)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_768
+ return 1;
+#else
+ return 0;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_1024_ipd)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_1024_ipd
+ return 1;
+#else
+ return 0;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_1024)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_1024
+ return 1;
+#else
+ return 0;
+#endif
+
///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ENABLED_CASE_END
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_ntruprime_sntrup761)) {
#ifdef OQS_ENABLE_KEM_ntruprime_sntrup761
@@ -250,96 +314,154 @@ OQS_API OQS_KEM *OQS_KEM_new(const char *method_name) {
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_348864f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_348864f
return OQS_KEM_classic_mceliece_348864f_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_460896)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_460896
return OQS_KEM_classic_mceliece_460896_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_460896f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_460896f
return OQS_KEM_classic_mceliece_460896f_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6688128)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_6688128
return OQS_KEM_classic_mceliece_6688128_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6688128f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_6688128f
return OQS_KEM_classic_mceliece_6688128f_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6960119)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_6960119
return OQS_KEM_classic_mceliece_6960119_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_6960119f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_6960119f
return OQS_KEM_classic_mceliece_6960119f_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_8192128)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_8192128
return OQS_KEM_classic_mceliece_8192128_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_classic_mceliece_8192128f)) {
#ifdef OQS_ENABLE_KEM_classic_mceliece_8192128f
return OQS_KEM_classic_mceliece_8192128f_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_128)) {
#ifdef OQS_ENABLE_KEM_hqc_128
return OQS_KEM_hqc_128_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_192)) {
#ifdef OQS_ENABLE_KEM_hqc_192
return OQS_KEM_hqc_192_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_hqc_256)) {
#ifdef OQS_ENABLE_KEM_hqc_256
return OQS_KEM_hqc_256_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_512)) {
#ifdef OQS_ENABLE_KEM_kyber_512
return OQS_KEM_kyber_512_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_768)) {
#ifdef OQS_ENABLE_KEM_kyber_768
return OQS_KEM_kyber_768_new();
#else
return NULL;
#endif
+
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_kyber_1024)) {
#ifdef OQS_ENABLE_KEM_kyber_1024
return OQS_KEM_kyber_1024_new();
#else
return NULL;
#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_512_ipd)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_512_ipd
+ return OQS_KEM_ml_kem_512_ipd_new();
+#else
+ return NULL;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_512)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_512
+ return OQS_KEM_ml_kem_512_new();
+#else
+ return NULL;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_768_ipd)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_768_ipd
+ return OQS_KEM_ml_kem_768_ipd_new();
+#else
+ return NULL;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_768)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_768
+ return OQS_KEM_ml_kem_768_new();
+#else
+ return NULL;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_1024_ipd)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_1024_ipd
+ return OQS_KEM_ml_kem_1024_ipd_new();
+#else
+ return NULL;
+#endif
+
+ } else if (0 == strcasecmp(method_name, OQS_KEM_alg_ml_kem_1024)) {
+#ifdef OQS_ENABLE_KEM_ml_kem_1024
+ return OQS_KEM_ml_kem_1024_new();
+#else
+ return NULL;
+#endif
+
///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_NEW_CASE_END
} else if (0 == strcasecmp(method_name, OQS_KEM_alg_ntruprime_sntrup761)) {
#ifdef OQS_ENABLE_KEM_ntruprime_sntrup761
diff --git a/src/kem/kem.h b/src/kem/kem.h
index e2ea8d9d67..0e579c0477 100644
--- a/src/kem/kem.h
+++ b/src/kem/kem.h
@@ -70,6 +70,18 @@ extern "C" {
#define OQS_KEM_alg_kyber_768 "Kyber768"
/** Algorithm identifier for Kyber1024 KEM. */
#define OQS_KEM_alg_kyber_1024 "Kyber1024"
+/** Algorithm identifier for ML-KEM-512-ipd KEM. */
+#define OQS_KEM_alg_ml_kem_512_ipd "ML-KEM-512-ipd"
+/** Algorithm identifier for ML-KEM-512 KEM. */
+#define OQS_KEM_alg_ml_kem_512 "ML-KEM-512"
+/** Algorithm identifier for ML-KEM-768-ipd KEM. */
+#define OQS_KEM_alg_ml_kem_768_ipd "ML-KEM-768-ipd"
+/** Algorithm identifier for ML-KEM-768 KEM. */
+#define OQS_KEM_alg_ml_kem_768 "ML-KEM-768"
+/** Algorithm identifier for ML-KEM-1024-ipd KEM. */
+#define OQS_KEM_alg_ml_kem_1024_ipd "ML-KEM-1024-ipd"
+/** Algorithm identifier for ML-KEM-1024 KEM. */
+#define OQS_KEM_alg_ml_kem_1024 "ML-KEM-1024"
///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALG_IDENTIFIER_END
/** Algorithm identifier for sntrup761 KEM. */
#define OQS_KEM_alg_ntruprime_sntrup761 "sntrup761"
@@ -87,8 +99,9 @@ extern "C" {
#define OQS_KEM_alg_frodokem_1344_shake "FrodoKEM-1344-SHAKE"
// EDIT-WHEN-ADDING-KEM
///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALGS_LENGTH_START
+
/** Number of algorithm identifiers above. */
-#define OQS_KEM_algs_length 26
+#define OQS_KEM_algs_length 32
///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_ALGS_LENGTH_END
/**
@@ -269,6 +282,9 @@ OQS_API void OQS_KEM_free(OQS_KEM *kem);
#ifdef OQS_ENABLE_KEM_KYBER
#include <oqs/kem_kyber.h>
#endif /* OQS_ENABLE_KEM_KYBER */
+#ifdef OQS_ENABLE_KEM_ML_KEM
+#include <oqs/kem_ml_kem.h>
+#endif /* OQS_ENABLE_KEM_ML_KEM */
///// OQS_COPY_FROM_UPSTREAM_FRAGMENT_INCLUDE_END
#ifdef OQS_ENABLE_KEM_NTRUPRIME
#include <oqs/kem_ntruprime.h>
diff --git a/src/kem/kyber/kem_kyber.h b/src/kem/kyber/kem_kyber.h
index 5d24808488..cb475aff27 100644
--- a/src/kem/kyber/kem_kyber.h
+++ b/src/kem/kyber/kem_kyber.h
@@ -5,7 +5,7 @@
#include <oqs/oqs.h>
-#ifdef OQS_ENABLE_KEM_kyber_512
+#if defined(OQS_ENABLE_KEM_kyber_512)
#define OQS_KEM_kyber_512_length_public_key 800
#define OQS_KEM_kyber_512_length_secret_key 1632
#define OQS_KEM_kyber_512_length_ciphertext 768
@@ -16,7 +16,7 @@ OQS_API OQS_STATUS OQS_KEM_kyber_512_encaps(uint8_t *ciphertext, uint8_t *shared
OQS_API OQS_STATUS OQS_KEM_kyber_512_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_kyber_768
+#if defined(OQS_ENABLE_KEM_kyber_768)
#define OQS_KEM_kyber_768_length_public_key 1184
#define OQS_KEM_kyber_768_length_secret_key 2400
#define OQS_KEM_kyber_768_length_ciphertext 1088
@@ -27,7 +27,7 @@ OQS_API OQS_STATUS OQS_KEM_kyber_768_encaps(uint8_t *ciphertext, uint8_t *shared
OQS_API OQS_STATUS OQS_KEM_kyber_768_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
#endif
-#ifdef OQS_ENABLE_KEM_kyber_1024
+#if defined(OQS_ENABLE_KEM_kyber_1024)
#define OQS_KEM_kyber_1024_length_public_key 1568
#define OQS_KEM_kyber_1024_length_secret_key 3168
#define OQS_KEM_kyber_1024_length_ciphertext 1568
diff --git a/src/kem/ml_kem/CMakeLists.txt b/src/kem/ml_kem/CMakeLists.txt
new file mode 100644
index 0000000000..a5890ab9ce
--- /dev/null
+++ b/src/kem/ml_kem/CMakeLists.txt
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: MIT
+
+# This file was generated by
+# scripts/copy_from_upstream/copy_from_upstream.py
+
+set(_ML_KEM_OBJS "")
+
+if(OQS_ENABLE_KEM_ml_kem_512_ipd)
+ add_library(ml_kem_512_ipd_ref OBJECT kem_ml_kem_512_ipd.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-512-ipd_ref/verify.c)
+ target_compile_options(ml_kem_512_ipd_ref PUBLIC -DKYBER_K=2)
+ target_include_directories(ml_kem_512_ipd_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-512-ipd_ref)
+ target_include_directories(ml_kem_512_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
+ target_compile_options(ml_kem_512_ipd_ref PUBLIC -DKYBER_K=2)
+ set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_ipd_ref>)
+endif()
+
+if(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2)
+ add_library(ml_kem_512_ipd_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/verify.c)
+ target_include_directories(ml_kem_512_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2)
+ target_include_directories(ml_kem_512_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
+ target_compile_options(ml_kem_512_ipd_avx2 PRIVATE -mavx2 -mbmi2 -mpopcnt )
+ target_compile_options(ml_kem_512_ipd_avx2 PUBLIC -DKYBER_K=2)
+ set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_ipd_avx2>)
+endif()
+
+if(OQS_ENABLE_KEM_ml_kem_768_ipd)
+ add_library(ml_kem_768_ipd_ref OBJECT kem_ml_kem_768_ipd.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/cbd.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/kem.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/ntt.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/poly.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/reduce.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-768-ipd_ref/verify.c)
+ target_compile_options(ml_kem_768_ipd_ref PUBLIC -DKYBER_K=3)
+ target_include_directories(ml_kem_768_ipd_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-768-ipd_ref)
+ target_include_directories(ml_kem_768_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
+ target_compile_options(ml_kem_768_ipd_ref PUBLIC -DKYBER_K=3)
+ set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_ipd_ref>)
+endif()
+
+if(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2)
+ add_library(ml_kem_768_ipd_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/consts.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/fq.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/kem.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/poly.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/polyvec.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-768-ipd_avx2/verify.c)
+ target_include_directories(ml_kem_768_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-768-ipd_avx2)
+ target_include_directories(ml_kem_768_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
+ target_compile_options(ml_kem_768_ipd_avx2 PRIVATE -mavx2 -mbmi2 -mpopcnt )
+ target_compile_options(ml_kem_768_ipd_avx2 PUBLIC -DKYBER_K=3)
+ set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_ipd_avx2>)
+endif()
+
+if(OQS_ENABLE_KEM_ml_kem_1024_ipd)
+ add_library(ml_kem_1024_ipd_ref OBJECT kem_ml_kem_1024_ipd.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.c)
+ target_compile_options(ml_kem_1024_ipd_ref PUBLIC -DKYBER_K=4)
+ target_include_directories(ml_kem_1024_ipd_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref)
+ target_include_directories(ml_kem_1024_ipd_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
+ target_compile_options(ml_kem_1024_ipd_ref PUBLIC -DKYBER_K=4)
+ set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_ipd_ref>)
+endif()
+
+if(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2)
+ add_library(ml_kem_1024_ipd_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/verify.c)
+ target_include_directories(ml_kem_1024_ipd_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2)
+ target_include_directories(ml_kem_1024_ipd_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
+ target_compile_options(ml_kem_1024_ipd_avx2 PRIVATE -mavx2 -mbmi2 -mpopcnt )
+ target_compile_options(ml_kem_1024_ipd_avx2 PUBLIC -DKYBER_K=4)
+ set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_ipd_avx2>)
+endif()
+
+set(ML_KEM_OBJS ${_ML_KEM_OBJS} PARENT_SCOPE)
diff --git a/src/kem/ml_kem/kem_ml_kem.h b/src/kem/ml_kem/kem_ml_kem.h
new file mode 100644
index 0000000000..b3e3d99cfb
--- /dev/null
+++ b/src/kem/ml_kem/kem_ml_kem.h
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: MIT
+
+#ifndef OQS_KEM_ML_KEM_H
+#define OQS_KEM_ML_KEM_H
+
+#include <oqs/oqs.h>
+/* ML-KEM-512: lengths (presumably in bytes, matching the uint8_t buffers) and entry points of the "_ipd" variant, then un-suffixed aliases. NOTE(review): this guard also admits OQS_ENABLE_KEM_ml_kem_512 alone, whereas kem_ml_kem_512_ipd.c compiles the definitions only under OQS_ENABLE_KEM_ml_kem_512_ipd - confirm that configuration still links. */
+#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd) || defined(OQS_ENABLE_KEM_ml_kem_512)
+#define OQS_KEM_ml_kem_512_ipd_length_public_key 800
+#define OQS_KEM_ml_kem_512_ipd_length_secret_key 1632
+#define OQS_KEM_ml_kem_512_ipd_length_ciphertext 768
+#define OQS_KEM_ml_kem_512_ipd_length_shared_secret 32
+OQS_KEM *OQS_KEM_ml_kem_512_ipd_new(void);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_keypair(uint8_t *public_key, uint8_t *secret_key);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
+/* Aliases: the un-suffixed names below expand to the "_ipd" macros and functions above; only OQS_KEM_ml_kem_512_new is a separate function. */
+#define OQS_KEM_ml_kem_512_length_public_key OQS_KEM_ml_kem_512_ipd_length_public_key
+#define OQS_KEM_ml_kem_512_length_secret_key OQS_KEM_ml_kem_512_ipd_length_secret_key
+#define OQS_KEM_ml_kem_512_length_ciphertext OQS_KEM_ml_kem_512_ipd_length_ciphertext
+#define OQS_KEM_ml_kem_512_length_shared_secret OQS_KEM_ml_kem_512_ipd_length_shared_secret
+OQS_KEM *OQS_KEM_ml_kem_512_new(void);
+#define OQS_KEM_ml_kem_512_keypair OQS_KEM_ml_kem_512_ipd_keypair
+#define OQS_KEM_ml_kem_512_encaps OQS_KEM_ml_kem_512_ipd_encaps
+#define OQS_KEM_ml_kem_512_decaps OQS_KEM_ml_kem_512_ipd_decaps
+#endif
+/* ML-KEM-768: lengths (presumably in bytes, matching the uint8_t buffers) and entry points of the "_ipd" variant, then un-suffixed aliases. NOTE(review): this guard also admits OQS_ENABLE_KEM_ml_kem_768 alone, whereas kem_ml_kem_768_ipd.c compiles the definitions only under OQS_ENABLE_KEM_ml_kem_768_ipd - confirm that configuration still links. */
+#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd) || defined(OQS_ENABLE_KEM_ml_kem_768)
+#define OQS_KEM_ml_kem_768_ipd_length_public_key 1184
+#define OQS_KEM_ml_kem_768_ipd_length_secret_key 2400
+#define OQS_KEM_ml_kem_768_ipd_length_ciphertext 1088
+#define OQS_KEM_ml_kem_768_ipd_length_shared_secret 32
+OQS_KEM *OQS_KEM_ml_kem_768_ipd_new(void);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_keypair(uint8_t *public_key, uint8_t *secret_key);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
+/* Aliases: the un-suffixed names below expand to the "_ipd" macros and functions above; only OQS_KEM_ml_kem_768_new is a separate function. */
+#define OQS_KEM_ml_kem_768_length_public_key OQS_KEM_ml_kem_768_ipd_length_public_key
+#define OQS_KEM_ml_kem_768_length_secret_key OQS_KEM_ml_kem_768_ipd_length_secret_key
+#define OQS_KEM_ml_kem_768_length_ciphertext OQS_KEM_ml_kem_768_ipd_length_ciphertext
+#define OQS_KEM_ml_kem_768_length_shared_secret OQS_KEM_ml_kem_768_ipd_length_shared_secret
+OQS_KEM *OQS_KEM_ml_kem_768_new(void);
+#define OQS_KEM_ml_kem_768_keypair OQS_KEM_ml_kem_768_ipd_keypair
+#define OQS_KEM_ml_kem_768_encaps OQS_KEM_ml_kem_768_ipd_encaps
+#define OQS_KEM_ml_kem_768_decaps OQS_KEM_ml_kem_768_ipd_decaps
+#endif
+/* ML-KEM-1024: lengths (presumably in bytes, matching the uint8_t buffers) and entry points of the "_ipd" variant, then un-suffixed aliases. NOTE(review): this guard also admits OQS_ENABLE_KEM_ml_kem_1024 alone, whereas kem_ml_kem_1024_ipd.c compiles the definitions only under OQS_ENABLE_KEM_ml_kem_1024_ipd - confirm that configuration still links. */
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd) || defined(OQS_ENABLE_KEM_ml_kem_1024)
+#define OQS_KEM_ml_kem_1024_ipd_length_public_key 1568
+#define OQS_KEM_ml_kem_1024_ipd_length_secret_key 3168
+#define OQS_KEM_ml_kem_1024_ipd_length_ciphertext 1568
+#define OQS_KEM_ml_kem_1024_ipd_length_shared_secret 32
+OQS_KEM *OQS_KEM_ml_kem_1024_ipd_new(void);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_keypair(uint8_t *public_key, uint8_t *secret_key);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key);
+OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key);
+/* Aliases: the un-suffixed names below expand to the "_ipd" macros and functions above; only OQS_KEM_ml_kem_1024_new is a separate function. */
+#define OQS_KEM_ml_kem_1024_length_public_key OQS_KEM_ml_kem_1024_ipd_length_public_key
+#define OQS_KEM_ml_kem_1024_length_secret_key OQS_KEM_ml_kem_1024_ipd_length_secret_key
+#define OQS_KEM_ml_kem_1024_length_ciphertext OQS_KEM_ml_kem_1024_ipd_length_ciphertext
+#define OQS_KEM_ml_kem_1024_length_shared_secret OQS_KEM_ml_kem_1024_ipd_length_shared_secret
+OQS_KEM *OQS_KEM_ml_kem_1024_new(void);
+#define OQS_KEM_ml_kem_1024_keypair OQS_KEM_ml_kem_1024_ipd_keypair
+#define OQS_KEM_ml_kem_1024_encaps OQS_KEM_ml_kem_1024_ipd_encaps
+#define OQS_KEM_ml_kem_1024_decaps OQS_KEM_ml_kem_1024_ipd_decaps
+#endif
+
+#endif
+
diff --git a/src/kem/ml_kem/kem_ml_kem_1024_ipd.c b/src/kem/ml_kem/kem_ml_kem_1024_ipd.c
new file mode 100644
index 0000000000..182b3b32e9
--- /dev/null
+++ b/src/kem/ml_kem/kem_ml_kem_1024_ipd.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: MIT
+
+#include <stdlib.h>
+
+#include <oqs/kem_ml_kem.h>
+
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd)
+
+OQS_KEM *OQS_KEM_ml_kem_1024_ipd_new(void) {
+ /* Allocate the descriptor on the heap; NULL is returned if malloc fails. */
+ OQS_KEM *obj = malloc(sizeof(*obj));
+ if (!obj) {
+  return NULL;
+ }
+ /* Operations: the three entry points defined later in this file. */
+ obj->keypair = OQS_KEM_ml_kem_1024_ipd_keypair;
+ obj->encaps = OQS_KEM_ml_kem_1024_ipd_encaps;
+ obj->decaps = OQS_KEM_ml_kem_1024_ipd_decaps;
+ /* Buffer lengths, taken from the OQS_KEM_ml_kem_1024_ipd_length_* macros. */
+ obj->length_public_key = OQS_KEM_ml_kem_1024_ipd_length_public_key;
+ obj->length_secret_key = OQS_KEM_ml_kem_1024_ipd_length_secret_key;
+ obj->length_ciphertext = OQS_KEM_ml_kem_1024_ipd_length_ciphertext;
+ obj->length_shared_secret = OQS_KEM_ml_kem_1024_ipd_length_shared_secret;
+ /* Identification and claimed security properties. */
+ obj->method_name = OQS_KEM_alg_ml_kem_1024_ipd;
+ obj->alg_version = "https://github.com/pq-crystals/kyber/tree/standard";
+ obj->claimed_nist_level = 5;
+ obj->ind_cca = true;
+
+ return obj;
+}
+
+/** Alias of OQS_KEM_ml_kem_1024_ipd_new(): same setup, but method_name is OQS_KEM_alg_ml_kem_1024. */
+OQS_KEM *OQS_KEM_ml_kem_1024_new(void) {
+ /* Allocate the descriptor on the heap; NULL is returned if malloc fails. */
+ OQS_KEM *obj = malloc(sizeof(*obj));
+ if (!obj) {
+  return NULL;
+ }
+ /* Operations, referenced through the un-suffixed alias names. */
+ obj->keypair = OQS_KEM_ml_kem_1024_keypair;
+ obj->encaps = OQS_KEM_ml_kem_1024_encaps;
+ obj->decaps = OQS_KEM_ml_kem_1024_decaps;
+ /* Buffer lengths, taken from the OQS_KEM_ml_kem_1024_length_* macros. */
+ obj->length_public_key = OQS_KEM_ml_kem_1024_length_public_key;
+ obj->length_secret_key = OQS_KEM_ml_kem_1024_length_secret_key;
+ obj->length_ciphertext = OQS_KEM_ml_kem_1024_length_ciphertext;
+ obj->length_shared_secret = OQS_KEM_ml_kem_1024_length_shared_secret;
+ /* Identification and claimed security properties. */
+ obj->method_name = OQS_KEM_alg_ml_kem_1024;
+ obj->alg_version = "https://github.com/pq-crystals/kyber/tree/standard";
+ obj->claimed_nist_level = 5;
+ obj->ind_cca = true;
+
+ return obj;
+}
+/* Reference-implementation entry points; declared here, defined outside this file. */
+extern int pqcrystals_ml_kem_1024_ipd_ref_keypair(uint8_t *pk, uint8_t *sk);
+extern int pqcrystals_ml_kem_1024_ipd_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int pqcrystals_ml_kem_1024_ipd_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+/* AVX2 entry points; declared only when that variant is enabled. */
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2)
+extern int pqcrystals_ml_kem_1024_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk);
+extern int pqcrystals_ml_kem_1024_ipd_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int pqcrystals_ml_kem_1024_ipd_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+#endif
+/* Key-pair entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_avx2_keypair(public_key, secret_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_keypair(public_key, secret_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_keypair(public_key, secret_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2 */
+}
+/* Encapsulation entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_avx2_enc(ciphertext, shared_secret, public_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_enc(ciphertext, shared_secret, public_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2 */
+}
+/* Decapsulation entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_avx2_dec(shared_secret, ciphertext, secret_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_dec(shared_secret, ciphertext, secret_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_1024_ipd_ref_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_1024_ipd_avx2 */
+}
+
+#endif
diff --git a/src/kem/ml_kem/kem_ml_kem_512_ipd.c b/src/kem/ml_kem/kem_ml_kem_512_ipd.c
new file mode 100644
index 0000000000..ea228dd869
--- /dev/null
+++ b/src/kem/ml_kem/kem_ml_kem_512_ipd.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: MIT
+
+#include <stdlib.h>
+
+#include <oqs/kem_ml_kem.h>
+
+#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd)
+
+OQS_KEM *OQS_KEM_ml_kem_512_ipd_new(void) {
+ /* Allocate the descriptor on the heap; NULL is returned if malloc fails. */
+ OQS_KEM *obj = malloc(sizeof(*obj));
+ if (!obj) {
+  return NULL;
+ }
+ /* Operations: the three entry points defined later in this file. */
+ obj->keypair = OQS_KEM_ml_kem_512_ipd_keypair;
+ obj->encaps = OQS_KEM_ml_kem_512_ipd_encaps;
+ obj->decaps = OQS_KEM_ml_kem_512_ipd_decaps;
+ /* Buffer lengths, taken from the OQS_KEM_ml_kem_512_ipd_length_* macros. */
+ obj->length_public_key = OQS_KEM_ml_kem_512_ipd_length_public_key;
+ obj->length_secret_key = OQS_KEM_ml_kem_512_ipd_length_secret_key;
+ obj->length_ciphertext = OQS_KEM_ml_kem_512_ipd_length_ciphertext;
+ obj->length_shared_secret = OQS_KEM_ml_kem_512_ipd_length_shared_secret;
+ /* Identification and claimed security properties. */
+ obj->method_name = OQS_KEM_alg_ml_kem_512_ipd;
+ obj->alg_version = "https://github.com/pq-crystals/kyber/tree/standard";
+ obj->claimed_nist_level = 1;
+ obj->ind_cca = true;
+
+ return obj;
+}
+
+/** Alias of OQS_KEM_ml_kem_512_ipd_new(): same setup, but method_name is OQS_KEM_alg_ml_kem_512. */
+OQS_KEM *OQS_KEM_ml_kem_512_new(void) {
+ /* Allocate the descriptor on the heap; NULL is returned if malloc fails. */
+ OQS_KEM *obj = malloc(sizeof(*obj));
+ if (!obj) {
+  return NULL;
+ }
+ /* Operations, referenced through the un-suffixed alias names. */
+ obj->keypair = OQS_KEM_ml_kem_512_keypair;
+ obj->encaps = OQS_KEM_ml_kem_512_encaps;
+ obj->decaps = OQS_KEM_ml_kem_512_decaps;
+ /* Buffer lengths, taken from the OQS_KEM_ml_kem_512_length_* macros. */
+ obj->length_public_key = OQS_KEM_ml_kem_512_length_public_key;
+ obj->length_secret_key = OQS_KEM_ml_kem_512_length_secret_key;
+ obj->length_ciphertext = OQS_KEM_ml_kem_512_length_ciphertext;
+ obj->length_shared_secret = OQS_KEM_ml_kem_512_length_shared_secret;
+ /* Identification and claimed security properties. */
+ obj->method_name = OQS_KEM_alg_ml_kem_512;
+ obj->alg_version = "https://github.com/pq-crystals/kyber/tree/standard";
+ obj->claimed_nist_level = 1;
+ obj->ind_cca = true;
+
+ return obj;
+}
+/* Reference-implementation entry points; declared here, defined outside this file. */
+extern int pqcrystals_ml_kem_512_ipd_ref_keypair(uint8_t *pk, uint8_t *sk);
+extern int pqcrystals_ml_kem_512_ipd_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int pqcrystals_ml_kem_512_ipd_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+/* AVX2 entry points; declared only when that variant is enabled. */
+#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2)
+extern int pqcrystals_ml_kem_512_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk);
+extern int pqcrystals_ml_kem_512_ipd_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int pqcrystals_ml_kem_512_ipd_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+#endif
+/* Key-pair entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_avx2_keypair(public_key, secret_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_keypair(public_key, secret_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_keypair(public_key, secret_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_512_ipd_avx2 */
+}
+/* Encapsulation entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_avx2_enc(ciphertext, shared_secret, public_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_enc(ciphertext, shared_secret, public_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_512_ipd_avx2 */
+}
+/* Decapsulation entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_512_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_512_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_avx2_dec(shared_secret, ciphertext, secret_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_dec(shared_secret, ciphertext, secret_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_512_ipd_ref_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_512_ipd_avx2 */
+}
+
+#endif
diff --git a/src/kem/ml_kem/kem_ml_kem_768_ipd.c b/src/kem/ml_kem/kem_ml_kem_768_ipd.c
new file mode 100644
index 0000000000..281f505fa7
--- /dev/null
+++ b/src/kem/ml_kem/kem_ml_kem_768_ipd.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: MIT
+
+#include <stdlib.h>
+
+#include <oqs/kem_ml_kem.h>
+
+#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd)
+
+OQS_KEM *OQS_KEM_ml_kem_768_ipd_new(void) {
+ /* Allocate the descriptor on the heap; NULL is returned if malloc fails. */
+ OQS_KEM *obj = malloc(sizeof(*obj));
+ if (!obj) {
+  return NULL;
+ }
+ /* Operations: the three entry points defined later in this file. */
+ obj->keypair = OQS_KEM_ml_kem_768_ipd_keypair;
+ obj->encaps = OQS_KEM_ml_kem_768_ipd_encaps;
+ obj->decaps = OQS_KEM_ml_kem_768_ipd_decaps;
+ /* Buffer lengths, taken from the OQS_KEM_ml_kem_768_ipd_length_* macros. */
+ obj->length_public_key = OQS_KEM_ml_kem_768_ipd_length_public_key;
+ obj->length_secret_key = OQS_KEM_ml_kem_768_ipd_length_secret_key;
+ obj->length_ciphertext = OQS_KEM_ml_kem_768_ipd_length_ciphertext;
+ obj->length_shared_secret = OQS_KEM_ml_kem_768_ipd_length_shared_secret;
+ /* Identification and claimed security properties. */
+ obj->method_name = OQS_KEM_alg_ml_kem_768_ipd;
+ obj->alg_version = "https://github.com/pq-crystals/kyber/tree/standard";
+ obj->claimed_nist_level = 3;
+ obj->ind_cca = true;
+
+ return obj;
+}
+
+/** Alias of OQS_KEM_ml_kem_768_ipd_new(): same setup, but method_name is OQS_KEM_alg_ml_kem_768. */
+OQS_KEM *OQS_KEM_ml_kem_768_new(void) {
+ /* Allocate the descriptor on the heap; NULL is returned if malloc fails. */
+ OQS_KEM *obj = malloc(sizeof(*obj));
+ if (!obj) {
+  return NULL;
+ }
+ /* Operations, referenced through the un-suffixed alias names. */
+ obj->keypair = OQS_KEM_ml_kem_768_keypair;
+ obj->encaps = OQS_KEM_ml_kem_768_encaps;
+ obj->decaps = OQS_KEM_ml_kem_768_decaps;
+ /* Buffer lengths, taken from the OQS_KEM_ml_kem_768_length_* macros. */
+ obj->length_public_key = OQS_KEM_ml_kem_768_length_public_key;
+ obj->length_secret_key = OQS_KEM_ml_kem_768_length_secret_key;
+ obj->length_ciphertext = OQS_KEM_ml_kem_768_length_ciphertext;
+ obj->length_shared_secret = OQS_KEM_ml_kem_768_length_shared_secret;
+ /* Identification and claimed security properties. */
+ obj->method_name = OQS_KEM_alg_ml_kem_768;
+ obj->alg_version = "https://github.com/pq-crystals/kyber/tree/standard";
+ obj->claimed_nist_level = 3;
+ obj->ind_cca = true;
+
+ return obj;
+}
+/* Reference-implementation entry points; declared here, defined outside this file. */
+extern int pqcrystals_ml_kem_768_ipd_ref_keypair(uint8_t *pk, uint8_t *sk);
+extern int pqcrystals_ml_kem_768_ipd_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int pqcrystals_ml_kem_768_ipd_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+/* AVX2 entry points; declared only when that variant is enabled. */
+#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2)
+extern int pqcrystals_ml_kem_768_ipd_avx2_keypair(uint8_t *pk, uint8_t *sk);
+extern int pqcrystals_ml_kem_768_ipd_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int pqcrystals_ml_kem_768_ipd_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+#endif
+/* Key-pair entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_keypair(uint8_t *public_key, uint8_t *secret_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_avx2_keypair(public_key, secret_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_keypair(public_key, secret_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_keypair(public_key, secret_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_768_ipd_avx2 */
+}
+/* Encapsulation entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_avx2_enc(ciphertext, shared_secret, public_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_enc(ciphertext, shared_secret, public_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_768_ipd_avx2 */
+}
+/* Decapsulation entry point. Forwards to the AVX2 implementation when it is compiled in (under OQS_DIST_BUILD only after a run-time AVX2/BMI2/POPCNT check, else falling back to ref); otherwise forwards to ref. The callee's int result is cast to OQS_STATUS. */
+OQS_API OQS_STATUS OQS_KEM_ml_kem_768_ipd_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
+#if defined(OQS_ENABLE_KEM_ml_kem_768_ipd_avx2)
+#if defined(OQS_DIST_BUILD)
+ if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
+#endif /* OQS_DIST_BUILD */
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_avx2_dec(shared_secret, ciphertext, secret_key);
+#if defined(OQS_DIST_BUILD)
+ } else {
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_dec(shared_secret, ciphertext, secret_key);
+ }
+#endif /* OQS_DIST_BUILD */
+#else /* no AVX2 variant compiled in */
+ return (OQS_STATUS) pqcrystals_ml_kem_768_ipd_ref_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_ENABLE_KEM_ml_kem_768_ipd_avx2 */
+}
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/LICENSE
new file mode 100644
index 0000000000..7922ab8007
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/LICENSE
@@ -0,0 +1,6 @@
+Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/);
+or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html).
+
+For Keccak and AES we are using public-domain
+code from sources and by authors listed in
+comments on top of the respective files.
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/align.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/align.h
new file mode 100644
index 0000000000..3463866f37
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/align.h
@@ -0,0 +1,19 @@
+#ifndef ALIGN_H
+#define ALIGN_H
+
+#include <stdint.h>
+#include <immintrin.h>
+/* ALIGNED_UINT8(N): anonymous union overlaying N bytes (coeffs) with (N+31)/32 __m256i elements (vec), so the buffer can be accessed either way. */
+#define ALIGNED_UINT8(N) \
+ union { \
+ uint8_t coeffs[N]; \
+ __m256i vec[(N+31)/32]; \
+ }
+/* ALIGNED_INT16(N): the same overlay for N int16_t values, 16 per __m256i element ((N+15)/16 elements). */
+#define ALIGNED_INT16(N) \
+ union { \
+ int16_t coeffs[N]; \
+ __m256i vec[(N+15)/16]; \
+ }
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/api.h
new file mode 100644
index 0000000000..a154e80f1d
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/api.h
@@ -0,0 +1,66 @@
+#ifndef API_H
+#define API_H
+
+#include <stdint.h>
+/* Kyber-512 size constants, their _avx2-prefixed aliases, and the AVX2 entry-point prototypes. NOTE(review): kem_ml_kem_512_ipd.c declares pqcrystals_ml_kem_512_ipd_avx2_* symbols rather than these names - presumably a namespace macro elsewhere renames them; confirm. */
+#define pqcrystals_kyber512_SECRETKEYBYTES 1632
+#define pqcrystals_kyber512_PUBLICKEYBYTES 800
+#define pqcrystals_kyber512_CIPHERTEXTBYTES 768
+#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber512_ENCCOINBYTES 32
+#define pqcrystals_kyber512_BYTES 32
+
+#define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES
+#define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES
+#define pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES
+#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES
+#define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES
+#define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES
+
+int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+/* Kyber-768 size constants, their _avx2-prefixed aliases, and the AVX2 entry-point prototypes. NOTE(review): kem_ml_kem_768_ipd.c declares pqcrystals_ml_kem_768_ipd_avx2_* symbols rather than these names - presumably a namespace macro elsewhere renames them; confirm. */
+#define pqcrystals_kyber768_SECRETKEYBYTES 2400
+#define pqcrystals_kyber768_PUBLICKEYBYTES 1184
+#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088
+#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber768_ENCCOINBYTES 32
+#define pqcrystals_kyber768_BYTES 32
+
+#define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES
+#define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES
+#define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES
+#define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES
+#define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES
+#define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES
+
+int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+/* Kyber-1024 size constants, their _avx2-prefixed aliases, and the AVX2 entry-point prototypes. NOTE(review): kem_ml_kem_1024_ipd.c declares pqcrystals_ml_kem_1024_ipd_avx2_* symbols rather than these names - presumably a namespace macro elsewhere renames them; confirm. */
+#define pqcrystals_kyber1024_SECRETKEYBYTES 3168
+#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568
+#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568
+#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber1024_ENCCOINBYTES 32
+#define pqcrystals_kyber1024_BYTES 32
+
+#define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES
+#define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES
+#define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES
+#define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES
+#define pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES
+#define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES
+
+int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/basemul.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/basemul.S
new file mode 100644
index 0000000000..36990639b2
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/basemul.S
@@ -0,0 +1,105 @@
+#include "consts.h"
+/* schoolbook off: reads four 16-lane vectors (a0,b0,a1,b1) from (%rsi) and four (c0,d0,c1,d1) from (%rdx) at element offset 64*off, forms their pairwise products with a reduction step, and stores four result vectors at the same offset of (%rdi). Constants come from (%rcx), per-block multipliers from (%r9); (%rsp) is a 32-byte spill slot. */
+.macro schoolbook off
+vmovdqa _16XQINV*2(%rcx),%ymm0
+vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0
+vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0
+vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1
+vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1
+/* Low 16 bits of each (%rsi) operand times the _16XQINV constant. */
+vpmullw %ymm0,%ymm1,%ymm9 # a0.lo
+vpmullw %ymm0,%ymm2,%ymm10 # b0.lo
+vpmullw %ymm0,%ymm3,%ymm11 # a1.lo
+vpmullw %ymm0,%ymm4,%ymm12 # b1.lo
+
+vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0
+vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0
+/* High halves of the plain products. */
+vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi
+vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi
+vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi
+vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi
+
+vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1
+vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1
+
+vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi
+vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi
+vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi
+vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi
+/* Spill a0c0.hi to the stack slot so %ymm13 can hold a0c0.lo below. */
+vmovdqa %ymm13,(%rsp)
+
+vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo
+vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo
+vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo
+vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo
+
+vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo
+vpmullw %ymm8,%ymm11,%ymm11 # a1d1.lo
+vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo
+vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo
+/* High half of (low product * _16XQ): the correction term subtracted from each high product below. */
+vmovdqa _16XQ*2(%rcx),%ymm8
+vpmulhw %ymm8,%ymm13,%ymm13
+vpmulhw %ymm8,%ymm9,%ymm9
+vpmulhw %ymm8,%ymm5,%ymm5
+vpmulhw %ymm8,%ymm10,%ymm10
+vpmulhw %ymm8,%ymm6,%ymm6
+vpmulhw %ymm8,%ymm11,%ymm11
+vpmulhw %ymm8,%ymm7,%ymm7
+vpmulhw %ymm8,%ymm12,%ymm12
+/* high - correction for every product; the first one is computed with operands swapped, hence -a0c0. */
+vpsubw (%rsp),%ymm13,%ymm13 # -a0c0
+vpsubw %ymm9,%ymm1,%ymm9 # a0d0
+vpsubw %ymm5,%ymm14,%ymm5 # b0c0
+vpsubw %ymm10,%ymm2,%ymm10 # b0d0
+
+vpsubw %ymm6,%ymm15,%ymm6 # a1c1
+vpsubw %ymm11,%ymm3,%ymm11 # a1d1
+vpsubw %ymm7,%ymm0,%ymm7 # b1c1
+vpsubw %ymm12,%ymm4,%ymm12 # b1d1
+/* Multiply b0d0 and b1d1 by the per-block multiplier: (%r9) feeds the low multiply, 32(%r9) the high multiply, reduced the same way as above. */
+vmovdqa (%r9),%ymm0
+vmovdqa 32(%r9),%ymm1
+vpmullw %ymm0,%ymm10,%ymm2
+vpmullw %ymm0,%ymm12,%ymm3
+vpmulhw %ymm1,%ymm10,%ymm10
+vpmulhw %ymm1,%ymm12,%ymm12
+vpmulhw %ymm8,%ymm2,%ymm2
+vpmulhw %ymm8,%ymm3,%ymm3
+vpsubw %ymm2,%ymm10,%ymm10 # rb0d0
+vpsubw %ymm3,%ymm12,%ymm12 # rb1d1
+/* Combine: out0 = rb0d0 - (-a0c0), out1 = a0d0 + b0c0, out2 = a1c1 - rb1d1, out3 = a1d1 + b1c1. */
+vpaddw %ymm5,%ymm9,%ymm9
+vpaddw %ymm7,%ymm11,%ymm11
+vpsubw %ymm13,%ymm10,%ymm13
+vpsubw %ymm12,%ymm6,%ymm6
+
+vmovdqa %ymm13,(64*\off+ 0)*2(%rdi)
+vmovdqa %ymm9,(64*\off+16)*2(%rdi)
+vmovdqa %ymm6,(64*\off+32)*2(%rdi)
+vmovdqa %ymm11,(64*\off+48)*2(%rdi)
+.endm
+/* basemul_avx: saves %rsp in %r8, carves out a 32-byte-aligned 32-byte spill slot, then expands schoolbook for off = 0..3 while stepping %r9 through the table that starts at _ZETAS_EXP. Operands arrive in %rdi/%rsi/%rdx/%rcx (presumably the first four SysV integer arguments - confirm against the C prototype). */
+.text
+.global cdecl(basemul_avx)
+cdecl(basemul_avx):
+mov %rsp,%r8
+and $-32,%rsp
+sub $32,%rsp
+/* %r9 = %rcx + 2*(_ZETAS_EXP+176): multiplier pair used by block 0. */
+lea (_ZETAS_EXP+176)*2(%rcx),%r9
+schoolbook 0
+
+add $32*2,%r9
+schoolbook 1
+
+add $192*2,%r9
+schoolbook 2
+
+add $32*2,%r9
+schoolbook 3
+/* Restore the caller's stack pointer before returning. */
+mov %r8,%rsp
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.c
new file mode 100644
index 0000000000..dad473c79e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.c
@@ -0,0 +1,144 @@
+#include <stdint.h>
+#include <immintrin.h>
+#include "params.h"
+#include "cbd.h"
+
+/*************************************************
+* Name: cbd2
+*
+* Description: Given an array of uniformly random bytes, compute
+* polynomial with coefficients distributed according to
+* a centered binomial distribution with parameter eta=2
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const __m256i *buf: pointer to aligned input byte array
+**************************************************/
+static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128])
+{
+ unsigned int i;
+ __m256i f0, f1, f2, f3;
+ const __m256i mask55 = _mm256_set1_epi32(0x55555555);
+ const __m256i mask33 = _mm256_set1_epi32(0x33333333);
+ const __m256i mask03 = _mm256_set1_epi32(0x03030303);
+ const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F);
+
+ for(i = 0; i < KYBER_N/64; i++) {
+ f0 = _mm256_load_si256(&buf[i]);
+ /* Add neighbouring bits: every 2-bit field now holds a count in 0..2. */
+ f1 = _mm256_srli_epi16(f0, 1);
+ f0 = _mm256_and_si256(mask55, f0);
+ f1 = _mm256_and_si256(mask55, f1);
+ f0 = _mm256_add_epi8(f0, f1);
+ /* Per 4-bit field: (lower count) + 3 - (upper count), i.e. the difference biased by 3. */
+ f1 = _mm256_srli_epi16(f0, 2);
+ f0 = _mm256_and_si256(mask33, f0);
+ f1 = _mm256_and_si256(mask33, f1);
+ f0 = _mm256_add_epi8(f0, mask33);
+ f0 = _mm256_sub_epi8(f0, f1);
+ /* Split low and high nibbles into separate byte vectors and remove the bias of 3. */
+ f1 = _mm256_srli_epi16(f0, 4);
+ f0 = _mm256_and_si256(mask0F, f0);
+ f1 = _mm256_and_si256(mask0F, f1);
+ f0 = _mm256_sub_epi8(f0, mask03);
+ f1 = _mm256_sub_epi8(f1, mask03);
+ /* Interleave the low-nibble and high-nibble bytes (per 128-bit lane). */
+ f2 = _mm256_unpacklo_epi8(f0, f1);
+ f3 = _mm256_unpackhi_epi8(f0, f1);
+ /* Sign-extend each 16-byte half to sixteen 16-bit lanes. */
+ f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2));
+ f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1));
+ f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3));
+ f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1));
+ /* Store order f0, f2, f1, f3 compensates for the per-lane behaviour of the unpack step. */
+ _mm256_store_si256(&r->vec[4*i+0], f0);
+ _mm256_store_si256(&r->vec[4*i+1], f2);
+ _mm256_store_si256(&r->vec[4*i+2], f1);
+ _mm256_store_si256(&r->vec[4*i+3], f3);
+ }
+}
+
+#if KYBER_ETA1 == 3
+/*************************************************
+* Name: cbd3
+*
+* Description: Given an array of uniformly random bytes, compute
+* polynomial with coefficients distributed according to
+* a centered binomial distribution with parameter eta=3
+* This function is only needed for Kyber-512
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *buf: pointer to input byte array, read with unaligned 32-byte loads (hence the 8 extra bytes in the declared size)
+**************************************************/
+static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8])
+{
+ unsigned int i;
+ __m256i f0, f1, f2, f3;
+ const __m256i mask249 = _mm256_set1_epi32(0x249249);
+ const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB);
+ const __m256i mask07 = _mm256_set1_epi32(7);
+ const __m256i mask70 = _mm256_set1_epi32(7 << 16);
+ const __m256i mask3 = _mm256_set1_epi16(3);
+ const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4,
+ -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0);
+ /* Each iteration advances 24 input bytes and writes two output vectors (32 coefficients). */
+ for(i = 0; i < KYBER_N/32; i++) {
+ f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]);
+ f0 = _mm256_permute4x64_epi64(f0,0x94);
+ f0 = _mm256_shuffle_epi8(f0,shufbidx);
+ /* Sum the three bits of every 3-bit group (mask 0x249249 keeps every third bit). */
+ f1 = _mm256_srli_epi32(f0,1);
+ f2 = _mm256_srli_epi32(f0,2);
+ f0 = _mm256_and_si256(mask249,f0);
+ f1 = _mm256_and_si256(mask249,f1);
+ f2 = _mm256_and_si256(mask249,f2);
+ f0 = _mm256_add_epi32(f0,f1);
+ f0 = _mm256_add_epi32(f0,f2);
+ /* Add a bias of 3 to every 3-bit field (0x6DB6DB) and subtract the next-higher field's count. */
+ f1 = _mm256_srli_epi32(f0,3);
+ f0 = _mm256_add_epi32(f0,mask6DB);
+ f0 = _mm256_sub_epi32(f0,f1);
+ /* Move four 3-bit fields of each 32-bit lane into 16-bit slots, then remove the bias of 3. */
+ f1 = _mm256_slli_epi32(f0,10);
+ f2 = _mm256_srli_epi32(f0,12);
+ f3 = _mm256_srli_epi32(f0, 2);
+ f0 = _mm256_and_si256(f0,mask07);
+ f1 = _mm256_and_si256(f1,mask70);
+ f2 = _mm256_and_si256(f2,mask07);
+ f3 = _mm256_and_si256(f3,mask70);
+ f0 = _mm256_add_epi16(f0,f1);
+ f1 = _mm256_add_epi16(f2,f3);
+ f0 = _mm256_sub_epi16(f0,mask3);
+ f1 = _mm256_sub_epi16(f1,mask3);
+ /* Re-interleave the two result vectors (presumably restoring coefficient order - confirm). */
+ f2 = _mm256_unpacklo_epi32(f0,f1);
+ f3 = _mm256_unpackhi_epi32(f0,f1);
+
+ f0 = _mm256_permute2x128_si256(f2,f3,0x20);
+ f1 = _mm256_permute2x128_si256(f2,f3,0x31);
+
+ _mm256_store_si256(&r->vec[2*i+0], f0);
+ _mm256_store_si256(&r->vec[2*i+1], f1);
+ }
+}
+#endif
+
+/* Compile-time dispatch on KYBER_ETA1 (2 -> cbd2, 3 -> cbd3, anything else is a build error); buf is declared one __m256i (32 bytes) longer for cbd3. */
+void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1])
+{
+#if KYBER_ETA1 == 2
+ cbd2(r, buf);
+#elif KYBER_ETA1 == 3
+ cbd3(r, (uint8_t *)buf);
+#else
+#error "This implementation requires eta1 in {2,3}"
+#endif
+}
+/* Compile-time dispatch on KYBER_ETA2: only the value 2 is supported and maps to cbd2; any other value is a build error. */
+void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128])
+{
+#if KYBER_ETA2 == 2
+ cbd2(r, buf);
+#else
+#error "This implementation requires eta2 = 2"
+#endif
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.h
new file mode 100644
index 0000000000..05788e06b4
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/cbd.h
@@ -0,0 +1,15 @@
+#ifndef CBD_H
+#define CBD_H
+
+#include <stdint.h>
+#include <immintrin.h>
+#include "params.h"
+#include "poly.h"
+/* Sampler for parameter eta1; the symbol is renamed through KYBER_NAMESPACE and buf is sized in __m256i units with one extra element. */
+#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1)
+void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]);
+/* Sampler for parameter eta2; the symbol is renamed through KYBER_NAMESPACE and buf is sized in __m256i units. */
+#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2)
+void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.c
new file mode 100644
index 0000000000..84e596893d
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.c
@@ -0,0 +1,121 @@
+#include "align.h"
+#include "params.h"
+#include "consts.h"
+
+#define Q KYBER_Q // modulus q; the values below correspond to q = 3329 - TODO confirm against params.h
+#define MONT -1044 // 2^16 mod q, signed representative
+#define QINV -3327 // q^-1 mod 2^16, signed representative
+#define V 20159 // floor(2^26/q + 0.5)
+#define FHI 1441 // mont^2/128 mod q
+#define FLO -10079 // qinv*FHI mod 2^16, signed representative
+#define MONTSQHI 1353 // mont^2 mod q
+#define MONTSQLO 20553 // qinv*MONTSQHI mod 2^16
+#define MASK 4095 // 2^12 - 1
+#define SHIFT 32 // NOTE(review): meaning not visible in this file - check the users of _16XSHIFT
+
+const qdata_t qdata = {{
+#define _16XQ 0
+ Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q,
+
+#define _16XQINV 16
+ QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV,
+ QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV,
+
+#define _16XV 32
+ V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+
+#define _16XFLO 48
+ FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO,
+ FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO,
+
+#define _16XFHI 64
+ FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI,
+ FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI,
+
+#define _16XMONTSQLO 80
+ MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
+ MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
+ MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
+ MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
+
+#define _16XMONTSQHI 96
+ MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
+ MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
+ MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
+ MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
+
+#define _16XMASK 112
+ MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK,
+ MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK,
+
+#define _REVIDXB 128
+ 3854, 3340, 2826, 2312, 1798, 1284, 770, 256,
+ 3854, 3340, 2826, 2312, 1798, 1284, 770, 256,
+
+#define _REVIDXD 144
+ 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0,
+
+#define _ZETAS_EXP 160
+ 31498, 31498, 31498, 31498, -758, -758, -758, -758,
+ 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397,
+ 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745,
+ 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745,
+ -359, -359, -359, -359, -359, -359, -359, -359,
+ -359, -359, -359, -359, -359, -359, -359, -359,
+ 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525,
+ -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402,
+ 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493,
+ 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422,
+ -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758,
+ -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690,
+ -171, -171, -171, -171, 622, 622, 622, 622,
+ 1577, 1577, 1577, 1577, 182, 182, 182, 182,
+ -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057,
+ 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242,
+ 573, 573, -1325, -1325, 264, 264, 383, 383,
+ -829, -829, 1458, 1458, -1602, -1602, -130, -130,
+ -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080,
+ -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837,
+ 1223, 652, -552, 1015, -1293, 1491, -282, -1544,
+ 516, -8, -320, -666, -1618, -1162, 126, 1469,
+ -335, -11477, -32227, 20494, -27738, 945, -14883, 6182,
+ 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276,
+ -1103, 555, -1251, 1550, 422, 177, -291, 1574,
+ -246, 1159, -777, -602, -1590, -872, 418, -156,
+ 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493,
+ -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619,
+ 430, 843, 871, 105, 587, -235, -460, 1653,
+ 778, -147, 1483, 1119, 644, 349, 329, -75,
+ 787, 787, 787, 787, 787, 787, 787, 787,
+ 787, 787, 787, 787, 787, 787, 787, 787,
+ -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517,
+ -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517,
+ 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191,
+ -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694,
+ 287, 287, 287, 287, 287, 287, 287, 287,
+ 202, 202, 202, 202, 202, 202, 202, 202,
+ 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358,
+ -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164,
+ 962, 962, 962, 962, -1202, -1202, -1202, -1202,
+ -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468,
+ -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800,
+ 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163,
+ -681, -681, 1017, 1017, 732, 732, 608, 608,
+ -1542, -1542, 411, 411, -205, -205, -1571, -1571,
+ 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249,
+ 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915,
+ -853, -90, -271, 830, 107, -1421, -247, -951,
+ -398, 961, -1508, -725, 448, -1065, 677, -1275,
+ -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989,
+ 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422,
+ 817, 603, 1322, -1465, -1215, 1218, -874, -1187,
+ -1185, -1278, -1510, -870, -108, 996, 958, 1522,
+ 20297, 2146, 15355, -32384, -6280, -14903, -11044, 14469,
+ -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132,
+ 1097, 610, -1285, 384, -136, -1335, 220, -1659,
+ -1530, 794, -854, 478, -308, 991, -1460, 1628,
+
+#define _16XSHIFT 624
+ SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT,
+ SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT
+}};
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.h
new file mode 100644
index 0000000000..f95899cd8e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/consts.h
@@ -0,0 +1,43 @@
+#ifndef CONSTS_H
+#define CONSTS_H
+
+#include "params.h"
+
+#define _16XQ 0
+#define _16XQINV 16
+#define _16XV 32
+#define _16XFLO 48
+#define _16XFHI 64
+#define _16XMONTSQLO 80
+#define _16XMONTSQHI 96
+#define _16XMASK 112
+#define _REVIDXB 128
+#define _REVIDXD 144
+#define _ZETAS_EXP 160
+#define _16XSHIFT 624
+
+/* On macOS the C ABI prefixes every exported symbol with a
+ * leading underscore. Without the same prefix on the assembly
+ * side, labels defined in assembly cannot be found from C, and
+ * C symbols referenced from assembly cannot be found either.
+ *
+ * cdecl() adds that underscore when __APPLE__ or __WIN32__ is defined.
+ */
+#ifdef __ASSEMBLER__
+#if defined(__WIN32__) || defined(__APPLE__)
+#define decorate(s) _##s
+#define cdecl2(s) decorate(s)
+#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s))
+#else
+#define cdecl(s) KYBER_NAMESPACE(##s)
+#endif
+#endif
+
+#ifndef __ASSEMBLER__
+#include "align.h"
+typedef ALIGNED_INT16(640) qdata_t;
+#define qdata KYBER_NAMESPACE(qdata)
+extern const qdata_t qdata;
+#endif
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.S
new file mode 100644
index 0000000000..3bb1ebd3d8
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.S
@@ -0,0 +1,88 @@
+#include "consts.h"
+.include "fq.inc"
+
+.text
+reduce128_avx:
+#load eight 32-byte vectors (128 16-bit words) from (%rdi) into ymm2..ymm9; red16 below reads ymm0 and ymm1, which reduce_avx fills from the _16XQ and _16XV rows
+vmovdqa (%rdi),%ymm2
+vmovdqa 32(%rdi),%ymm3
+vmovdqa 64(%rdi),%ymm4
+vmovdqa 96(%rdi),%ymm5
+vmovdqa 128(%rdi),%ymm6
+vmovdqa 160(%rdi),%ymm7
+vmovdqa 192(%rdi),%ymm8
+vmovdqa 224(%rdi),%ymm9
+
+red16 2
+red16 3
+red16 4
+red16 5
+red16 6
+red16 7
+red16 8
+red16 9
+
+#store the eight reduced vectors back to the addresses they were loaded from
+vmovdqa %ymm2,(%rdi)
+vmovdqa %ymm3,32(%rdi)
+vmovdqa %ymm4,64(%rdi)
+vmovdqa %ymm5,96(%rdi)
+vmovdqa %ymm6,128(%rdi)
+vmovdqa %ymm7,160(%rdi)
+vmovdqa %ymm8,192(%rdi)
+vmovdqa %ymm9,224(%rdi)
+
+ret
+
+.global cdecl(reduce_avx)
+cdecl(reduce_avx):
+#consts are read from the table at %rsi (offsets are 16-bit word indices, hence the factor 2); the two calls below reduce 2 x 128 words at %rdi in place, and %rdi is left advanced by 256 bytes
+vmovdqa _16XQ*2(%rsi),%ymm0
+vmovdqa _16XV*2(%rsi),%ymm1
+call reduce128_avx
+add $256,%rdi
+call reduce128_avx
+ret
+
+tomont128_avx:
+#load eight 32-byte vectors (128 16-bit words) from (%rdi) into ymm3..ymm10; fqmulprecomp below reads ymm0, ymm1 and ymm2, which tomont_avx fills from the _16XQ, _16XMONTSQLO and _16XMONTSQHI rows
+vmovdqa (%rdi),%ymm3
+vmovdqa 32(%rdi),%ymm4
+vmovdqa 64(%rdi),%ymm5
+vmovdqa 96(%rdi),%ymm6
+vmovdqa 128(%rdi),%ymm7
+vmovdqa 160(%rdi),%ymm8
+vmovdqa 192(%rdi),%ymm9
+vmovdqa 224(%rdi),%ymm10
+
+fqmulprecomp 1,2,3,11
+fqmulprecomp 1,2,4,12
+fqmulprecomp 1,2,5,13
+fqmulprecomp 1,2,6,14
+fqmulprecomp 1,2,7,15
+fqmulprecomp 1,2,8,11
+fqmulprecomp 1,2,9,12
+fqmulprecomp 1,2,10,13
+
+#store the eight multiplied vectors back to the addresses they were loaded from
+vmovdqa %ymm3,(%rdi)
+vmovdqa %ymm4,32(%rdi)
+vmovdqa %ymm5,64(%rdi)
+vmovdqa %ymm6,96(%rdi)
+vmovdqa %ymm7,128(%rdi)
+vmovdqa %ymm8,160(%rdi)
+vmovdqa %ymm9,192(%rdi)
+vmovdqa %ymm10,224(%rdi)
+
+ret
+
+.global cdecl(tomont_avx)
+cdecl(tomont_avx):
+#consts are read from the table at %rsi (offsets are 16-bit word indices, hence the factor 2); the two calls below multiply 2 x 128 words at %rdi in place by the MONTSQLO/MONTSQHI pair, and %rdi is left advanced by 256 bytes
+vmovdqa _16XQ*2(%rsi),%ymm0
+vmovdqa _16XMONTSQLO*2(%rsi),%ymm1
+vmovdqa _16XMONTSQHI*2(%rsi),%ymm2
+call tomont128_avx
+add $256,%rdi
+call tomont128_avx
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.inc
new file mode 100644
index 0000000000..4b7afc3118
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/fq.inc
@@ -0,0 +1,30 @@
+.macro red16 r,rs=0,x=12
+vpmulhw %ymm1,%ymm\r,%ymm\x # scratch = high 16 bits of (r * ymm1), per 16-bit lane
+.if \rs
+vpmulhrsw %ymm\rs,%ymm\x,%ymm\x # non-zero rs: rounding high multiply of scratch by register rs (register 0 cannot be selected this way)
+.else
+vpsraw $10,%ymm\x,%ymm\x # default: arithmetic shift right by 10, a total shift of 26 after the high multiply
+.endif
+vpmullw %ymm0,%ymm\x,%ymm\x # scratch = low 16 bits of (scratch * ymm0)
+vpsubw %ymm\x,%ymm\r,%ymm\r # r = r - scratch
+.endm
+
+.macro csubq r,x=12
+vpsubw %ymm0,%ymm\r,%ymm\r # r = r - ymm0
+vpsraw $15,%ymm\r,%ymm\x # scratch = all ones in lanes where r went negative, else zero
+vpand %ymm0,%ymm\x,%ymm\x # scratch = ymm0 in those lanes, else zero
+vpaddw %ymm\x,%ymm\r,%ymm\r # add ymm0 back only where the subtraction went negative
+.endm
+
+.macro caddq r,x=12
+vpsraw $15,%ymm\r,%ymm\x # scratch = all ones in lanes where r is negative, else zero
+vpand %ymm0,%ymm\x,%ymm\x # scratch = ymm0 in those lanes, else zero
+vpaddw %ymm\x,%ymm\r,%ymm\r # r = r + ymm0 only in the negative lanes
+.endm
+
+.macro fqmulprecomp al,ah,b,x=12
+vpmullw %ymm\al,%ymm\b,%ymm\x # scratch = low 16 bits of (b * al)
+vpmulhw %ymm\ah,%ymm\b,%ymm\b # b = high 16 bits of (b * ah)
+vpmulhw %ymm0,%ymm\x,%ymm\x # scratch = high 16 bits of (scratch * ymm0)
+vpsubw %ymm\x,%ymm\b,%ymm\b # b = b - scratch; result replaces b in place
+.endm
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/indcpa.c
new file mode 100644
index 0000000000..572ce49007
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/indcpa.c
@@ -0,0 +1,566 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <immintrin.h>
+#include <string.h>
+#include "align.h"
+#include "params.h"
+#include "indcpa.h"
+#include "polyvec.h"
+#include "poly.h"
+#include "ntt.h"
+#include "cbd.h"
+#include "rejsample.h"
+#include "symmetric.h"
+#include "randombytes.h"
+
+/*************************************************
+* Name:        pack_pk
+*
+* Description: Write the serialized public key: the byte encoding
+*              of the polynomial vector pk, directly followed by
+*              the public seed used to generate the matrix A.
+*              The polynomial coefficients in pk are assumed to
+*              lie in the interval [0,q], i.e. pk must be reduced
+*              by polyvec_reduce().
+*
+* Arguments:   uint8_t *r: pointer to the output serialized public key
+*              polyvec *pk: pointer to the input public-key polyvec
+*              const uint8_t *seed: pointer to the input public seed
+**************************************************/
+static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES],
+                    polyvec *pk,
+                    const uint8_t seed[KYBER_SYMBYTES])
+{
+  uint8_t *seed_out = &r[KYBER_POLYVECBYTES];
+  polyvec_tobytes(r, pk);
+  memcpy(seed_out, seed, KYBER_SYMBYTES);
+}
+
+/*************************************************
+* Name:        unpack_pk
+*
+* Description: Split a serialized public key into its polynomial
+*              vector and its seed; approximate inverse of pack_pk
+*
+* Arguments:   - polyvec *pk: pointer to output public-key polynomial vector
+*              - uint8_t *seed: pointer to output seed to generate matrix A
+*              - const uint8_t *packedpk: pointer to input serialized public key
+**************************************************/
+static void unpack_pk(polyvec *pk,
+                      uint8_t seed[KYBER_SYMBYTES],
+                      const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES])
+{
+  const uint8_t *seed_in = &packedpk[KYBER_POLYVECBYTES];
+  polyvec_frombytes(pk, packedpk);
+  memcpy(seed, seed_in, KYBER_SYMBYTES);
+}
+
+/*************************************************
+* Name: pack_sk
+*
+* Description: Serialize the secret key.
+* The polynomial coefficients in sk are assumed to
+* lie in the interval [0,q], i.e. sk must be reduced
+* by polyvec_reduce().
+*
+* Arguments: - uint8_t *r: pointer to output serialized secret key (KYBER_INDCPA_SECRETKEYBYTES bytes)
+* - polyvec *sk: pointer to input vector of polynomials (secret key)
+**************************************************/
+static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk)
+{
+ polyvec_tobytes(r, sk);
+}
+
+/*************************************************
+* Name: unpack_sk
+*
+* Description: De-serialize the secret key with polyvec_frombytes(); inverse of pack_sk
+*
+* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key)
+* - const uint8_t *packedsk: pointer to input serialized secret key (KYBER_INDCPA_SECRETKEYBYTES bytes)
+**************************************************/
+static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES])
+{
+ polyvec_frombytes(sk, packedsk);
+}
+
+/*************************************************
+* Name:        pack_ciphertext
+*
+* Description: Write the serialized ciphertext: the compressed
+*              encoding of the polynomial vector b, directly
+*              followed by the compressed encoding of the polynomial v.
+*              The polynomial coefficients in b and v are assumed to
+*              lie in the interval [0,q], i.e. b and v must be reduced
+*              by polyvec_reduce() and poly_reduce(), respectively.
+*
+* Arguments:   uint8_t *r: pointer to the output serialized ciphertext
+*              polyvec *b: pointer to the input vector of polynomials b
+*              poly *v: pointer to the input polynomial v
+**************************************************/
+static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v)
+{
+  uint8_t *v_out = &r[KYBER_POLYVECCOMPRESSEDBYTES];
+  polyvec_compress(r, b);
+  poly_compress(v_out, v);
+}
+
+/*************************************************
+* Name:        unpack_ciphertext
+*
+* Description: Decompress a serialized ciphertext back into the
+*              vector b and the polynomial v;
+*              approximate inverse of pack_ciphertext
+*
+* Arguments:   - polyvec *b: pointer to the output vector of polynomials b
+*              - poly *v: pointer to the output polynomial v
+*              - const uint8_t *c: pointer to the input serialized ciphertext
+**************************************************/
+static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES])
+{
+  const uint8_t *v_in = &c[KYBER_POLYVECCOMPRESSEDBYTES];
+  polyvec_decompress(b, c);
+  poly_decompress(v, v_in);
+}
+
+/*************************************************
+* Name:        rej_uniform
+*
+* Description: Run rejection sampling on uniform random bytes to generate
+*              uniform random integers mod q. Every 3 input bytes yield
+*              two 12-bit candidates; a candidate is kept only if it is
+*              smaller than KYBER_Q.
+*
+* Arguments:   - int16_t *r: pointer to output array
+*              - unsigned int len: requested number of 16-bit integers (uniform mod q)
+*              - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes)
+*              - unsigned int buflen: length of input buffer in bytes;
+*                a buffer shorter than 3 bytes yields no output
+*
+* Returns number of sampled 16-bit integers (at most len)
+**************************************************/
+static unsigned int rej_uniform(int16_t *r,
+                                unsigned int len,
+                                const uint8_t *buf,
+                                unsigned int buflen)
+{
+  unsigned int ctr, pos;
+  uint16_t val0, val1;
+
+  ctr = pos = 0;
+  /* pos + 3 <= buflen, unlike pos <= buflen - 3, does not wrap around when buflen < 3 */
+  while(ctr < len && pos + 3 <= buflen) {
+    val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF;
+    val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF;
+    pos += 3;
+
+    if(val0 < KYBER_Q)
+      r[ctr++] = val0;
+    /* the second candidate is dropped when the output is already full */
+    if(ctr < len && val1 < KYBER_Q)
+      r[ctr++] = val1;
+  }
+
+  return ctr;
+}
+
+#define gen_a(A,B) gen_matrix(A,B,0)
+#define gen_at(A,B) gen_matrix(A,B,1)
+
+/*************************************************
+* Name: gen_matrix
+*
+* Description: Deterministically generate matrix A (or the transpose of A)
+* from a seed. Entries of the matrix are polynomials that look
+* uniformly random. Performs rejection sampling on output of
+* a XOF
+*
+* Arguments: - polyvec *a: pointer to output matrix A
+* - const uint8_t *seed: pointer to input seed
+* - int transposed: boolean deciding whether A or A^T is generated
+**************************************************/
+#if KYBER_K == 2
+void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+{
+  unsigned int j, row, col;
+  unsigned int ctr[4];
+  ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+  __m256i f;
+  shake128x4incctx state;
+
+  /* Lane j feeds entry a[j/2].vec[j%2]: each lane absorbs the 32-byte
+   * seed followed by two index bytes whose order depends on transposed. */
+  f = _mm256_loadu_si256((__m256i *)seed);
+  for(j = 0; j < 4; j++) {
+    row = j / 2;
+    col = j % 2;
+    _mm256_store_si256(buf[j].vec, f);
+    buf[j].coeffs[32] = transposed ? row : col;
+    buf[j].coeffs[33] = transposed ? col : row;
+  }
+
+  shake128x4_inc_init(&state);
+  shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34);
+  shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+  /* First pass: vectorized rejection sampling on the initial blocks. */
+  for(j = 0; j < 4; j++)
+    ctr[j] = rej_uniform_avx(a[j / 2].vec[j % 2].coeffs, buf[j].coeffs);
+
+  /* Top up one block at a time until every lane has KYBER_N coefficients. */
+  while(ctr[0] < KYBER_N || ctr[1] < KYBER_N || ctr[2] < KYBER_N || ctr[3] < KYBER_N) {
+    shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+    for(j = 0; j < 4; j++)
+      ctr[j] += rej_uniform(a[j / 2].vec[j % 2].coeffs + ctr[j], KYBER_N - ctr[j], buf[j].coeffs, SHAKE128_RATE);
+  }
+
+  for(j = 0; j < 4; j++)
+    poly_nttunpack(&a[j / 2].vec[j % 2]);
+  shake128x4_inc_ctx_release(&state);
+}
+#elif KYBER_K == 3
+void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+{
+  unsigned int batch, j, k, row, col;
+  unsigned int ctr[4];
+  ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+  __m256i f;
+  shake128x4incctx state;
+  shake128incctx state1x;
+
+  shake128x4_inc_init(&state);
+
+  /* Entries 0..7 of the 3x3 matrix in row-major order (entry k is
+   * a[k/3].vec[k%3]) are produced four at a time with the 4-way XOF. */
+  for(batch = 0; batch < 2; batch++) {
+    /* Each lane absorbs the 32-byte seed followed by two index bytes
+     * whose order depends on transposed. */
+    f = _mm256_loadu_si256((__m256i *)seed);
+    for(j = 0; j < 4; j++) {
+      k = 4*batch + j;
+      row = k / 3;
+      col = k % 3;
+      _mm256_store_si256(buf[j].vec, f);
+      buf[j].coeffs[32] = transposed ? row : col;
+      buf[j].coeffs[33] = transposed ? col : row;
+    }
+
+    shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34);
+    shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+    /* First pass: vectorized rejection sampling on the initial blocks. */
+    for(j = 0; j < 4; j++) {
+      k = 4*batch + j;
+      ctr[j] = rej_uniform_avx(a[k / 3].vec[k % 3].coeffs, buf[j].coeffs);
+    }
+
+    /* Top up one block at a time until every lane has KYBER_N coefficients. */
+    while(ctr[0] < KYBER_N || ctr[1] < KYBER_N || ctr[2] < KYBER_N || ctr[3] < KYBER_N) {
+      shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+      for(j = 0; j < 4; j++) {
+        k = 4*batch + j;
+        ctr[j] += rej_uniform(a[k / 3].vec[k % 3].coeffs + ctr[j], KYBER_N - ctr[j], buf[j].coeffs, SHAKE128_RATE);
+      }
+    }
+
+    for(j = 0; j < 4; j++) {
+      k = 4*batch + j;
+      poly_nttunpack(&a[k / 3].vec[k % 3]);
+    }
+  }
+  shake128x4_inc_ctx_release(&state);
+
+  /* The ninth entry, a[2].vec[2], is sampled with the single-lane XOF. */
+  f = _mm256_loadu_si256((__m256i *)seed);
+  _mm256_store_si256(buf[0].vec, f);
+  buf[0].coeffs[32] = 2;
+  buf[0].coeffs[33] = 2;
+
+  shake128_inc_init(&state1x);
+  shake128_absorb_once(&state1x, buf[0].coeffs, 34);
+  shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x);
+  ctr[0] = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs);
+  while(ctr[0] < KYBER_N) {
+    shake128_squeezeblocks(buf[0].coeffs, 1, &state1x);
+    ctr[0] += rej_uniform(a[2].vec[2].coeffs + ctr[0], KYBER_N - ctr[0], buf[0].coeffs, SHAKE128_RATE);
+  }
+  shake128_inc_ctx_release(&state1x);
+
+  poly_nttunpack(&a[2].vec[2]);
+}
+#elif KYBER_K == 4
+void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+{
+  unsigned int i, j;
+  unsigned int ctr[4];
+  ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+  __m256i f;
+  shake128x4incctx state;
+  shake128x4_inc_init(&state);
+
+  /* One pass per matrix row i; lane j produces a[i].vec[j]. */
+  for(i = 0; i < 4; i++) {
+    /* Each lane absorbs the 32-byte seed followed by two index bytes
+     * whose order depends on transposed. */
+    f = _mm256_loadu_si256((__m256i *)seed);
+    for(j = 0; j < 4; j++) {
+      _mm256_store_si256(buf[j].vec, f);
+      buf[j].coeffs[32] = transposed ? i : j;
+      buf[j].coeffs[33] = transposed ? j : i;
+    }
+
+    shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34);
+    shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+    /* First pass: vectorized rejection sampling on the initial blocks. */
+    for(j = 0; j < 4; j++)
+      ctr[j] = rej_uniform_avx(a[i].vec[j].coeffs, buf[j].coeffs);
+
+    /* Top up one block at a time until every lane has KYBER_N coefficients. */
+    while(ctr[0] < KYBER_N || ctr[1] < KYBER_N || ctr[2] < KYBER_N || ctr[3] < KYBER_N) {
+      shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+      for(j = 0; j < 4; j++)
+        ctr[j] += rej_uniform(a[i].vec[j].coeffs + ctr[j], KYBER_N - ctr[j], buf[j].coeffs, SHAKE128_RATE);
+    }
+
+    for(j = 0; j < 4; j++)
+      poly_nttunpack(&a[i].vec[j]);
+  }
+  shake128x4_inc_ctx_release(&state);
+}
+#endif
+
+/*************************************************
+* Name: indcpa_keypair_derand
+*
+* Description: Generates public and private key for the CPA-secure
+* public-key encryption scheme underlying Kyber
+*
+* Arguments: - uint8_t *pk: pointer to output public key
+* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes)
+* - uint8_t *sk: pointer to output private key
+* (of length KYBER_INDCPA_SECRETKEYBYTES bytes)
+* - const uint8_t *coins: pointer to input randomness
+* (of length KYBER_SYMBYTES bytes)
+**************************************************/
+void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES])
+{
+ unsigned int i;
+ uint8_t buf[2*KYBER_SYMBYTES];
+ const uint8_t *publicseed = buf;
+ const uint8_t *noiseseed = buf + KYBER_SYMBYTES;
+ polyvec a[KYBER_K], e, pkpv, skpv;
+
+ hash_g(buf, coins, KYBER_SYMBYTES);
+
+ gen_a(a, publicseed);
+
+#if KYBER_K == 2
+ poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3);
+#elif KYBER_K == 3
+ poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3);
+ poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7);
+#elif KYBER_K == 4
+ poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3);
+ poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7);
+#endif
+
+ polyvec_ntt(&skpv);
+ polyvec_reduce(&skpv);
+ polyvec_ntt(&e);
+
+ // matrix-vector multiplication
+ for(i=0;i
+#include "params.h"
+#include "polyvec.h"
+
+#define gen_matrix KYBER_NAMESPACE(gen_matrix)
+void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed);
+
+#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand)
+void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]);
+
+#define indcpa_enc KYBER_NAMESPACE(indcpa_enc)
+void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]);
+
+#define indcpa_dec KYBER_NAMESPACE(indcpa_dec)
+void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/invntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/invntt.S
new file mode 100644
index 0000000000..76d4189996
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/invntt.S
@@ -0,0 +1,193 @@
+#include "consts.h"
+.include "shuffle.inc"
+.include "fq.inc"
+
+.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3 /* four (rl,rh) register pairs: rl = rl + rh, rh = (rh - rl) times the zl/zh constant pair; ymm12..ymm15 are scratch */
+vpsubw %ymm\rl0,%ymm\rh0,%ymm12 /* ymm12 = rh0 - rl0 */
+vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0 /* rl0 = rl0 + rh0 */
+vpsubw %ymm\rl1,%ymm\rh1,%ymm13
+
+vpmullw %ymm\zl0,%ymm12,%ymm\rh0 /* rh0 = low 16 bits of (difference * zl0) */
+vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1
+vpsubw %ymm\rl2,%ymm\rh2,%ymm14
+
+vpmullw %ymm\zl0,%ymm13,%ymm\rh1
+vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2
+vpsubw %ymm\rl3,%ymm\rh3,%ymm15
+
+vpmullw %ymm\zl1,%ymm14,%ymm\rh2 /* pairs 2 and 3 use the second constant pair zl1/zh1 */
+vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3
+vpmullw %ymm\zl1,%ymm15,%ymm\rh3
+
+vpmulhw %ymm\zh0,%ymm12,%ymm12 /* ymm12 = high 16 bits of (difference * zh0) */
+vpmulhw %ymm\zh0,%ymm13,%ymm13
+
+vpmulhw %ymm\zh1,%ymm14,%ymm14
+vpmulhw %ymm\zh1,%ymm15,%ymm15
+
+vpmulhw %ymm0,%ymm\rh0,%ymm\rh0 /* rh0 = high 16 bits of (rh0 * ymm0) */
+
+vpmulhw %ymm0,%ymm\rh1,%ymm\rh1
+
+vpmulhw %ymm0,%ymm\rh2,%ymm\rh2
+vpmulhw %ymm0,%ymm\rh3,%ymm\rh3
+
+#
+
+#
+
+vpsubw %ymm\rh0,%ymm12,%ymm\rh0 /* rh0 = ymm12 - rh0, same final step as fqmulprecomp */
+
+vpsubw %ymm\rh1,%ymm13,%ymm\rh1
+
+vpsubw %ymm\rh2,%ymm14,%ymm\rh2
+vpsubw %ymm\rh3,%ymm15,%ymm\rh3
+.endm
+
+.macro intt_levels0t5 off
+/* level 0: works on the 128 words starting at word index 128*off; every input vector is first multiplied by the FLO/FHI constant pair */
+vmovdqa _16XFLO*2(%rsi),%ymm2
+vmovdqa _16XFHI*2(%rsi),%ymm3
+
+vmovdqa (128*\off+ 0)*2(%rdi),%ymm4
+vmovdqa (128*\off+ 32)*2(%rdi),%ymm6
+vmovdqa (128*\off+ 16)*2(%rdi),%ymm5
+vmovdqa (128*\off+ 48)*2(%rdi),%ymm7
+
+fqmulprecomp 2,3,4
+fqmulprecomp 2,3,6
+fqmulprecomp 2,3,5
+fqmulprecomp 2,3,7
+
+vmovdqa (128*\off+ 64)*2(%rdi),%ymm8
+vmovdqa (128*\off+ 96)*2(%rdi),%ymm10
+vmovdqa (128*\off+ 80)*2(%rdi),%ymm9
+vmovdqa (128*\off+112)*2(%rdi),%ymm11
+
+fqmulprecomp 2,3,8
+fqmulprecomp 2,3,10
+fqmulprecomp 2,3,9
+fqmulprecomp 2,3,11
+
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15 /* 0x4E swaps the two 128-bit halves; off 0 reads the table block at +224, off 1 the block at +0 */
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3
+vmovdqa _REVIDXB*2(%rsi),%ymm12 /* byte-shuffle control taken from the _REVIDXB row */
+vpshufb %ymm12,%ymm15,%ymm15
+vpshufb %ymm12,%ymm1,%ymm1
+vpshufb %ymm12,%ymm2,%ymm2
+vpshufb %ymm12,%ymm3,%ymm3
+
+butterfly 4,5,8,9,6,7,10,11,15,1,2,3
+
+/* level 1 */
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3
+vmovdqa _REVIDXB*2(%rsi),%ymm1
+vpshufb %ymm1,%ymm2,%ymm2
+vpshufb %ymm1,%ymm3,%ymm3
+
+butterfly 4,5,6,7,8,9,10,11,2,2,3,3
+
+shuffle1 4,5,3,5
+shuffle1 6,7,4,7
+shuffle1 8,9,6,9
+shuffle1 10,11,8,11
+
+/* level 2: constants are permuted with the dword indices from the _REVIDXD row */
+vmovdqa _REVIDXD*2(%rsi),%ymm12
+vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2
+vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10
+
+butterfly 3,4,6,8,5,7,9,11,2,2,10,10
+
+vmovdqa _16XV*2(%rsi),%ymm1 /* red16 reads its multiplier from ymm1 */
+red16 3
+
+shuffle2 3,4,10,4
+shuffle2 6,8,3,8
+shuffle2 5,7,6,7
+shuffle2 9,11,5,11
+
+/* level 3: 0x1B reverses the order of the four 64-bit quarters */
+vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2
+vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9
+
+butterfly 10,3,6,5,4,8,7,11,2,2,9,9
+
+shuffle4 10,3,9,3
+shuffle4 6,5,10,5
+shuffle4 4,8,6,8
+shuffle4 7,11,4,11
+
+/* level 4 */
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7
+
+butterfly 9,10,6,4,3,5,8,11,2,2,7,7
+
+red16 9
+
+shuffle8 9,10,7,10
+shuffle8 6,4,9,4
+shuffle8 3,5,6,5
+shuffle8 8,11,3,11
+
+/* level 5: constants are used as stored, then the eight result vectors are written back */
+vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2
+vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8
+
+butterfly 7,9,6,3,10,4,5,11,2,2,8,8
+
+vmovdqa %ymm7,(128*\off+ 0)*2(%rdi)
+vmovdqa %ymm9,(128*\off+ 16)*2(%rdi)
+vmovdqa %ymm6,(128*\off+ 32)*2(%rdi)
+vmovdqa %ymm3,(128*\off+ 48)*2(%rdi)
+vmovdqa %ymm10,(128*\off+ 64)*2(%rdi)
+vmovdqa %ymm4,(128*\off+ 80)*2(%rdi)
+vmovdqa %ymm5,(128*\off+ 96)*2(%rdi)
+vmovdqa %ymm11,(128*\off+112)*2(%rdi)
+.endm
+
+.macro intt_level6 off
+/* level 6: pairs the 64 words at word index 64*off with the 64 words 128 words further on; one broadcast constant pair serves all butterflies */
+vmovdqa (64*\off+ 0)*2(%rdi),%ymm4
+vmovdqa (64*\off+128)*2(%rdi),%ymm8
+vmovdqa (64*\off+ 16)*2(%rdi),%ymm5
+vmovdqa (64*\off+144)*2(%rdi),%ymm9
+vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2
+
+vmovdqa (64*\off+ 32)*2(%rdi),%ymm6
+vmovdqa (64*\off+160)*2(%rdi),%ymm10
+vmovdqa (64*\off+ 48)*2(%rdi),%ymm7
+vmovdqa (64*\off+176)*2(%rdi),%ymm11
+vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3
+
+butterfly 4,5,6,7,8,9,10,11
+
+.if \off == 0
+red16 4 /* only the first vector of the off 0 half is reduced; NOTE(review): relies on ymm1 still holding the _16XV row loaded in intt_levels0t5 - confirm the shuffle macros leave ymm1 intact */
+.endif
+
+vmovdqa %ymm4,(64*\off+ 0)*2(%rdi)
+vmovdqa %ymm5,(64*\off+ 16)*2(%rdi)
+vmovdqa %ymm6,(64*\off+ 32)*2(%rdi)
+vmovdqa %ymm7,(64*\off+ 48)*2(%rdi)
+vmovdqa %ymm8,(64*\off+128)*2(%rdi)
+vmovdqa %ymm9,(64*\off+144)*2(%rdi)
+vmovdqa %ymm10,(64*\off+160)*2(%rdi)
+vmovdqa %ymm11,(64*\off+176)*2(%rdi)
+.endm
+
+.text
+.global cdecl(invntt_avx)
+cdecl(invntt_avx): /* %rdi: 256 16-bit words transformed in place; %rsi: table of 16-bit constants indexed by the consts.h offsets */
+vmovdqa _16XQ*2(%rsi),%ymm0 /* ymm0 keeps the _16XQ row for every macro below */
+
+intt_levels0t5 0
+intt_levels0t5 1
+
+intt_level6 0
+intt_level6 1
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.c
new file mode 100644
index 0000000000..63abc1029c
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.c
@@ -0,0 +1,169 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include "params.h"
+#include "kem.h"
+#include "indcpa.h"
+#include "verify.h"
+#include "symmetric.h"
+#include "randombytes.h"
+/*************************************************
+* Name:        crypto_kem_keypair_derand
+*
+* Description: Deterministic key generation for the CCA-secure KEM.
+*              Secret key layout: indcpa_sk || pk || H(pk) || z.
+*
+* Arguments:   - uint8_t *pk: output public key
+*                (caller-allocated, KYBER_PUBLICKEYBYTES bytes)
+*              - uint8_t *sk: output private key
+*                (caller-allocated, KYBER_SECRETKEYBYTES bytes)
+*              - const uint8_t *coins: input randomness
+*                (2*KYBER_SYMBYTES bytes; the second half is stored as z)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+{
+  uint8_t *const sk_pk  = sk + KYBER_INDCPA_SECRETKEYBYTES;
+  uint8_t *const sk_hpk = sk + KYBER_SECRETKEYBYTES - 2*KYBER_SYMBYTES;
+  uint8_t *const sk_z   = sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES;
+  indcpa_keypair_derand(pk, sk, coins);
+  memcpy(sk_pk, pk, KYBER_PUBLICKEYBYTES);
+  hash_h(sk_hpk, pk, KYBER_PUBLICKEYBYTES);
+  memcpy(sk_z, coins + KYBER_SYMBYTES, KYBER_SYMBYTES); /* z: source of the pseudo-random output on reject */
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_keypair
+*
+* Description: Randomized key generation: draws 2*KYBER_SYMBYTES bytes
+*              from randombytes() and calls crypto_kem_keypair_derand().
+*
+* Arguments:   - uint8_t *pk: output public key
+*                (caller-allocated, KYBER_PUBLICKEYBYTES bytes)
+*              - uint8_t *sk: output private key
+*                (caller-allocated, KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+{
+  /* First half seeds the IND-CPA key pair, second half becomes the rejection value z */
+  uint8_t seed[2*KYBER_SYMBYTES];
+  randombytes(seed, sizeof seed);
+  (void)crypto_kem_keypair_derand(pk, sk, seed);
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_enc_derand
+*
+* Description: Deterministic encapsulation: derives the shared secret and
+*              the encryption coins from coins and H(pk), then encrypts.
+*
+* Arguments:   - uint8_t *ct: output cipher text
+*                (caller-allocated, KYBER_CIPHERTEXTBYTES bytes)
+*              - uint8_t *ss: output shared secret
+*                (caller-allocated, KYBER_SSBYTES bytes)
+*              - const uint8_t *pk: input public key
+*                (KYBER_PUBLICKEYBYTES bytes)
+*              - const uint8_t *coins: input randomness
+*                (KYBER_SYMBYTES random bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_enc_derand(uint8_t *ct,
+                          uint8_t *ss,
+                          const uint8_t *pk,
+                          const uint8_t *coins)
+{
+  uint8_t m_hpk[2*KYBER_SYMBYTES];  /* coins (used as the message) || H(pk) */
+  uint8_t key_r[2*KYBER_SYMBYTES];  /* shared key || encryption coins */
+  uint8_t *const hpk = m_hpk + KYBER_SYMBYTES;
+  const uint8_t *const r = key_r + KYBER_SYMBYTES;
+
+  /* key_r = G(coins || H(pk)); mixing in H(pk) is the multitarget countermeasure */
+  memcpy(m_hpk, coins, KYBER_SYMBYTES);
+  hash_h(hpk, pk, KYBER_PUBLICKEYBYTES);
+  hash_g(key_r, m_hpk, sizeof m_hpk);
+
+  /* Encrypt the first half of m_hpk under pk with the derived coins */
+  indcpa_enc(ct, m_hpk, pk, r);
+  /* The first half of G's output is the shared secret */
+  memcpy(ss, key_r, KYBER_SYMBYTES);
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_enc
+*
+* Description: Randomized encapsulation: draws KYBER_SYMBYTES bytes from
+*              randombytes() and calls crypto_kem_enc_derand().
+*
+* Arguments:   - uint8_t *ct: output cipher text
+*                (caller-allocated, KYBER_CIPHERTEXTBYTES bytes)
+*              - uint8_t *ss: output shared secret
+*                (caller-allocated, KYBER_SSBYTES bytes)
+*              - const uint8_t *pk: input public key
+*                (KYBER_PUBLICKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+{
+  /* The only fresh randomness of an encapsulation; the rest is deterministic */
+  uint8_t m[KYBER_SYMBYTES];
+
+  randombytes(m, sizeof m);
+  (void)crypto_kem_enc_derand(ct, ss, pk, m);
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_dec
+*
+* Description: Decapsulation: decrypts ct, re-encrypts the result and
+*              compares it with ct to decide which key is returned
+*
+* Arguments:   - uint8_t *ss: pointer to output shared secret
+*                (an already allocated array of KYBER_SSBYTES bytes)
+*              - const uint8_t *ct: pointer to input cipher text
+*                (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+*              - const uint8_t *sk: pointer to input private key
+*                (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0, also when the re-encryption check fails.
+*
+* On failure, ss will contain a pseudo-random value derived from z and ct.
+**************************************************/
+int crypto_kem_dec(uint8_t *ss,
+ const uint8_t *ct,
+ const uint8_t *sk)
+{
+ int fail;
+ uint8_t buf[2*KYBER_SYMBYTES];
+ /* Will contain key, coins: first half is the candidate shared key, second half the re-encryption coins */
+ uint8_t kr[2*KYBER_SYMBYTES];
+ uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; /* re-encrypted cipher text; only the first KYBER_CIPHERTEXTBYTES bytes are compared */
+ const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; /* the public key is embedded in sk right after the IND-CPA secret key */
+
+ indcpa_dec(buf, ct, sk);
+
+ /* Multitarget countermeasure for coins + contributory KEM: append the H(pk) stored in sk */
+ memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES);
+ hash_g(kr, buf, 2*KYBER_SYMBYTES);
+
+ /* coins are in kr+KYBER_SYMBYTES */
+ indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES);
+
+ fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); /* presumably 0 when equal, non-zero otherwise (see verify.h) */
+
+ /* Compute rejection key from z (last KYBER_SYMBYTES bytes of sk) and ct; always written first */
+ rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct);
+
+ /* Copy true key to return buffer if fail is false; must stay after rkprf so it overwrites the rejection key */
+ cmov(ss,kr,KYBER_SYMBYTES,!fail);
+
+ return 0;
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.h
new file mode 100644
index 0000000000..234f11966b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/kem.h
@@ -0,0 +1,35 @@
+#ifndef KEM_H
+#define KEM_H
+
+#include <stdint.h>
+#include "params.h"
+
+#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES /* CRYPTO_* are aliases for the sizes derived in params.h */
+#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES
+#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES
+#define CRYPTO_BYTES KYBER_SSBYTES /* length of the shared secret */
+
+#if (KYBER_K == 2) /* human-readable parameter-set name, selected by KYBER_K */
+#define CRYPTO_ALGNAME "Kyber512"
+#elif (KYBER_K == 3)
+#define CRYPTO_ALGNAME "Kyber768"
+#elif (KYBER_K == 4)
+#define CRYPTO_ALGNAME "Kyber1024"
+#endif
+
+#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) /* each entry point is renamed into the per-parameter-set namespace */
+int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); /* coins: 2*KYBER_SYMBYTES bytes */
+
+#define crypto_kem_keypair KYBER_NAMESPACE(keypair)
+int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); /* randomized wrapper around keypair_derand */
+
+#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand)
+int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); /* coins: KYBER_SYMBYTES bytes */
+
+#define crypto_kem_enc KYBER_NAMESPACE(enc)
+int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); /* randomized wrapper around enc_derand */
+
+#define crypto_kem_dec KYBER_NAMESPACE(dec)
+int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); /* always returns 0; ss is pseudo-random on failure */
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.S
new file mode 100644
index 0000000000..0ce7b41297
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.S
@@ -0,0 +1,189 @@
+#include "consts.h"
+.include "shuffle.inc"
+
+.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 /* all arguments are ymm register numbers */
+vpmullw %ymm\zl0,%ymm\rh0,%ymm12 /* ymm12..15 = low 16 bits of rh*zl (zl presumably holds zeta*q^-1 - confirm against consts) */
+vpmullw %ymm\zl0,%ymm\rh1,%ymm13
+
+vpmullw %ymm\zl1,%ymm\rh2,%ymm14 /* may overwrite zl0 when zl0=14 (level 6): zl0 was last read on the previous instruction */
+vpmullw %ymm\zl1,%ymm\rh3,%ymm15 /* overwrites ymm15, the default zl register; this is its last read */
+
+vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 /* rh = high 16 bits of the signed product rh*zh, in place */
+vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1
+
+vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2
+vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3
+.endm
+
+.macro reduce /* expects ymm12..15 as left by mul and ymm0 as loaded from _16XQ in ntt_avx */
+vpmulhw %ymm0,%ymm12,%ymm12 /* ymm12..15 = high 16 bits of (low product * ymm0): the correction terms consumed by update */
+vpmulhw %ymm0,%ymm13,%ymm13
+
+vpmulhw %ymm0,%ymm14,%ymm14
+vpmulhw %ymm0,%ymm15,%ymm15
+.endm
+
+.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 /* sums land in rln,rl0,rl1,rl2 (shifted by one name); differences stay in rh0..rh3 */
+vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln /* rln = rl0 + rh0; a fresh register, so rl0 is still intact for the next line */
+vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 /* rh0 = rl0 - rh0 */
+vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 /* rl0 is free now and receives rl1 + rh1 */
+
+vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1
+vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1
+vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2
+
+vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2
+vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 /* rl3 is only read, never written */
+
+vpsubw %ymm12,%ymm\rln,%ymm\rln /* apply the corrections from reduce: subtracted from the sums ... */
+vpaddw %ymm12,%ymm\rh0,%ymm\rh0 /* ... and added to the differences */
+vpsubw %ymm13,%ymm\rl0,%ymm\rl0
+
+vpaddw %ymm13,%ymm\rh1,%ymm\rh1
+vpsubw %ymm14,%ymm\rl1,%ymm\rl1
+vpaddw %ymm14,%ymm\rh2,%ymm\rh2
+
+vpsubw %ymm15,%ymm\rl2,%ymm\rl2
+vpaddw %ymm15,%ymm\rh3,%ymm\rh3
+.endm
+
+.macro level0 off /* off (0 or 1) selects the 64-coefficient block; offsets are in 16-bit coefficients, hence the *2 */
+vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 /* ymm15 is the default zl operand of mul */
+vmovdqa (64*\off+128)*2(%rdi),%ymm8 /* upper operands: 128 coefficients above the lower ones */
+vmovdqa (64*\off+144)*2(%rdi),%ymm9
+vmovdqa (64*\off+160)*2(%rdi),%ymm10
+vmovdqa (64*\off+176)*2(%rdi),%ymm11
+vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm2 /* ymm2 is the default zh operand of mul */
+
+mul 8,9,10,11
+
+vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 /* lower operands, loaded after mul was issued */
+vmovdqa (64*\off+ 16)*2(%rdi),%ymm5
+vmovdqa (64*\off+ 32)*2(%rdi),%ymm6
+vmovdqa (64*\off+ 48)*2(%rdi),%ymm7
+
+reduce
+update 3,4,5,6,7,8,9,10,11 /* sums end up in ymm3..6, differences in ymm8..11 */
+
+vmovdqa %ymm3,(64*\off+ 0)*2(%rdi)
+vmovdqa %ymm4,(64*\off+ 16)*2(%rdi)
+vmovdqa %ymm5,(64*\off+ 32)*2(%rdi)
+vmovdqa %ymm6,(64*\off+ 48)*2(%rdi)
+vmovdqa %ymm8,(64*\off+128)*2(%rdi)
+vmovdqa %ymm9,(64*\off+144)*2(%rdi)
+vmovdqa %ymm10,(64*\off+160)*2(%rdi)
+vmovdqa %ymm11,(64*\off+176)*2(%rdi)
+.endm
+
+.macro levels1t6 off
+/* level 1: works on the 128-coefficient half selected by off (0 or 1); each half uses its own 224-entry slice of _ZETAS_EXP */
+vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15
+vmovdqa (128*\off+ 64)*2(%rdi),%ymm8
+vmovdqa (128*\off+ 80)*2(%rdi),%ymm9
+vmovdqa (128*\off+ 96)*2(%rdi),%ymm10
+vmovdqa (128*\off+112)*2(%rdi),%ymm11
+vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2
+
+mul 8,9,10,11
+
+vmovdqa (128*\off+ 0)*2(%rdi),%ymm4
+vmovdqa (128*\off+ 16)*2(%rdi),%ymm5
+vmovdqa (128*\off+ 32)*2(%rdi),%ymm6
+vmovdqa (128*\off+ 48)*2(%rdi),%ymm7
+
+reduce
+update 3,4,5,6,7,8,9,10,11
+
+/* level 2: from here on all data stays in registers; the shuffleN macros come from shuffle.inc (not shown) and presumably interleave N-coefficient groups - confirm */
+shuffle8 5,10,7,10
+shuffle8 6,11,5,11
+
+vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2
+
+mul 7,10,5,11
+
+shuffle8 3,8,6,8
+shuffle8 4,9,3,9
+
+reduce
+update 4,6,8,3,9,7,10,5,11
+
+/* level 3: same mul / reduce / update pattern, regrouped with shuffle4 */
+shuffle4 8,5,9,5
+shuffle4 3,11,8,11
+
+vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2
+
+mul 9,5,8,11
+
+shuffle4 4,7,3,7
+shuffle4 6,10,4,10
+
+reduce
+update 6,3,7,4,10,9,5,8,11
+
+/* level 4: regrouped with shuffle2 */
+shuffle2 7,8,10,8
+shuffle2 4,11,7,11
+
+vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2
+
+mul 10,8,7,11
+
+shuffle2 6,9,4,9
+shuffle2 3,5,6,5
+
+reduce
+update 3,4,9,6,5,10,8,7,11
+
+/* level 5: regrouped with shuffle1 */
+shuffle1 9,7,5,7
+shuffle1 6,11,9,11
+
+vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2
+
+mul 5,7,9,11
+
+shuffle1 3,10,6,10
+shuffle1 4,8,3,8
+
+reduce
+update 4,6,10,3,8,5,7,9,11
+
+/* level 6: no shuffle; two distinct twiddle pairs (ymm14/ymm8 for the first two operands, ymm15/ymm2 for the last two) */
+vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14
+vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8
+vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2
+
+mul 10,3,9,11,14,15,8,2
+
+reduce
+update 8,4,6,5,7,10,3,9,11
+
+vmovdqa %ymm8,(128*\off+ 0)*2(%rdi)
+vmovdqa %ymm4,(128*\off+ 16)*2(%rdi)
+vmovdqa %ymm10,(128*\off+ 32)*2(%rdi)
+vmovdqa %ymm3,(128*\off+ 48)*2(%rdi)
+vmovdqa %ymm6,(128*\off+ 64)*2(%rdi)
+vmovdqa %ymm5,(128*\off+ 80)*2(%rdi)
+vmovdqa %ymm9,(128*\off+ 96)*2(%rdi)
+vmovdqa %ymm11,(128*\off+112)*2(%rdi)
+.endm
+
+.text
+.global cdecl(ntt_avx) /* C prototype in ntt.h: void ntt_avx(__m256i *r, const __m256i *qdata) */
+cdecl(ntt_avx):
+vmovdqa _16XQ*2(%rsi),%ymm0 /* ymm0 = vector at _16XQ in the table addressed by rsi; read by every reduce expansion */
+
+level0 0 /* level 0 spans both halves of the polynomial, 64 coefficients per expansion */
+level0 1
+
+levels1t6 0 /* levels 1-6 on coefficients 0..127 */
+levels1t6 1 /* levels 1-6 on coefficients 128..255 */
+
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.h
new file mode 100644
index 0000000000..a4f48e343b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/ntt.h
@@ -0,0 +1,28 @@
+#ifndef NTT_H
+#define NTT_H
+
+#include <stdint.h>
+#include <immintrin.h>
+
+#define ntt_avx KYBER_NAMESPACE(ntt_avx)
+void ntt_avx(__m256i *r, const __m256i *qdata); /* forward NTT, in place on r; qdata is the constant table (callers pass qdata.vec) */
+#define invntt_avx KYBER_NAMESPACE(invntt_avx)
+void invntt_avx(__m256i *r, const __m256i *qdata); /* inverse NTT, in place on r */
+
+#define nttpack_avx KYBER_NAMESPACE(nttpack_avx)
+void nttpack_avx(__m256i *r, const __m256i *qdata); /* presumably the inverse reordering of nttunpack_avx - confirm */
+#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx)
+void nttunpack_avx(__m256i *r, const __m256i *qdata); /* in-place coefficient reordering used by poly_nttunpack */
+
+#define basemul_avx KYBER_NAMESPACE(basemul_avx)
+void basemul_avx(__m256i *r, /* r = a * b in the NTT domain (used by poly_basemul_montgomery) */
+                 const __m256i *a,
+                 const __m256i *b,
+                 const __m256i *qdata);
+
+#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx)
+void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); /* serializes a into KYBER_POLYBYTES bytes (see poly_tobytes) */
+#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx)
+void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); /* inverse of ntttobytes_avx (see poly_frombytes) */
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/params.h
new file mode 100644
index 0000000000..fdc688ea2b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/params.h
@@ -0,0 +1,68 @@
+#ifndef PARAMS_H
+#define PARAMS_H
+
+#ifndef KYBER_K
+#define KYBER_K 3 /* Default used only when the build does not define KYBER_K; accepted values are 2, 3 and 4 */
+#endif
+
+//#define KYBER_90S /* Uncomment this if you want the 90S variant */
+
+/* Don't change parameters below this line */
+#if (KYBER_K == 2) /* KYBER_NAMESPACE(s) prefixes every symbol with the parameter-set name so several sets can be linked together */
+#ifdef KYBER_90S
+#define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s
+#else
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_avx2_##s
+#endif
+#elif (KYBER_K == 3)
+#ifdef KYBER_90S
+#define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s
+#else
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_avx2_##s
+#endif
+#elif (KYBER_K == 4)
+#ifdef KYBER_90S
+#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s
+#else
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_avx2_##s
+#endif
+#else
+#error "KYBER_K must be in {2,3,4}"
+#endif
+
+#define KYBER_N 256 /* number of coefficients per polynomial */
+#define KYBER_Q 3329 /* coefficient modulus q */
+
+#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */
+#define KYBER_SSBYTES 32 /* size in bytes of shared key */
+
+#define KYBER_POLYBYTES 384 /* serialized polynomial: equals KYBER_N*12/8 */
+#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES)
+
+#if KYBER_K == 2
+#define KYBER_ETA1 3
+#define KYBER_POLYCOMPRESSEDBYTES 128 /* KYBER_N*4/8 */
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) /* 320 = KYBER_N*10/8 */
+#elif KYBER_K == 3
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 128
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320)
+#elif KYBER_K == 4
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 160 /* KYBER_N*5/8 */
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) /* 352 = KYBER_N*11/8 */
+#endif
+
+#define KYBER_ETA2 2
+
+#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES)
+#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES)
+#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES)
+#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES)
+
+#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES)
+/* IND-CPA secret key, a copy of the public key, then 2*32 bytes: H(pk) and the rejection value z */
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES)
+#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES)
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.c
new file mode 100644
index 0000000000..681fd6d23e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.c
@@ -0,0 +1,519 @@
+#include <stdint.h>
+#include <immintrin.h>
+#include <string.h>
+#include "align.h"
+#include "fips202x4.h"
+#include "params.h"
+#include "poly.h"
+#include "ntt.h"
+#include "consts.h"
+#include "reduce.h"
+#include "cbd.h"
+#include "symmetric.h"
+
+/*************************************************
+* Name:        poly_compress
+*
+* Description: Compression and subsequent serialization of a polynomial.
+*              The coefficients of the input polynomial are assumed to
+*              lie in the interval [0,q], i.e. the polynomial must be reduced
+*              by poly_reduce(). This variant emits 4 bits per coefficient.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                (of length KYBER_POLYCOMPRESSEDBYTES)
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+#if (KYBER_POLYCOMPRESSEDBYTES == 128)
+void poly_compress(uint8_t r[128], const poly * restrict a)
+{
+  unsigned int i;
+  __m256i f0, f1, f2, f3;
+  const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
+  const __m256i shift1 = _mm256_set1_epi16(1 << 9);
+  const __m256i mask = _mm256_set1_epi16(15); /* keep the low 4 bits of every coefficient */
+  const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); /* maddubs weights: low byte + 16*high byte, i.e. two nibbles per byte */
+  const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0); /* undoes the per-lane interleaving of the pack instructions */
+
+  for(i=0;i<KYBER_N/64;i++) { /* 4 vectors (64 coefficients) in, 32 bytes out per iteration */
+    f0 = _mm256_load_si256(&a->vec[4*i+0]);
+    f1 = _mm256_load_si256(&a->vec[4*i+1]);
+    f2 = _mm256_load_si256(&a->vec[4*i+2]);
+    f3 = _mm256_load_si256(&a->vec[4*i+3]);
+    f0 = _mm256_mulhi_epi16(f0,v);
+    f1 = _mm256_mulhi_epi16(f1,v);
+    f2 = _mm256_mulhi_epi16(f2,v);
+    f3 = _mm256_mulhi_epi16(f3,v);
+    f0 = _mm256_mulhrs_epi16(f0,shift1);
+    f1 = _mm256_mulhrs_epi16(f1,shift1);
+    f2 = _mm256_mulhrs_epi16(f2,shift1);
+    f3 = _mm256_mulhrs_epi16(f3,shift1);
+    f0 = _mm256_and_si256(f0,mask);
+    f1 = _mm256_and_si256(f1,mask);
+    f2 = _mm256_and_si256(f2,mask);
+    f3 = _mm256_and_si256(f3,mask);
+    f0 = _mm256_packus_epi16(f0,f1);
+    f2 = _mm256_packus_epi16(f2,f3);
+    f0 = _mm256_maddubs_epi16(f0,shift2);
+    f2 = _mm256_maddubs_epi16(f2,shift2);
+    f0 = _mm256_packus_epi16(f0,f2);
+    f0 = _mm256_permutevar8x32_epi32(f0,permdidx);
+    _mm256_storeu_si256((__m256i *)&r[32*i],f0);
+  }
+}
+
+void poly_decompress(poly * restrict r, const uint8_t a[128]) /* inverse of the 4-bit poly_compress: 128 bytes -> KYBER_N coefficients */
+{
+  unsigned int i;
+  __m128i t;
+  __m256i f;
+  const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]);
+  const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4,
+                                           3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); /* every input byte is replicated into two 16-bit words */
+  const __m256i mask = _mm256_set1_epi32(0x00F0000F); /* even word keeps the low nibble, odd word the high nibble */
+  const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); /* brings both nibbles to nibble*2^11 */
+
+  for(i=0;i<KYBER_N/16;i++) { /* 8 input bytes -> one vector of 16 coefficients */
+    t = _mm_loadl_epi64((__m128i *)&a[8*i]);
+    f = _mm256_broadcastsi128_si256(t);
+    f = _mm256_shuffle_epi8(f,shufbidx);
+    f = _mm256_and_si256(f,mask);
+    f = _mm256_mullo_epi16(f,shift);
+    f = _mm256_mulhrs_epi16(f,q); /* round(nibble*2^11*q/2^15) = round(nibble*q/16) */
+    _mm256_store_si256(&r->vec[i],f);
+  }
+}
+
+#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
+void poly_compress(uint8_t r[160], const poly * restrict a) /* 5-bit variant: KYBER_N coefficients -> 160 bytes; input must be reduced by poly_reduce() */
+{
+  unsigned int i;
+  __m256i f0, f1;
+  __m128i t0, t1;
+  const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
+  const __m256i shift1 = _mm256_set1_epi16(1 << 10);
+  const __m256i mask = _mm256_set1_epi16(31); /* keep the low 5 bits of every coefficient */
+  const __m256i shift2 = _mm256_set1_epi16((32 << 8) + 1); /* maddubs weights: merge two 5-bit values into 10 bits */
+  const __m256i shift3 = _mm256_set1_epi32((1024 << 16) + 1); /* madd weights: merge two 10-bit values into 20 bits */
+  const __m256i sllvdidx = _mm256_set1_epi64x(12);
+  const __m256i shufbidx = _mm256_set_epi8( 8,-1,-1,-1,-1,-1, 4, 3, 2, 1, 0,-1,12,11,10, 9,
+                                           -1,12,11,10, 9, 8,-1,-1,-1,-1,-1 ,4, 3, 2, 1, 0);
+
+  for(i=0;i<KYBER_N/32;i++) { /* 2 vectors (32 coefficients) in, 20 bytes out per iteration */
+    f0 = _mm256_load_si256(&a->vec[2*i+0]);
+    f1 = _mm256_load_si256(&a->vec[2*i+1]);
+    f0 = _mm256_mulhi_epi16(f0,v);
+    f1 = _mm256_mulhi_epi16(f1,v);
+    f0 = _mm256_mulhrs_epi16(f0,shift1);
+    f1 = _mm256_mulhrs_epi16(f1,shift1);
+    f0 = _mm256_and_si256(f0,mask);
+    f1 = _mm256_and_si256(f1,mask);
+    f0 = _mm256_packus_epi16(f0,f1);
+    f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7
+    f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3
+    f0 = _mm256_sllv_epi32(f0,sllvdidx);
+    f0 = _mm256_srlv_epi64(f0,sllvdidx);
+    f0 = _mm256_shuffle_epi8(f0,shufbidx);
+    t0 = _mm256_castsi256_si128(f0);
+    t1 = _mm256_extracti128_si256(f0,1);
+    t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx));
+    _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0);
+    memcpy(&r[20*i+16],&t1,4); /* last 4 of the 20 bytes; a 16-byte store here would run past the chunk */
+  }
+}
+
+void poly_decompress(poly * restrict r, const uint8_t a[160]) /* inverse of the 5-bit poly_compress: 160 bytes -> KYBER_N coefficients */
+{
+  unsigned int i;
+  __m128i t;
+  __m256i f;
+  int16_t ti;
+  const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]);
+  const __m256i shufbidx = _mm256_set_epi8(9,9,9,8,8,8,8,7,7,6,6,6,6,5,5,5,
+                                           4,4,4,3,3,3,3,2,2,1,1,1,1,0,0,0); /* places the bytes holding each 5-bit field into its 16-bit word */
+  const __m256i mask = _mm256_set_epi16(248,1984,62,496,3968,124,992,31,
+                                        248,1984,62,496,3968,124,992,31); /* selects the 5-bit field inside each word */
+  const __m256i shift = _mm256_set_epi16(128,16,512,64,8,256,32,1024,
+                                         128,16,512,64,8,256,32,1024); /* aligns every field to value*2^10 */
+
+  for(i=0;i<KYBER_N/16;i++) { /* 10 input bytes -> one vector of 16 coefficients */
+    t = _mm_loadl_epi64((__m128i *)&a[10*i+0]);
+    memcpy(&ti,&a[10*i+8],2); /* bytes 8..9 are fetched separately so nothing beyond the 10-byte chunk is read */
+    t = _mm_insert_epi16(t,ti,4);
+    f = _mm256_broadcastsi128_si256(t);
+    f = _mm256_shuffle_epi8(f,shufbidx);
+    f = _mm256_and_si256(f,mask);
+    f = _mm256_mullo_epi16(f,shift);
+    f = _mm256_mulhrs_epi16(f,q); /* round(value*2^10*q/2^15) = round(value*q/32) */
+    _mm256_store_si256(&r->vec[i],f);
+  }
+}
+
+#endif
+
+/*************************************************
+* Name:        poly_tobytes
+*
+* Description: Serialization of a polynomial in NTT representation.
+*              The coefficients of the input polynomial are assumed to
+*              lie in the interval [0,q], i.e. the polynomial must be reduced
+*              by poly_reduce(). The coefficients are ordered as output by
+*              poly_ntt(); the serialized output coefficients are in bitreversed
+*              order.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                (needs space for KYBER_POLYBYTES bytes)
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a)
+{
+ ntttobytes_avx(r, a->vec, qdata.vec); /* all work is done by ntttobytes_avx (declared in ntt.h) with the shared constant table */
+}
+
+/*************************************************
+* Name:        poly_frombytes
+*
+* Description: De-serialization of a polynomial;
+*              inverse of poly_tobytes
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: pointer to input byte array
+*                (of KYBER_POLYBYTES bytes)
+**************************************************/
+void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES])
+{
+ nttfrombytes_avx(r->vec, a, qdata.vec); /* all work is done by nttfrombytes_avx (declared in ntt.h) */
+}
+
+/*************************************************
+* Name:        poly_frommsg
+*
+* Description: Convert 32-byte message to polynomial: a set bit becomes the coefficient (KYBER_Q+1)/2, a clear bit becomes 0
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *msg: pointer to input message
+**************************************************/
+void poly_frommsg(poly * restrict r, const uint8_t msg[KYBER_INDCPA_MSGBYTES])
+{
+#if (KYBER_INDCPA_MSGBYTES != 32)
+#error "KYBER_INDCPA_MSGBYTES must be equal to 32!"
+#endif
+ __m256i f, g0, g1, g2, g3, h0, h1, h2, h3;
+ const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3));
+ const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0));
+ const __m256i hqs = _mm256_set1_epi16((KYBER_Q+1)/2); /* value written for every set message bit */
+
+#define FROMMSG64(i) \
+ g3 = _mm256_shuffle_epi32(f,0x55*i); /* replicate dword i of each 128-bit lane */ \
+ g3 = _mm256_sllv_epi32(g3,shift); \
+ g3 = _mm256_shuffle_epi8(g3,idx); \
+ g0 = _mm256_slli_epi16(g3,12); \
+ g1 = _mm256_slli_epi16(g3,8); \
+ g2 = _mm256_slli_epi16(g3,4); \
+ g0 = _mm256_srai_epi16(g0,15); /* arithmetic shift turns the top bit into an all-ones or all-zero word */ \
+ g1 = _mm256_srai_epi16(g1,15); \
+ g2 = _mm256_srai_epi16(g2,15); \
+ g3 = _mm256_srai_epi16(g3,15); \
+ g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \
+ g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \
+ g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \
+ g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \
+ h0 = _mm256_unpacklo_epi64(g0,g1); \
+ h2 = _mm256_unpackhi_epi64(g0,g1); \
+ h1 = _mm256_unpacklo_epi64(g2,g3); \
+ h3 = _mm256_unpackhi_epi64(g2,g3); \
+ g0 = _mm256_permute2x128_si256(h0,h1,0x20); \
+ g2 = _mm256_permute2x128_si256(h0,h1,0x31); \
+ g1 = _mm256_permute2x128_si256(h2,h3,0x20); \
+ g3 = _mm256_permute2x128_si256(h2,h3,0x31); \
+ _mm256_store_si256(&r->vec[0+2*i+0],g0); \
+ _mm256_store_si256(&r->vec[0+2*i+1],g1); \
+ _mm256_store_si256(&r->vec[8+2*i+0],g2); \
+ _mm256_store_si256(&r->vec[8+2*i+1],g3)
+
+ f = _mm256_loadu_si256((__m256i *)msg); /* unaligned load: msg carries no alignment guarantee */
+ FROMMSG64(0); /* each expansion writes 4 of the 16 output vectors */
+ FROMMSG64(1);
+ FROMMSG64(2);
+ FROMMSG64(3);
+}
+
+/*************************************************
+* Name:        poly_tomsg
+*
+* Description: Convert polynomial to 32-byte message.
+*              The coefficients of the input polynomial are assumed to
+*              lie in the interval [0,q], i.e. the polynomial must be reduced
+*              by poly_reduce().
+*
+* Arguments:   - uint8_t *msg: pointer to output message
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly * restrict a)
+{
+  unsigned int i;
+  uint32_t small;
+  __m256i f0, f1, g0, g1;
+  const __m256i hq = _mm256_set1_epi16((KYBER_Q - 1)/2);
+  const __m256i hhq = _mm256_set1_epi16((KYBER_Q - 1)/4);
+
+  for(i=0;i<KYBER_N/32;i++) { /* 2 vectors (32 coefficients) in, 32 message bits out per iteration */
+    f0 = _mm256_load_si256(&a->vec[2*i+0]);
+    f1 = _mm256_load_si256(&a->vec[2*i+1]);
+    f0 = _mm256_sub_epi16(hq, f0);
+    f1 = _mm256_sub_epi16(hq, f1);
+    g0 = _mm256_srai_epi16(f0, 15);
+    g1 = _mm256_srai_epi16(f1, 15);
+    f0 = _mm256_xor_si256(f0, g0);
+    f1 = _mm256_xor_si256(f1, g1);
+    f0 = _mm256_sub_epi16(f0, hhq); /* negative exactly when the coefficient is closer to q/2 than to 0 */
+    f1 = _mm256_sub_epi16(f1, hhq);
+    f0 = _mm256_packs_epi16(f0, f1); /* signed saturation keeps the sign bit of every word */
+    f0 = _mm256_permute4x64_epi64(f0, 0xD8); /* restore coefficient order after the per-lane pack */
+    small = _mm256_movemask_epi8(f0); /* one sign bit per coefficient = 32 message bits */
+    memcpy(&msg[4*i], &small, 4);
+  }
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta1
+*
+* Description: Deterministic noise sampling: expands (seed, nonce) with
+*              prf() and maps the bytes to a polynomial whose coefficients
+*              follow the centered binomial distribution with parameter KYBER_ETA1
+*
+* Arguments:   - poly *r: output polynomial
+*              - const uint8_t *seed: input seed
+*                (KYBER_SYMBYTES bytes)
+*              - uint8_t nonce: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+  ALIGNED_UINT8(KYBER_ETA1*KYBER_N/4+32) prf_out; /* 32 bytes of slack as required by poly_cbd_eta1 */
+  prf(prf_out.coeffs, KYBER_ETA1*KYBER_N/4, seed, nonce);
+  poly_cbd_eta1(r, prf_out.vec);
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta2
+*
+* Description: Deterministic noise sampling: expands (seed, nonce) with
+*              prf() and maps the bytes to a polynomial whose coefficients
+*              follow the centered binomial distribution with parameter KYBER_ETA2
+*
+* Arguments:   - poly *r: output polynomial
+*              - const uint8_t *seed: input seed
+*                (KYBER_SYMBYTES bytes)
+*              - uint8_t nonce: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+  ALIGNED_UINT8(KYBER_ETA2*KYBER_N/4) prf_out; /* exactly the bytes consumed by poly_cbd_eta2 */
+  prf(prf_out.coeffs, KYBER_ETA2*KYBER_N/4, seed, nonce);
+  poly_cbd_eta2(r, prf_out.vec);
+}
+
+#ifndef KYBER_90S
+#define NOISE_NBLOCKS ((KYBER_ETA1*KYBER_N/4+SHAKE256_RATE-1)/SHAKE256_RATE)
+void poly_getnoise_eta1_4x(poly *r0,
+                           poly *r1,
+                           poly *r2,
+                           poly *r3,
+                           const uint8_t seed[32],
+                           uint8_t nonce0,
+                           uint8_t nonce1,
+                           uint8_t nonce2,
+                           uint8_t nonce3)
+{
+  /* One buffer per lane: seed || nonce on input, squeezed output afterwards */
+  ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) lane[4];
+  poly *const out[4] = {r0, r1, r2, r3};
+  const uint8_t nonces[4] = {nonce0, nonce1, nonce2, nonce3};
+  const __m256i seedvec = _mm256_loadu_si256((__m256i *)seed);
+  shake256x4incctx ctx;
+  unsigned int k;
+
+  /* Same 32-byte seed in every lane, followed by that lane's nonce byte */
+  for(k=0;k<4;k++) {
+    _mm256_store_si256(lane[k].vec, seedvec);
+    lane[k].coeffs[32] = nonces[k];
+  }
+
+  /* Absorb the four 33-byte inputs, then squeeze NOISE_NBLOCKS blocks per lane */
+  shake256x4_inc_init(&ctx);
+  shake256x4_absorb_once(&ctx, lane[0].coeffs, lane[1].coeffs, lane[2].coeffs, lane[3].coeffs, 33);
+  shake256x4_squeezeblocks(lane[0].coeffs, lane[1].coeffs, lane[2].coeffs, lane[3].coeffs, NOISE_NBLOCKS, &ctx);
+  shake256x4_inc_ctx_release(&ctx);
+
+  /* All four outputs are sampled with parameter KYBER_ETA1 */
+  for(k=0;k<4;k++) {
+    poly_cbd_eta1(out[k], lane[k].vec);
+  }
+}
+
+#if KYBER_K == 2
+void poly_getnoise_eta1122_4x(poly *r0,
+                              poly *r1,
+                              poly *r2,
+                              poly *r3,
+                              const uint8_t seed[32],
+                              uint8_t nonce0,
+                              uint8_t nonce1,
+                              uint8_t nonce2,
+                              uint8_t nonce3)
+{
+  /* One buffer per lane: seed || nonce on input, squeezed output afterwards */
+  ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) lane[4];
+  const uint8_t nonces[4] = {nonce0, nonce1, nonce2, nonce3};
+  const __m256i seedvec = _mm256_loadu_si256((__m256i *)seed);
+  shake256x4incctx ctx;
+  unsigned int k;
+
+  /* Same 32-byte seed in every lane, followed by that lane's nonce byte */
+  for(k=0;k<4;k++) {
+    _mm256_store_si256(lane[k].vec, seedvec);
+    lane[k].coeffs[32] = nonces[k];
+  }
+
+  /* Absorb the four 33-byte inputs, then squeeze NOISE_NBLOCKS blocks per lane */
+  shake256x4_inc_init(&ctx);
+  shake256x4_absorb_once(&ctx, lane[0].coeffs, lane[1].coeffs, lane[2].coeffs, lane[3].coeffs, 33);
+  shake256x4_squeezeblocks(lane[0].coeffs, lane[1].coeffs, lane[2].coeffs, lane[3].coeffs, NOISE_NBLOCKS, &ctx);
+  shake256x4_inc_ctx_release(&ctx);
+
+  /* First two outputs use parameter KYBER_ETA1, last two use KYBER_ETA2 */
+  poly_cbd_eta1(r0, lane[0].vec);
+  poly_cbd_eta1(r1, lane[1].vec);
+  poly_cbd_eta2(r2, lane[2].vec);
+  poly_cbd_eta2(r3, lane[3].vec);
+}
+#endif
+#endif
+
+/*************************************************
+* Name:        poly_ntt
+*
+* Description: Computes negacyclic number-theoretic transform (NTT) of
+*              a polynomial in place.
+*              Input coefficients assumed to be in normal order,
+*              output coefficients are in special order that is natural
+*              for the vectorization. Input coefficients are assumed to be
+*              bounded by q in absolute value, output coefficients are bounded
+*              by 16118 in absolute value.
+*
+* Arguments:   - poly *r: pointer to in/output polynomial
+**************************************************/
+void poly_ntt(poly *r)
+{
+ ntt_avx(r->vec, qdata.vec); /* ntt_avx is the assembly routine in ntt.S; qdata supplies its constants */
+}
+
+/*************************************************
+* Name:        poly_invntt_tomont
+*
+* Description: Computes inverse of negacyclic number-theoretic transform (NTT)
+*              of a polynomial in place;
+*              Input coefficients assumed to be in special order from vectorized
+*              forward ntt, output in normal order. Input coefficients can be
+*              arbitrary 16-bit integers, output coefficients are bounded by 14870
+*              in absolute value.
+*
+* Arguments:   - poly *r: pointer to in/output polynomial
+**************************************************/
+void poly_invntt_tomont(poly *r)
+{
+ invntt_avx(r->vec, qdata.vec); /* invntt_avx is declared in ntt.h; qdata supplies its constants */
+}
+
+void poly_nttunpack(poly *r) /* in-place reordering of r via nttunpack_avx; presumably converts to the coefficient order the vectorized NTT-domain routines expect - confirm */
+{
+ nttunpack_avx(r->vec, qdata.vec);
+}
+
+/*************************************************
+* Name:        poly_basemul_montgomery
+*
+* Description: Multiplication of two polynomials in NTT domain.
+*              One of the input polynomials needs to have coefficients
+*              bounded by q, the other polynomial can have arbitrary
+*              coefficients. Output coefficients are bounded by 6656.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_basemul_montgomery(poly *r, const poly *a, const poly *b)
+{
+ basemul_avx(r->vec, a->vec, b->vec, qdata.vec); /* all work is done by basemul_avx (declared in ntt.h) */
+}
+
+/*************************************************
+* Name:        poly_tomont
+*
+* Description: Inplace conversion of all coefficients of a polynomial
+*              from normal domain to Montgomery domain
+*
+* Arguments:   - poly *r: pointer to input/output polynomial
+**************************************************/
+void poly_tomont(poly *r)
+{
+ tomont_avx(r->vec, qdata.vec); /* tomont_avx is not declared in ntt.h; presumably it comes from reduce.h - confirm */
+}
+
+/*************************************************
+* Name:        poly_reduce
+*
+* Description: Applies Barrett reduction to all coefficients of a polynomial
+*              for details of the Barrett reduction see comments in reduce.c
+*
+* Arguments:   - poly *r: pointer to input/output polynomial
+**************************************************/
+void poly_reduce(poly *r)
+{
+ reduce_avx(r->vec, qdata.vec); /* in place; the compress, tobytes and tomsg routines require this reduction beforehand */
+}
+
+/*************************************************
+* Name:        poly_add
+*
+* Description: Add two polynomials coefficient-wise with wrapping 16-bit
+*              arithmetic. No modular reduction is performed.
+*
+* Arguments: - poly *r: pointer to output polynomial
+*            - const poly *a: pointer to first input polynomial
+*            - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_add(poly *r, const poly *a, const poly *b)
+{
+  unsigned int i;
+  __m256i f0, f1;
+
+  for(i=0;i<KYBER_N/16;i++) { /* one vector = 16 coefficients per iteration */
+    f0 = _mm256_load_si256(&a->vec[i]);
+    f1 = _mm256_load_si256(&b->vec[i]);
+    f0 = _mm256_add_epi16(f0, f1);
+    _mm256_store_si256(&r->vec[i], f0); /* each vector is loaded before it is stored, so r may alias a or b */
+  }
+}
+
+/*************************************************
+* Name:        poly_sub
+*
+* Description: Subtract two polynomials coefficient-wise (a - b) with wrapping
+*              16-bit arithmetic. No modular reduction is performed.
+*
+* Arguments: - poly *r: pointer to output polynomial
+*            - const poly *a: pointer to first input polynomial
+*            - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_sub(poly *r, const poly *a, const poly *b)
+{
+  unsigned int i;
+  __m256i f0, f1;
+
+  for(i=0;i<KYBER_N/16;i++) { /* one vector = 16 coefficients per iteration */
+    f0 = _mm256_load_si256(&a->vec[i]);
+    f1 = _mm256_load_si256(&b->vec[i]);
+    f0 = _mm256_sub_epi16(f0, f1);
+    _mm256_store_si256(&r->vec[i], f0); /* each vector is loaded before it is stored, so r may alias a or b */
+  }
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.h
new file mode 100644
index 0000000000..6a9cf71c70
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/poly.h
@@ -0,0 +1,77 @@
+#ifndef POLY_H
+#define POLY_H
+
+#include <stdint.h>
+#include "align.h"
+#include "params.h"
+
+typedef ALIGNED_INT16(KYBER_N) poly; /* ALIGNED_INT16 is supplied by align.h; users access the .vec (__m256i) view */
+
+#define poly_compress KYBER_NAMESPACE(poly_compress)
+void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a);
+#define poly_decompress KYBER_NAMESPACE(poly_decompress)
+void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]);
+
+#define poly_tobytes KYBER_NAMESPACE(poly_tobytes)
+void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a);
+#define poly_frombytes KYBER_NAMESPACE(poly_frombytes)
+void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]);
+
+#define poly_frommsg KYBER_NAMESPACE(poly_frommsg)
+void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]);
+#define poly_tomsg KYBER_NAMESPACE(poly_tomsg)
+void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r);
+
+#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1)
+void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2)
+void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#ifndef KYBER_90S
+#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) /* NOTE(review): namespaced symbol says eta2_4x while the function is eta1_4x; consistent as long as every user includes this header - confirm before renaming */
+void poly_getnoise_eta1_4x(poly *r0,
+                           poly *r1,
+                           poly *r2,
+                           poly *r3,
+                           const uint8_t seed[32],
+                           uint8_t nonce0,
+                           uint8_t nonce1,
+                           uint8_t nonce2,
+                           uint8_t nonce3);
+
+#if KYBER_K == 2
+#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x)
+void poly_getnoise_eta1122_4x(poly *r0,
+                              poly *r1,
+                              poly *r2,
+                              poly *r3,
+                              const uint8_t seed[32],
+                              uint8_t nonce0,
+                              uint8_t nonce1,
+                              uint8_t nonce2,
+                              uint8_t nonce3);
+#endif
+#endif
+
+
+#define poly_ntt KYBER_NAMESPACE(poly_ntt)
+void poly_ntt(poly *r);
+#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont)
+void poly_invntt_tomont(poly *r);
+#define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack)
+void poly_nttunpack(poly *r);
+#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery)
+void poly_basemul_montgomery(poly *r, const poly *a, const poly *b);
+#define poly_tomont KYBER_NAMESPACE(poly_tomont)
+void poly_tomont(poly *r);
+
+#define poly_reduce KYBER_NAMESPACE(poly_reduce)
+void poly_reduce(poly *r);
+
+#define poly_add KYBER_NAMESPACE(poly_add)
+void poly_add(poly *r, const poly *a, const poly *b);
+#define poly_sub KYBER_NAMESPACE(poly_sub)
+void poly_sub(poly *r, const poly *a, const poly *b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.c
new file mode 100644
index 0000000000..a0174b7b3f
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.c
@@ -0,0 +1,307 @@
+#include <stdint.h>
+#include <immintrin.h>
+#include <string.h>
+#include "params.h"
+#include "polyvec.h"
+#include "poly.h"
+#include "ntt.h"
+#include "consts.h"
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+static void poly_compress10(uint8_t r[320], const poly * restrict a)
+{ /* 10-bit compression: each iteration turns 16 coefficients into 20 output bytes */
+  unsigned int i;
+  __m256i f0, f1, f2;
+  __m128i t0, t1;
+  const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
+  const __m256i v8 = _mm256_slli_epi16(v,3);
+  const __m256i off = _mm256_set1_epi16(15);
+  const __m256i shift1 = _mm256_set1_epi16(1 << 12);
+  const __m256i mask = _mm256_set1_epi16(1023);
+  const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1);
+  const __m256i sllvdidx = _mm256_set1_epi64x(12);
+  const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9,
+                                           -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0);
+
+  for(i=0;i<KYBER_N/16;i++) {
+    f0 = _mm256_load_si256(&a->vec[i]);
+    f1 = _mm256_mullo_epi16(f0,v8);
+    f2 = _mm256_add_epi16(f0,off);
+    f0 = _mm256_slli_epi16(f0,3);
+    f0 = _mm256_mulhi_epi16(f0,v);
+    f2 = _mm256_sub_epi16(f1,f2);
+    f1 = _mm256_andnot_si256(f1,f2);
+    f1 = _mm256_srli_epi16(f1,15);
+    f0 = _mm256_sub_epi16(f0,f1);
+    f0 = _mm256_mulhrs_epi16(f0,shift1);
+    f0 = _mm256_and_si256(f0,mask); /* keep the low 10 bits of every lane */
+    f0 = _mm256_madd_epi16(f0,shift2);
+    f0 = _mm256_sllv_epi32(f0,sllvdidx);
+    f0 = _mm256_srli_epi64(f0,12);
+    f0 = _mm256_shuffle_epi8(f0,shufbidx);
+    t0 = _mm256_castsi256_si128(f0);
+    t1 = _mm256_extracti128_si256(f0,1);
+    t0 = _mm_blend_epi16(t0,t1,0xE0);
+    _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); /* first 16 of the 20 bytes */
+    memcpy(&r[20*i+16],&t1,4);                   /* remaining 4 bytes */
+  }
+}
+
+static void poly_decompress10(poly * restrict r, const uint8_t a[320+12])
+{ /* inverse of the 10-bit packing; the +12 in the bound covers the 32-byte load of the last 20-byte group */
+  unsigned int i;
+  __m256i f;
+  const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q);
+  const __m256i shufbidx = _mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7,
+                                            6, 5, 5, 4, 4, 3, 3, 2,
+                                            9, 8, 8, 7, 7, 6, 6, 5,
+                                            4, 3, 3, 2, 2, 1, 1, 0);
+  const __m256i sllvdidx = _mm256_set1_epi64x(4);
+  const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184);
+
+  for(i=0;i<KYBER_N/16;i++) {
+    f = _mm256_loadu_si256((__m256i *)&a[20*i]); /* reads 32 bytes, of which 20 are consumed */
+    f = _mm256_permute4x64_epi64(f,0x94);        /* low lane = bytes 0..15, high lane = bytes 8..23 */
+    f = _mm256_shuffle_epi8(f,shufbidx);         /* byte pair holding each 10-bit value -> its 16-bit lane */
+    f = _mm256_sllv_epi32(f,sllvdidx);
+    f = _mm256_srli_epi16(f,1);
+    f = _mm256_and_si256(f,mask);                /* value now sits at bits 3..12 (even lanes) / 5..14 (odd lanes) */
+    f = _mm256_mulhrs_epi16(f,q);                /* rounded multiply: yields round(x*KYBER_Q/1024) */
+    _mm256_store_si256(&r->vec[i],f);
+  }
+}
+
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+static void poly_compress11(uint8_t r[352+2], const poly * restrict a)
+{ /* 11-bit compression: 16 coefficients -> 22 bytes per iteration; the final 8-byte store overshoots by 2, hence 352+2 */
+  unsigned int i;
+  __m256i f0, f1, f2;
+  __m128i t0, t1;
+  const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
+  const __m256i v8 = _mm256_slli_epi16(v,3);
+  const __m256i off = _mm256_set1_epi16(36);
+  const __m256i shift1 = _mm256_set1_epi16(1 << 13);
+  const __m256i mask = _mm256_set1_epi16(2047);
+  const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1);
+  const __m256i sllvdidx = _mm256_set1_epi64x(10);
+  const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10);
+  const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5,
+                                           -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+
+  for(i=0;i<KYBER_N/16;i++) {
+    f0 = _mm256_load_si256(&a->vec[i]);
+    f1 = _mm256_mullo_epi16(f0,v8);
+    f2 = _mm256_add_epi16(f0,off);
+    f0 = _mm256_slli_epi16(f0,3);
+    f0 = _mm256_mulhi_epi16(f0,v);
+    f2 = _mm256_sub_epi16(f1,f2);
+    f1 = _mm256_andnot_si256(f1,f2);
+    f1 = _mm256_srli_epi16(f1,15);
+    f0 = _mm256_sub_epi16(f0,f1);
+    f0 = _mm256_mulhrs_epi16(f0,shift1);
+    f0 = _mm256_and_si256(f0,mask); /* keep the low 11 bits of every lane */
+    f0 = _mm256_madd_epi16(f0,shift2);
+    f0 = _mm256_sllv_epi32(f0,sllvdidx);
+    f1 = _mm256_bsrli_epi128(f0,8);
+    f0 = _mm256_srlv_epi64(f0,srlvqidx);
+    f1 = _mm256_slli_epi64(f1,34);
+    f0 = _mm256_add_epi64(f0,f1);
+    f0 = _mm256_shuffle_epi8(f0,shufbidx);
+    t0 = _mm256_castsi256_si128(f0);
+    t1 = _mm256_extracti128_si256(f0,1);
+    t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx));
+    _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0); /* bytes 0..15 of the group */
+    _mm_storel_epi64((__m128i *)&r[22*i+16],t1); /* writes 8 bytes at offset 16 (2 past the 22-byte group) */
+  }
+}
+
+static void poly_decompress11(poly * restrict r, const uint8_t a[352+10])
+{ /* inverse of the 11-bit packing; the +10 in the bound covers the 32-byte load of the last 22-byte group */
+  unsigned int i;
+  __m256i f;
+  const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]);
+  const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8,
+                                            8, 7, 6, 5, 5, 4, 4, 3,
+                                           10, 9, 9, 8, 7, 6, 6, 5,
+                                            5, 4, 3, 2, 2, 1, 1, 0);
+  const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0);
+  const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0);
+  const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32);
+  const __m256i mask = _mm256_set1_epi16(32752);
+
+  for(i=0;i<KYBER_N/16;i++) {
+    f = _mm256_loadu_si256((__m256i *)&a[22*i]); /* reads 32 bytes, of which 22 are consumed */
+    f = _mm256_permute4x64_epi64(f,0x94);        /* low lane = bytes 0..15, high lane = bytes 8..23 */
+    f = _mm256_shuffle_epi8(f,shufbidx);
+    f = _mm256_srlv_epi32(f,srlvdidx);           /* pulls straddling bits in from the neighbouring lane */
+    f = _mm256_srlv_epi64(f,srlvqidx);
+    f = _mm256_mullo_epi16(f,shift);             /* per-lane left shift so every value starts at bit 5 */
+    f = _mm256_srli_epi16(f,1);
+    f = _mm256_and_si256(f,mask);                /* value now sits at bits 4..14 */
+    f = _mm256_mulhrs_epi16(f,q);                /* rounded multiply: yields round(x*q/2048) */
+    _mm256_store_si256(&r->vec[i],f);
+  }
+}
+
+#endif
+
+/*************************************************
+* Name: polyvec_compress
+*
+* Description: Compress and serialize vector of polynomials
+*
+* Arguments: - uint8_t *r: pointer to output byte array
+* (needs space for KYBER_POLYVECCOMPRESSEDBYTES+2)
+* - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a)
+{
+  unsigned int i;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+  for(i=0;i<KYBER_K;i++)
+    poly_compress10(&r[320*i],&a->vec[i]); /* 320 bytes per polynomial */
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+  for(i=0;i<KYBER_K;i++)
+    poly_compress11(&r[352*i],&a->vec[i]); /* 352 bytes per polynomial */
+#endif
+}
+
+/*************************************************
+* Name: polyvec_decompress
+*
+* Description: De-serialize and decompress vector of polynomials;
+* approximate inverse of polyvec_compress
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+* - const uint8_t *a: pointer to input byte array
+* (must be readable for KYBER_POLYVECCOMPRESSEDBYTES+12 bytes)
+**************************************************/
+void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12])
+{
+  unsigned int i;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+  for(i=0;i<KYBER_K;i++)
+    poly_decompress10(&r->vec[i],&a[320*i]); /* 320 bytes per polynomial */
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+  for(i=0;i<KYBER_K;i++)
+    poly_decompress11(&r->vec[i],&a[352*i]); /* 352 bytes per polynomial */
+#endif
+}
+
+/*************************************************
+* Name: polyvec_tobytes
+*
+* Description: Serialize vector of polynomials
+*
+* Arguments: - uint8_t *r: pointer to output byte array
+* (needs space for KYBER_POLYVECBYTES)
+* - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_tobytes(r+i*KYBER_POLYBYTES, &a->vec[i]); /* polynomial i goes to byte offset i*KYBER_POLYBYTES */
+}
+
+/*************************************************
+* Name: polyvec_frombytes
+*
+* Description: De-serialize vector of polynomials;
+* inverse of polyvec_tobytes
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+* - const uint8_t *a: pointer to input byte array
+* (of length KYBER_POLYVECBYTES)
+**************************************************/
+void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES])
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_frombytes(&r->vec[i], a+i*KYBER_POLYBYTES); /* polynomial i is read from byte offset i*KYBER_POLYBYTES */
+}
+
+/*************************************************
+* Name: polyvec_ntt
+*
+* Description: Apply forward NTT to all elements of a vector of polynomials
+*
+* Arguments: - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+void polyvec_ntt(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_ntt(&r->vec[i]); /* each of the KYBER_K polynomials is transformed in place */
+}
+
+/*************************************************
+* Name: polyvec_invntt_tomont
+*
+* Description: Apply inverse NTT to all elements of a vector of polynomials
+* and multiply by Montgomery factor 2^16
+*
+* Arguments: - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+void polyvec_invntt_tomont(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_invntt_tomont(&r->vec[i]); /* each of the KYBER_K polynomials is transformed in place */
+}
+
+/*************************************************
+* Name: polyvec_basemul_acc_montgomery
+*
+* Description: Multiply elements in a and b in NTT domain, accumulate into r,
+* and multiply by 2^-16.
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const polyvec *a: pointer to first input vector of polynomials
+* - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
+{
+  unsigned int i;
+  poly tmp;
+
+  poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); /* first product initialises the accumulator r */
+  for(i=1;i<KYBER_K;i++) {
+    poly_basemul_montgomery(&tmp,&a->vec[i],&b->vec[i]);
+    poly_add(r,r,&tmp); /* accumulate without modular reduction */
+  }
+}
+
+/*************************************************
+* Name: polyvec_reduce
+*
+* Description: Applies Barrett reduction to each coefficient
+* of each element of a vector of polynomials;
+* for details of the Barrett reduction see comments in reduce.c
+*
+* Arguments: - polyvec *r: pointer to input/output vector of polynomials
+**************************************************/
+void polyvec_reduce(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_reduce(&r->vec[i]); /* each of the KYBER_K polynomials is reduced in place */
+}
+
+/*************************************************
+* Name: polyvec_add
+*
+* Description: Add vectors of polynomials element-wise via poly_add
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+* - const polyvec *a: pointer to first input vector of polynomials
+* - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); /* r[i] = a[i] + b[i] */
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.h
new file mode 100644
index 0000000000..2ce23c31ff
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/polyvec.h
@@ -0,0 +1,36 @@
+#ifndef POLYVEC_H
+#define POLYVEC_H
+
+#include <stdint.h>
+#include "params.h"
+#include "poly.h"
+
+typedef struct{
+  poly vec[KYBER_K]; /* KYBER_K polynomials */
+} polyvec;
+
+#define polyvec_compress KYBER_NAMESPACE(polyvec_compress)
+void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); /* note the +2 bytes of required output space */
+#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress)
+void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]); /* note the +12 bytes of required readable input */
+
+#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes)
+void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a);
+#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes)
+void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]);
+
+#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt)
+void polyvec_ntt(polyvec *r);
+#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont)
+void polyvec_invntt_tomont(polyvec *r);
+
+#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery)
+void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b);
+
+#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce)
+void polyvec_reduce(polyvec *r);
+
+#define polyvec_add KYBER_NAMESPACE(polyvec_add)
+void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/reduce.h
new file mode 100644
index 0000000000..5368185b5f
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/reduce.h
@@ -0,0 +1,12 @@
+#ifndef REDUCE_H
+#define REDUCE_H
+
+#include "params.h"
+#include <immintrin.h>
+
+#define reduce_avx KYBER_NAMESPACE(reduce_avx)
+void reduce_avx(__m256i *r, const __m256i *qdata); /* r: in/out vector data; qdata: constants table */
+#define tomont_avx KYBER_NAMESPACE(tomont_avx)
+void tomont_avx(__m256i *r, const __m256i *qdata); /* r: in/out vector data; qdata: constants table */
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.c
new file mode 100644
index 0000000000..9060a44cb9
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.c
@@ -0,0 +1,398 @@
+#include <stdint.h>
+#include <immintrin.h>
+#include <string.h>
+#include "params.h"
+#include "consts.h"
+#include "rejsample.h"
+
+//#define BMI
+
+#ifndef BMI /* table-driven index generation; with BMI defined the indices are computed with pdep/pext instead */
+static const uint8_t idx[256][8] = { /* row m lists 2*k for every set bit k of m in ascending order, padded with -1 (stored as 0xFF): byte offsets of the accepted 16-bit lanes */
+ {-1, -1, -1, -1, -1, -1, -1, -1},
+ { 0, -1, -1, -1, -1, -1, -1, -1},
+ { 2, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 2, -1, -1, -1, -1, -1, -1},
+ { 4, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 4, -1, -1, -1, -1, -1, -1},
+ { 2, 4, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 4, -1, -1, -1, -1, -1},
+ { 6, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 6, -1, -1, -1, -1, -1, -1},
+ { 2, 6, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 6, -1, -1, -1, -1, -1},
+ { 4, 6, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 6, -1, -1, -1, -1, -1},
+ { 2, 4, 6, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 6, -1, -1, -1, -1},
+ { 8, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 8, -1, -1, -1, -1, -1, -1},
+ { 2, 8, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 8, -1, -1, -1, -1, -1},
+ { 4, 8, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 8, -1, -1, -1, -1, -1},
+ { 2, 4, 8, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 8, -1, -1, -1, -1},
+ { 6, 8, -1, -1, -1, -1, -1, -1},
+ { 0, 6, 8, -1, -1, -1, -1, -1},
+ { 2, 6, 8, -1, -1, -1, -1, -1},
+ { 0, 2, 6, 8, -1, -1, -1, -1},
+ { 4, 6, 8, -1, -1, -1, -1, -1},
+ { 0, 4, 6, 8, -1, -1, -1, -1},
+ { 2, 4, 6, 8, -1, -1, -1, -1},
+ { 0, 2, 4, 6, 8, -1, -1, -1},
+ {10, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 10, -1, -1, -1, -1, -1, -1},
+ { 2, 10, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 10, -1, -1, -1, -1, -1},
+ { 4, 10, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 10, -1, -1, -1, -1, -1},
+ { 2, 4, 10, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 10, -1, -1, -1, -1},
+ { 6, 10, -1, -1, -1, -1, -1, -1},
+ { 0, 6, 10, -1, -1, -1, -1, -1},
+ { 2, 6, 10, -1, -1, -1, -1, -1},
+ { 0, 2, 6, 10, -1, -1, -1, -1},
+ { 4, 6, 10, -1, -1, -1, -1, -1},
+ { 0, 4, 6, 10, -1, -1, -1, -1},
+ { 2, 4, 6, 10, -1, -1, -1, -1},
+ { 0, 2, 4, 6, 10, -1, -1, -1},
+ { 8, 10, -1, -1, -1, -1, -1, -1},
+ { 0, 8, 10, -1, -1, -1, -1, -1},
+ { 2, 8, 10, -1, -1, -1, -1, -1},
+ { 0, 2, 8, 10, -1, -1, -1, -1},
+ { 4, 8, 10, -1, -1, -1, -1, -1},
+ { 0, 4, 8, 10, -1, -1, -1, -1},
+ { 2, 4, 8, 10, -1, -1, -1, -1},
+ { 0, 2, 4, 8, 10, -1, -1, -1},
+ { 6, 8, 10, -1, -1, -1, -1, -1},
+ { 0, 6, 8, 10, -1, -1, -1, -1},
+ { 2, 6, 8, 10, -1, -1, -1, -1},
+ { 0, 2, 6, 8, 10, -1, -1, -1},
+ { 4, 6, 8, 10, -1, -1, -1, -1},
+ { 0, 4, 6, 8, 10, -1, -1, -1},
+ { 2, 4, 6, 8, 10, -1, -1, -1},
+ { 0, 2, 4, 6, 8, 10, -1, -1},
+ {12, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 12, -1, -1, -1, -1, -1, -1},
+ { 2, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 12, -1, -1, -1, -1, -1},
+ { 4, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 12, -1, -1, -1, -1, -1},
+ { 2, 4, 12, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 12, -1, -1, -1, -1},
+ { 6, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 6, 12, -1, -1, -1, -1, -1},
+ { 2, 6, 12, -1, -1, -1, -1, -1},
+ { 0, 2, 6, 12, -1, -1, -1, -1},
+ { 4, 6, 12, -1, -1, -1, -1, -1},
+ { 0, 4, 6, 12, -1, -1, -1, -1},
+ { 2, 4, 6, 12, -1, -1, -1, -1},
+ { 0, 2, 4, 6, 12, -1, -1, -1},
+ { 8, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 8, 12, -1, -1, -1, -1, -1},
+ { 2, 8, 12, -1, -1, -1, -1, -1},
+ { 0, 2, 8, 12, -1, -1, -1, -1},
+ { 4, 8, 12, -1, -1, -1, -1, -1},
+ { 0, 4, 8, 12, -1, -1, -1, -1},
+ { 2, 4, 8, 12, -1, -1, -1, -1},
+ { 0, 2, 4, 8, 12, -1, -1, -1},
+ { 6, 8, 12, -1, -1, -1, -1, -1},
+ { 0, 6, 8, 12, -1, -1, -1, -1},
+ { 2, 6, 8, 12, -1, -1, -1, -1},
+ { 0, 2, 6, 8, 12, -1, -1, -1},
+ { 4, 6, 8, 12, -1, -1, -1, -1},
+ { 0, 4, 6, 8, 12, -1, -1, -1},
+ { 2, 4, 6, 8, 12, -1, -1, -1},
+ { 0, 2, 4, 6, 8, 12, -1, -1},
+ {10, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 10, 12, -1, -1, -1, -1, -1},
+ { 2, 10, 12, -1, -1, -1, -1, -1},
+ { 0, 2, 10, 12, -1, -1, -1, -1},
+ { 4, 10, 12, -1, -1, -1, -1, -1},
+ { 0, 4, 10, 12, -1, -1, -1, -1},
+ { 2, 4, 10, 12, -1, -1, -1, -1},
+ { 0, 2, 4, 10, 12, -1, -1, -1},
+ { 6, 10, 12, -1, -1, -1, -1, -1},
+ { 0, 6, 10, 12, -1, -1, -1, -1},
+ { 2, 6, 10, 12, -1, -1, -1, -1},
+ { 0, 2, 6, 10, 12, -1, -1, -1},
+ { 4, 6, 10, 12, -1, -1, -1, -1},
+ { 0, 4, 6, 10, 12, -1, -1, -1},
+ { 2, 4, 6, 10, 12, -1, -1, -1},
+ { 0, 2, 4, 6, 10, 12, -1, -1},
+ { 8, 10, 12, -1, -1, -1, -1, -1},
+ { 0, 8, 10, 12, -1, -1, -1, -1},
+ { 2, 8, 10, 12, -1, -1, -1, -1},
+ { 0, 2, 8, 10, 12, -1, -1, -1},
+ { 4, 8, 10, 12, -1, -1, -1, -1},
+ { 0, 4, 8, 10, 12, -1, -1, -1},
+ { 2, 4, 8, 10, 12, -1, -1, -1},
+ { 0, 2, 4, 8, 10, 12, -1, -1},
+ { 6, 8, 10, 12, -1, -1, -1, -1},
+ { 0, 6, 8, 10, 12, -1, -1, -1},
+ { 2, 6, 8, 10, 12, -1, -1, -1},
+ { 0, 2, 6, 8, 10, 12, -1, -1},
+ { 4, 6, 8, 10, 12, -1, -1, -1},
+ { 0, 4, 6, 8, 10, 12, -1, -1},
+ { 2, 4, 6, 8, 10, 12, -1, -1},
+ { 0, 2, 4, 6, 8, 10, 12, -1},
+ {14, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 14, -1, -1, -1, -1, -1, -1},
+ { 2, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 14, -1, -1, -1, -1, -1},
+ { 4, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 14, -1, -1, -1, -1, -1},
+ { 2, 4, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 14, -1, -1, -1, -1},
+ { 6, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 6, 14, -1, -1, -1, -1, -1},
+ { 2, 6, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 6, 14, -1, -1, -1, -1},
+ { 4, 6, 14, -1, -1, -1, -1, -1},
+ { 0, 4, 6, 14, -1, -1, -1, -1},
+ { 2, 4, 6, 14, -1, -1, -1, -1},
+ { 0, 2, 4, 6, 14, -1, -1, -1},
+ { 8, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 8, 14, -1, -1, -1, -1, -1},
+ { 2, 8, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 8, 14, -1, -1, -1, -1},
+ { 4, 8, 14, -1, -1, -1, -1, -1},
+ { 0, 4, 8, 14, -1, -1, -1, -1},
+ { 2, 4, 8, 14, -1, -1, -1, -1},
+ { 0, 2, 4, 8, 14, -1, -1, -1},
+ { 6, 8, 14, -1, -1, -1, -1, -1},
+ { 0, 6, 8, 14, -1, -1, -1, -1},
+ { 2, 6, 8, 14, -1, -1, -1, -1},
+ { 0, 2, 6, 8, 14, -1, -1, -1},
+ { 4, 6, 8, 14, -1, -1, -1, -1},
+ { 0, 4, 6, 8, 14, -1, -1, -1},
+ { 2, 4, 6, 8, 14, -1, -1, -1},
+ { 0, 2, 4, 6, 8, 14, -1, -1},
+ {10, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 10, 14, -1, -1, -1, -1, -1},
+ { 2, 10, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 10, 14, -1, -1, -1, -1},
+ { 4, 10, 14, -1, -1, -1, -1, -1},
+ { 0, 4, 10, 14, -1, -1, -1, -1},
+ { 2, 4, 10, 14, -1, -1, -1, -1},
+ { 0, 2, 4, 10, 14, -1, -1, -1},
+ { 6, 10, 14, -1, -1, -1, -1, -1},
+ { 0, 6, 10, 14, -1, -1, -1, -1},
+ { 2, 6, 10, 14, -1, -1, -1, -1},
+ { 0, 2, 6, 10, 14, -1, -1, -1},
+ { 4, 6, 10, 14, -1, -1, -1, -1},
+ { 0, 4, 6, 10, 14, -1, -1, -1},
+ { 2, 4, 6, 10, 14, -1, -1, -1},
+ { 0, 2, 4, 6, 10, 14, -1, -1},
+ { 8, 10, 14, -1, -1, -1, -1, -1},
+ { 0, 8, 10, 14, -1, -1, -1, -1},
+ { 2, 8, 10, 14, -1, -1, -1, -1},
+ { 0, 2, 8, 10, 14, -1, -1, -1},
+ { 4, 8, 10, 14, -1, -1, -1, -1},
+ { 0, 4, 8, 10, 14, -1, -1, -1},
+ { 2, 4, 8, 10, 14, -1, -1, -1},
+ { 0, 2, 4, 8, 10, 14, -1, -1},
+ { 6, 8, 10, 14, -1, -1, -1, -1},
+ { 0, 6, 8, 10, 14, -1, -1, -1},
+ { 2, 6, 8, 10, 14, -1, -1, -1},
+ { 0, 2, 6, 8, 10, 14, -1, -1},
+ { 4, 6, 8, 10, 14, -1, -1, -1},
+ { 0, 4, 6, 8, 10, 14, -1, -1},
+ { 2, 4, 6, 8, 10, 14, -1, -1},
+ { 0, 2, 4, 6, 8, 10, 14, -1},
+ {12, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 12, 14, -1, -1, -1, -1, -1},
+ { 2, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 12, 14, -1, -1, -1, -1},
+ { 4, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 4, 12, 14, -1, -1, -1, -1},
+ { 2, 4, 12, 14, -1, -1, -1, -1},
+ { 0, 2, 4, 12, 14, -1, -1, -1},
+ { 6, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 6, 12, 14, -1, -1, -1, -1},
+ { 2, 6, 12, 14, -1, -1, -1, -1},
+ { 0, 2, 6, 12, 14, -1, -1, -1},
+ { 4, 6, 12, 14, -1, -1, -1, -1},
+ { 0, 4, 6, 12, 14, -1, -1, -1},
+ { 2, 4, 6, 12, 14, -1, -1, -1},
+ { 0, 2, 4, 6, 12, 14, -1, -1},
+ { 8, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 8, 12, 14, -1, -1, -1, -1},
+ { 2, 8, 12, 14, -1, -1, -1, -1},
+ { 0, 2, 8, 12, 14, -1, -1, -1},
+ { 4, 8, 12, 14, -1, -1, -1, -1},
+ { 0, 4, 8, 12, 14, -1, -1, -1},
+ { 2, 4, 8, 12, 14, -1, -1, -1},
+ { 0, 2, 4, 8, 12, 14, -1, -1},
+ { 6, 8, 12, 14, -1, -1, -1, -1},
+ { 0, 6, 8, 12, 14, -1, -1, -1},
+ { 2, 6, 8, 12, 14, -1, -1, -1},
+ { 0, 2, 6, 8, 12, 14, -1, -1},
+ { 4, 6, 8, 12, 14, -1, -1, -1},
+ { 0, 4, 6, 8, 12, 14, -1, -1},
+ { 2, 4, 6, 8, 12, 14, -1, -1},
+ { 0, 2, 4, 6, 8, 12, 14, -1},
+ {10, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 10, 12, 14, -1, -1, -1, -1},
+ { 2, 10, 12, 14, -1, -1, -1, -1},
+ { 0, 2, 10, 12, 14, -1, -1, -1},
+ { 4, 10, 12, 14, -1, -1, -1, -1},
+ { 0, 4, 10, 12, 14, -1, -1, -1},
+ { 2, 4, 10, 12, 14, -1, -1, -1},
+ { 0, 2, 4, 10, 12, 14, -1, -1},
+ { 6, 10, 12, 14, -1, -1, -1, -1},
+ { 0, 6, 10, 12, 14, -1, -1, -1},
+ { 2, 6, 10, 12, 14, -1, -1, -1},
+ { 0, 2, 6, 10, 12, 14, -1, -1},
+ { 4, 6, 10, 12, 14, -1, -1, -1},
+ { 0, 4, 6, 10, 12, 14, -1, -1},
+ { 2, 4, 6, 10, 12, 14, -1, -1},
+ { 0, 2, 4, 6, 10, 12, 14, -1},
+ { 8, 10, 12, 14, -1, -1, -1, -1},
+ { 0, 8, 10, 12, 14, -1, -1, -1},
+ { 2, 8, 10, 12, 14, -1, -1, -1},
+ { 0, 2, 8, 10, 12, 14, -1, -1},
+ { 4, 8, 10, 12, 14, -1, -1, -1},
+ { 0, 4, 8, 10, 12, 14, -1, -1},
+ { 2, 4, 8, 10, 12, 14, -1, -1},
+ { 0, 2, 4, 8, 10, 12, 14, -1},
+ { 6, 8, 10, 12, 14, -1, -1, -1},
+ { 0, 6, 8, 10, 12, 14, -1, -1},
+ { 2, 6, 8, 10, 12, 14, -1, -1},
+ { 0, 2, 6, 8, 10, 12, 14, -1},
+ { 4, 6, 8, 10, 12, 14, -1, -1},
+ { 0, 4, 6, 8, 10, 12, 14, -1},
+ { 2, 4, 6, 8, 10, 12, 14, -1},
+ { 0, 2, 4, 6, 8, 10, 12, 14}
+};
+#endif
+
+#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a) /* unsigned a >= b per 16-bit lane: true exactly when max(a,b) == a */
+#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a) /* 128-bit variant of the same comparison */
+/* rej_uniform_avx: rejection-sample 12-bit candidates from buf (REJ_UNIFORM_AVX_BUFLEN bytes) into r, keeping values below q; returns the number of coefficients written (at most KYBER_N). */
+unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf)
+{
+  unsigned int ctr, pos;
+  uint16_t val0, val1;
+  uint32_t good;
+#ifdef BMI
+  uint64_t idx0, idx1, idx2, idx3;
+#endif
+  const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]);
+  const __m256i ones = _mm256_set1_epi8(1);
+  const __m256i mask = _mm256_set1_epi16(0xFFF);
+  const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10,
+                                        9, 8, 8, 7, 6, 5, 5, 4,
+                                       11,10,10, 9, 8, 7, 7, 6,
+                                        5, 4, 4, 3, 2, 1, 1, 0);
+  __m256i f0, f1, g0, g1, g2, g3;
+  __m128i f, t, pilo, pihi;
+
+  ctr = pos = 0;
+  while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 56) { /* 32 candidates per pass: consumes 48 bytes, reads up to pos+55 */
+    f0 = _mm256_loadu_si256((const __m256i *)&buf[pos]);
+    f1 = _mm256_loadu_si256((const __m256i *)&buf[pos+24]);
+    f0 = _mm256_permute4x64_epi64(f0, 0x94);
+    f1 = _mm256_permute4x64_epi64(f1, 0x94);
+    f0 = _mm256_shuffle_epi8(f0, idx8);
+    f1 = _mm256_shuffle_epi8(f1, idx8);
+    g0 = _mm256_srli_epi16(f0, 4);
+    g1 = _mm256_srli_epi16(f1, 4);
+    f0 = _mm256_blend_epi16(f0, g0, 0xAA); /* odd lanes take the value shifted right by 4 */
+    f1 = _mm256_blend_epi16(f1, g1, 0xAA);
+    f0 = _mm256_and_si256(f0, mask);       /* 12-bit candidates */
+    f1 = _mm256_and_si256(f1, mask);
+    pos += 48;
+
+    g0 = _mm256_cmpgt_epi16(bound, f0);    /* lane is all-ones where candidate < bound */
+    g1 = _mm256_cmpgt_epi16(bound, f1);
+
+    g0 = _mm256_packs_epi16(g0, g1);
+    good = _mm256_movemask_epi8(g0);       /* bits 0-7: f0 low, 8-15: f1 low, 16-23: f0 high, 24-31: f1 high */
+
+#ifdef BMI
+    idx0 = _pdep_u64(good >> 0, 0x0101010101010101);
+    idx1 = _pdep_u64(good >> 8, 0x0101010101010101);
+    idx2 = _pdep_u64(good >> 16, 0x0101010101010101);
+    idx3 = _pdep_u64(good >> 24, 0x0101010101010101);
+    idx0 = (idx0 << 8) - idx0;
+    idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
+    idx1 = (idx1 << 8) - idx1;
+    idx1 = _pext_u64(0x0E0C0A0806040200, idx1);
+    idx2 = (idx2 << 8) - idx2;
+    idx2 = _pext_u64(0x0E0C0A0806040200, idx2);
+    idx3 = (idx3 << 8) - idx3;
+    idx3 = _pext_u64(0x0E0C0A0806040200, idx3);
+
+    g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0));
+    g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1));
+    g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1);
+    g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1);
+#else
+    g0 = _mm256_castsi128_si256(_mm_loadl_epi64((const __m128i *)&idx[(good >> 0) & 0xFF]));
+    g1 = _mm256_castsi128_si256(_mm_loadl_epi64((const __m128i *)&idx[(good >> 8) & 0xFF]));
+    g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((const __m128i *)&idx[(good >> 16) & 0xFF]), 1);
+    g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((const __m128i *)&idx[(good >> 24) & 0xFF]), 1);
+#endif
+
+    g2 = _mm256_add_epi8(g0, ones);        /* offset+1 addresses the high byte of each 16-bit lane */
+    g3 = _mm256_add_epi8(g1, ones);
+    g0 = _mm256_unpacklo_epi8(g0, g2);
+    g1 = _mm256_unpacklo_epi8(g1, g3);
+
+    f0 = _mm256_shuffle_epi8(f0, g0);      /* accepted lanes move to the front */
+    f1 = _mm256_shuffle_epi8(f1, g1);
+
+    _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); /* always stores 8 lanes; ctr advances only by the accepted count */
+    ctr += _mm_popcnt_u32((good >> 0) & 0xFF);
+    _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1));
+    ctr += _mm_popcnt_u32((good >> 16) & 0xFF);
+    _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1));
+    ctr += _mm_popcnt_u32((good >> 8) & 0xFF);
+    _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1));
+    ctr += _mm_popcnt_u32((good >> 24) & 0xFF);
+  }
+
+  while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 16) { /* 8 candidates per pass: loads 16 bytes, consumes 12 */
+    f = _mm_loadu_si128((const __m128i *)&buf[pos]);
+    f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8));
+    t = _mm_srli_epi16(f, 4);
+    f = _mm_blend_epi16(f, t, 0xAA);
+    f = _mm_and_si128(f, _mm256_castsi256_si128(mask));
+    pos += 12;
+
+    t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f);
+    good = _mm_movemask_epi8(t);           /* two identical mask bits per 16-bit lane */
+
+#ifdef BMI
+    good &= 0x5555;
+    idx0 = _pdep_u64(good, 0x1111111111111111);
+    idx0 = (idx0 << 8) - idx0;
+    idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
+    pilo = _mm_cvtsi64_si128(idx0);
+#else
+    good = _pext_u32(good, 0x5555);        /* one bit per lane, giving an 8-bit row index into idx */
+    pilo = _mm_loadl_epi64((const __m128i *)&idx[good]);
+#endif
+
+    pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones));
+    pilo = _mm_unpacklo_epi8(pilo, pihi);
+    f = _mm_shuffle_epi8(f, pilo);
+    _mm_storeu_si128((__m128i *)&r[ctr], f);
+    ctr += _mm_popcnt_u32(good);
+  }
+
+  while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) { /* scalar tail: two 12-bit candidates per 3 bytes */
+    val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF;
+    val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4));
+    pos += 3;
+
+    if(val0 < KYBER_Q)
+      r[ctr++] = val0;
+    if(val1 < KYBER_Q && ctr < KYBER_N)
+      r[ctr++] = val1;
+  }
+
+  return ctr;
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.h
new file mode 100644
index 0000000000..3be5e2192e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/rejsample.h
@@ -0,0 +1,14 @@
+#ifndef REJSAMPLE_H
+#define REJSAMPLE_H
+
+#include <stdint.h>
+#include "params.h"
+#include "symmetric.h"
+
+#define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) /* (x + B)/B in integer arithmetic, i.e. floor(x/B)+1 XOF blocks */
+#define REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) /* buffer length in bytes that rej_uniform_avx bounds its reads by */
+
+#define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx)
+unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf); /* returns the number of coefficients written to r */
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.S
new file mode 100644
index 0000000000..18325ebec0
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.S
@@ -0,0 +1,255 @@
+#include "consts.h"
+.include "fq.inc"
+.include "shuffle.inc"
+
+/*
+nttpack_avx:
+#load
+vmovdqa (%rdi),%ymm4
+vmovdqa 32(%rdi),%ymm5
+vmovdqa 64(%rdi),%ymm6
+vmovdqa 96(%rdi),%ymm7
+vmovdqa 128(%rdi),%ymm8
+vmovdqa 160(%rdi),%ymm9
+vmovdqa 192(%rdi),%ymm10
+vmovdqa 224(%rdi),%ymm11
+
+shuffle1 4,5,3,5
+shuffle1 6,7,4,7
+shuffle1 8,9,6,9
+shuffle1 10,11,8,11
+
+shuffle2 3,4,10,4
+shuffle2 6,8,3,8
+shuffle2 5,7,6,7
+shuffle2 9,11,5,11
+
+shuffle4 10,3,9,3
+shuffle4 6,5,10,5
+shuffle4 4,8,6,8
+shuffle4 7,11,4,11
+
+shuffle8 9,10,7,10
+shuffle8 6,4,9,4
+shuffle8 3,5,6,5
+shuffle8 8,11,3,11
+
+#store
+vmovdqa %ymm7,(%rdi)
+vmovdqa %ymm9,32(%rdi)
+vmovdqa %ymm6,64(%rdi)
+vmovdqa %ymm3,96(%rdi)
+vmovdqa %ymm10,128(%rdi)
+vmovdqa %ymm4,160(%rdi)
+vmovdqa %ymm5,192(%rdi)
+vmovdqa %ymm11,224(%rdi)
+
+ret
+*/
+
+.text
+nttunpack128_avx: /* file-local helper: reorders one 256-byte block at (%rdi) in place */
+#load
+vmovdqa (%rdi),%ymm4 /* aligned loads of eight 32-byte rows into ymm4..ymm11 */
+vmovdqa 32(%rdi),%ymm5
+vmovdqa 64(%rdi),%ymm6
+vmovdqa 96(%rdi),%ymm7
+vmovdqa 128(%rdi),%ymm8
+vmovdqa 160(%rdi),%ymm9
+vmovdqa 192(%rdi),%ymm10
+vmovdqa 224(%rdi),%ymm11
+
+shuffle8 4,8,3,8 /* stage 1: exchange 128-bit halves between row pairs */
+shuffle8 5,9,4,9
+shuffle8 6,10,5,10
+shuffle8 7,11,6,11
+
+shuffle4 3,5,7,5 /* stage 2: interleave 64-bit elements */
+shuffle4 8,10,3,10
+shuffle4 4,6,8,6
+shuffle4 9,11,4,11
+
+shuffle2 7,8,9,8 /* stage 3: interleave 32-bit elements */
+shuffle2 5,6,7,6
+shuffle2 3,4,5,4
+shuffle2 10,11,3,11
+
+shuffle1 9,5,10,5 /* stage 4: interleave 16-bit elements */
+shuffle1 8,4,9,4
+shuffle1 7,3,8,3
+shuffle1 6,11,7,11
+
+#store
+vmovdqa %ymm10,(%rdi) /* results live in a permuted register set; written back over the input */
+vmovdqa %ymm5,32(%rdi)
+vmovdqa %ymm9,64(%rdi)
+vmovdqa %ymm4,96(%rdi)
+vmovdqa %ymm8,128(%rdi)
+vmovdqa %ymm3,160(%rdi)
+vmovdqa %ymm7,192(%rdi)
+vmovdqa %ymm11,224(%rdi)
+
+ret
+
+.global cdecl(nttunpack_avx)
+cdecl(nttunpack_avx): /* exported entry: processes 512 bytes at %rdi as two 256-byte halves */
+call nttunpack128_avx
+add $256,%rdi /* advance to the second half */
+call nttunpack128_avx
+ret
+
+ntttobytes128_avx: /* file-local helper: packs 256 bytes of 16-bit values at (%rsi) into 192 bytes at (%rdi); caller preloads ymm0 */
+#load
+vmovdqa (%rsi),%ymm5 /* aligned loads of eight 32-byte rows into ymm5..ymm12 */
+vmovdqa 32(%rsi),%ymm6
+vmovdqa 64(%rsi),%ymm7
+vmovdqa 96(%rsi),%ymm8
+vmovdqa 128(%rsi),%ymm9
+vmovdqa 160(%rsi),%ymm10
+vmovdqa 192(%rsi),%ymm11
+vmovdqa 224(%rsi),%ymm12
+
+#csubq
+csubq 5,13 /* csubq is defined in fq.inc (not shown here); presumably a conditional subtraction of q with ymm13 as scratch - confirm */
+csubq 6,13
+csubq 7,13
+csubq 8,13
+csubq 9,13
+csubq 10,13
+csubq 11,13
+csubq 12,13
+
+#bitpack
+vpsllw $12,%ymm6,%ymm4 /* ymm4 = ymm5 | (ymm6 << 12), per 16-bit lane */
+vpor %ymm4,%ymm5,%ymm4
+
+vpsrlw $4,%ymm6,%ymm5 /* ymm5 = (ymm6 >> 4) | (ymm7 << 8) */
+vpsllw $8,%ymm7,%ymm6
+vpor %ymm5,%ymm6,%ymm5
+
+vpsrlw $8,%ymm7,%ymm6 /* ymm6 = (ymm7 >> 8) | (ymm8 << 4) */
+vpsllw $4,%ymm8,%ymm7
+vpor %ymm6,%ymm7,%ymm6
+
+vpsllw $12,%ymm10,%ymm7 /* same three-step packing for rows ymm9..ymm12 into ymm7, ymm8, ymm9 */
+vpor %ymm7,%ymm9,%ymm7
+
+vpsrlw $4,%ymm10,%ymm8
+vpsllw $8,%ymm11,%ymm9
+vpor %ymm8,%ymm9,%ymm8
+
+vpsrlw $8,%ymm11,%ymm9
+vpsllw $4,%ymm12,%ymm10
+vpor %ymm9,%ymm10,%ymm9
+
+shuffle1 4,5,3,5 /* reorder the six packed rows through the 16/32/64/128-bit shuffle stages */
+shuffle1 6,7,4,7
+shuffle1 8,9,6,9
+
+shuffle2 3,4,8,4
+shuffle2 6,5,3,5
+shuffle2 7,9,6,9
+
+shuffle4 8,3,7,3
+shuffle4 6,4,8,4
+shuffle4 5,9,6,9
+
+shuffle8 7,8,5,8
+shuffle8 6,3,7,3
+shuffle8 4,9,6,9
+
+#store
+vmovdqu %ymm5,(%rdi) /* unaligned stores: six rows = 192 output bytes */
+vmovdqu %ymm7,32(%rdi)
+vmovdqu %ymm6,64(%rdi)
+vmovdqu %ymm8,96(%rdi)
+vmovdqu %ymm3,128(%rdi)
+vmovdqu %ymm9,160(%rdi)
+
+ret
+
+.global cdecl(ntttobytes_avx)
+cdecl(ntttobytes_avx): /* exported entry: %rdi = output bytes, %rsi = input rows, %rdx = constants table */
+#consts
+vmovdqa _16XQ*2(%rdx),%ymm0 /* ymm0 = 32 bytes at byte offset _16XQ*2 of the table */
+call ntttobytes128_avx
+add $256,%rsi /* second half: next 256 input bytes ... */
+add $192,%rdi /* ... produce the next 192 output bytes */
+call ntttobytes128_avx
+ret
+
+nttfrombytes128_avx: /* file-local helper: unpacks 192 bytes at (%rsi) into 256 bytes of 16-bit values at (%rdi); caller preloads the lane mask in ymm0 */
+#load
+vmovdqu (%rsi),%ymm4 /* unaligned loads of six 32-byte rows into ymm4..ymm9 */
+vmovdqu 32(%rsi),%ymm5
+vmovdqu 64(%rsi),%ymm6
+vmovdqu 96(%rsi),%ymm7
+vmovdqu 128(%rsi),%ymm8
+vmovdqu 160(%rsi),%ymm9
+
+shuffle8 4,7,3,7 /* reorder rows through the 128/64/32/16-bit shuffle stages */
+shuffle8 5,8,4,8
+shuffle8 6,9,5,9
+
+shuffle4 3,8,6,8
+shuffle4 7,5,3,5
+shuffle4 4,9,7,9
+
+shuffle2 6,5,4,5
+shuffle2 8,7,6,7
+shuffle2 3,9,8,9
+
+shuffle1 4,7,10,7
+shuffle1 5,8,4,8
+shuffle1 6,9,5,9
+
+#bitunpack
+vpsrlw $12,%ymm10,%ymm11 /* ymm11 = ((ymm10 >> 12) | (ymm7 << 4)) & mask */
+vpsllw $4,%ymm7,%ymm12
+vpor %ymm11,%ymm12,%ymm11
+vpand %ymm0,%ymm10,%ymm10 /* ymm10 = ymm10 & mask */
+vpand %ymm0,%ymm11,%ymm11
+
+vpsrlw $8,%ymm7,%ymm12 /* ymm12 = ((ymm7 >> 8) | (ymm4 << 8)) & mask */
+vpsllw $8,%ymm4,%ymm13
+vpor %ymm12,%ymm13,%ymm12
+vpand %ymm0,%ymm12,%ymm12
+
+vpsrlw $4,%ymm4,%ymm13 /* ymm13 = (ymm4 >> 4) & mask */
+vpand %ymm0,%ymm13,%ymm13
+
+vpsrlw $12,%ymm8,%ymm14 /* same pattern for the second group: ymm8, ymm14, ymm15, ymm1 */
+vpsllw $4,%ymm5,%ymm15
+vpor %ymm14,%ymm15,%ymm14
+vpand %ymm0,%ymm8,%ymm8
+vpand %ymm0,%ymm14,%ymm14
+
+vpsrlw $8,%ymm5,%ymm15
+vpsllw $8,%ymm9,%ymm1
+vpor %ymm15,%ymm1,%ymm15
+vpand %ymm0,%ymm15,%ymm15
+
+vpsrlw $4,%ymm9,%ymm1
+vpand %ymm0,%ymm1,%ymm1
+
+#store
+vmovdqa %ymm10,(%rdi) /* aligned stores: eight rows = 256 output bytes */
+vmovdqa %ymm11,32(%rdi)
+vmovdqa %ymm12,64(%rdi)
+vmovdqa %ymm13,96(%rdi)
+vmovdqa %ymm8,128(%rdi)
+vmovdqa %ymm14,160(%rdi)
+vmovdqa %ymm15,192(%rdi)
+vmovdqa %ymm1,224(%rdi)
+
+ret
+
+.global cdecl(nttfrombytes_avx)
+cdecl(nttfrombytes_avx): /* exported entry: %rdi = output rows, %rsi = input bytes, %rdx = constants table */
+#consts
+vmovdqa _16XMASK*2(%rdx),%ymm0 /* ymm0 = 32 bytes at byte offset _16XMASK*2 of the table, used as the per-lane AND mask */
+call nttfrombytes128_avx
+add $256,%rdi /* second half: next 256 output bytes ... */
+add $192,%rsi /* ... come from the next 192 input bytes */
+call nttfrombytes128_avx
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.inc
new file mode 100644
index 0000000000..73e9ffe03c
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/shuffle.inc
@@ -0,0 +1,25 @@
+.macro shuffle8 r0,r1,r2,r3
+vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2
+vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3
+.endm
+
+.macro shuffle4 r0,r1,r2,r3
+vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2
+vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3
+.endm
+
+.macro shuffle2 r0,r1,r2,r3
+#disabled alternative to the vmovsldup on the next line (appears interchangeable because only the odd dwords survive the blend): vpsllq $32,%ymm\r1,%ymm\r2
+vmovsldup %ymm\r1,%ymm\r2
+vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2
+vpsrlq $32,%ymm\r0,%ymm\r0
+#disabled alternative to the vpsrlq on the previous line (appears interchangeable because only the even dwords survive the blend): vmovshdup %ymm\r0,%ymm\r0
+vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3
+.endm
+
+.macro shuffle1 r0,r1,r2,r3
+vpslld $16,%ymm\r1,%ymm\r2
+vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2
+vpsrld $16,%ymm\r0,%ymm\r0
+vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3
+.endm
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric-shake.c
new file mode 100644
index 0000000000..20f451882e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric-shake.c
@@ -0,0 +1,74 @@
+#include
+#include
+#include
+#include "params.h"
+#include "symmetric.h"
+#include "fips202.h"
+
+/*************************************************
+* Name: kyber_shake128_absorb
+*
+* Description: Absorb step of the SHAKE128 specialized for the Kyber context.
+*
+* Arguments: - shake128incctx *state: pointer to (uninitialized) output SHAKE128 state
+* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state
+* - uint8_t x: additional byte of input, absorbed right after seed
+* - uint8_t y: additional byte of input, absorbed right after x
+**************************************************/
+void kyber_shake128_absorb(shake128incctx *state,
+ const uint8_t seed[KYBER_SYMBYTES],
+ uint8_t x,
+ uint8_t y)
+{
+ uint8_t extseed[KYBER_SYMBYTES+2];
+
+ memcpy(extseed, seed, KYBER_SYMBYTES);
+ extseed[KYBER_SYMBYTES+0] = x;
+ extseed[KYBER_SYMBYTES+1] = y;
+
+ shake128_absorb_once(state, extseed, sizeof(extseed));
+}
+
+/*************************************************
+* Name: kyber_shake256_prf
+*
+* Description: Usage of SHAKE256 as a PRF, concatenates the key and the nonce byte
+* and then generates outlen bytes of SHAKE256 output
+*
+* Arguments: - uint8_t *out: pointer to output (outlen bytes are written)
+* - size_t outlen: number of requested output bytes
+* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
+* - uint8_t nonce: single-byte nonce (public PRF input)
+**************************************************/
+void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce)
+{
+ uint8_t extkey[KYBER_SYMBYTES+1];
+
+ memcpy(extkey, key, KYBER_SYMBYTES);
+ extkey[KYBER_SYMBYTES] = nonce;
+
+ shake256(out, outlen, extkey, sizeof(extkey));
+}
+
+/*************************************************
+* Name: kyber_shake256_rkprf
+*
+* Description: Usage of SHAKE256 as a PRF, absorbs the key followed by the input
+* and then generates KYBER_SSBYTES bytes of SHAKE256 output
+*
+* Arguments: - uint8_t *out: pointer to output (of length KYBER_SSBYTES)
+* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
+* - const uint8_t *input: pointer to the input
+* (of length KYBER_CIPHERTEXTBYTES)
+**************************************************/
+void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES])
+{
+ shake256incctx s;
+
+ shake256_inc_init(&s);
+ shake256_inc_absorb(&s, key, KYBER_SYMBYTES);
+ shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES);
+ shake256_inc_finalize(&s);
+ shake256_inc_squeeze(out, KYBER_SSBYTES, &s);
+ shake256_inc_ctx_release(&s);
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric.h
new file mode 100644
index 0000000000..e4941f7a86
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/symmetric.h
@@ -0,0 +1,34 @@
+#ifndef SYMMETRIC_H
+#define SYMMETRIC_H
+
+#include
+#include
+#include "params.h"
+
+#include "fips202.h"
+#include "fips202x4.h"
+
+typedef shake128incctx xof_state;
+
+#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb)
+void kyber_shake128_absorb(shake128incctx *s,
+ const uint8_t seed[KYBER_SYMBYTES],
+ uint8_t x,
+ uint8_t y);
+
+#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf)
+void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce);
+
+#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf)
+void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]);
+
+#define XOF_BLOCKBYTES SHAKE128_RATE
+
+#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES)
+#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES)
+#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y)
+#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE)
+#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE)
+#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT)
+
+#endif /* SYMMETRIC_H */
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/verify.c
new file mode 100644
index 0000000000..aa8e2850b1
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_avx2/verify.c
@@ -0,0 +1,73 @@
+#include
+#include
+#include
+#include "verify.h"
+
+/*************************************************
+* Name: verify
+*
+* Description: Compare two arrays for equality in constant time.
+*
+* Arguments: const uint8_t *a: pointer to first byte array
+* const uint8_t *b: pointer to second byte array
+* size_t len: length of the byte arrays
+*
+* Returns 0 if the byte arrays are equal, 1 otherwise
+**************************************************/
+int verify(const uint8_t *a, const uint8_t *b, size_t len)
+{
+ size_t i;
+ uint64_t r;
+ __m256i f, g, h;
+
+ h = _mm256_setzero_si256();
+ for(i=0;i> 63;
+ return r;
+}
+
+/*************************************************
+* Name: cmov
+*
+* Description: Copy len bytes from x to r if b is 1;
+* don't modify r if b is 0. Requires b to be in {0,1};
+* assumes two's complement representation of negative integers.
+* Runs in constant time.
+*
+* Arguments: uint8_t *r: pointer to output byte array
+* const uint8_t *x: pointer to input byte array
+* size_t len: Amount of bytes to be copied
+* uint8_t b: Condition bit; has to be in {0,1}
+**************************************************/
+void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b)
+{
+ size_t i;
+ __m256i xvec, rvec, bvec;
+
+ bvec = _mm256_set1_epi64x(-(uint64_t)b);
+ for(i=0;i
+#include
+#include "params.h"
+
+#define verify KYBER_NAMESPACE(verify)
+int verify(const uint8_t *a, const uint8_t *b, size_t len);
+
+#define cmov KYBER_NAMESPACE(cmov)
+void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/LICENSE
new file mode 100644
index 0000000000..7922ab8007
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/LICENSE
@@ -0,0 +1,6 @@
+Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/);
+or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html).
+
+For Keccak and AES we are using public-domain
+code from sources and by authors listed in
+comments on top of the respective files.
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/api.h
new file mode 100644
index 0000000000..70d40f3f3e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/api.h
@@ -0,0 +1,66 @@
+#ifndef API_H
+#define API_H
+
+#include
+
+#define pqcrystals_kyber512_SECRETKEYBYTES 1632
+#define pqcrystals_kyber512_PUBLICKEYBYTES 800
+#define pqcrystals_kyber512_CIPHERTEXTBYTES 768
+#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber512_ENCCOINBYTES 32
+#define pqcrystals_kyber512_BYTES 32
+
+#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES
+#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES
+#define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES
+#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES
+#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES
+#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES
+
+int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber768_SECRETKEYBYTES 2400
+#define pqcrystals_kyber768_PUBLICKEYBYTES 1184
+#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088
+#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber768_ENCCOINBYTES 32
+#define pqcrystals_kyber768_BYTES 32
+
+#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES
+#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES
+#define pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES
+#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES
+#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES
+#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES
+
+int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber1024_SECRETKEYBYTES 3168
+#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568
+#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568
+#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber1024_ENCCOINBYTES 32
+#define pqcrystals_kyber1024_BYTES 32
+
+#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES
+#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES
+#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES
+#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES
+#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES
+#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES
+
+int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.c
new file mode 100644
index 0000000000..1500ffea56
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.c
@@ -0,0 +1,128 @@
+#include
+#include "params.h"
+#include "cbd.h"
+
+/*************************************************
+* Name: load32_littleendian
+*
+* Description: load 4 bytes into a 32-bit integer
+* in little-endian order (x[0] is the least significant byte)
+*
+* Arguments: - const uint8_t *x: pointer to input byte array (4 bytes are read)
+*
+* Returns 32-bit unsigned integer loaded from x
+**************************************************/
+static uint32_t load32_littleendian(const uint8_t x[4])
+{
+ uint32_t r;
+ r = (uint32_t)x[0];
+ r |= (uint32_t)x[1] << 8;
+ r |= (uint32_t)x[2] << 16;
+ r |= (uint32_t)x[3] << 24;
+ return r;
+}
+
+/*************************************************
+* Name: load24_littleendian
+*
+* Description: load 3 bytes into a 32-bit integer
+* in little-endian order (x[0] is the least significant byte).
+* Only compiled when KYBER_ETA1 == 3 (needed for Kyber-512 only)
+*
+* Arguments: - const uint8_t *x: pointer to input byte array (3 bytes are read)
+*
+* Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
+**************************************************/
+#if KYBER_ETA1 == 3
+static uint32_t load24_littleendian(const uint8_t x[3])
+{
+ uint32_t r;
+ r = (uint32_t)x[0];
+ r |= (uint32_t)x[1] << 8;
+ r |= (uint32_t)x[2] << 16;
+ return r;
+}
+#endif
+
+
+/*************************************************
+* Name: cbd2
+*
+* Description: Given an array of uniformly random bytes, compute
+* polynomial with coefficients distributed according to
+* a centered binomial distribution with parameter eta=2
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *buf: pointer to input byte array
+**************************************************/
+static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4])
+{
+ unsigned int i,j;
+ uint32_t t,d;
+ int16_t a,b;
+
+ for(i=0;i>1) & 0x55555555;
+
+ for(j=0;j<8;j++) {
+ a = (d >> (4*j+0)) & 0x3;
+ b = (d >> (4*j+2)) & 0x3;
+ r->coeffs[8*i+j] = a - b;
+ }
+ }
+}
+
+/*************************************************
+* Name: cbd3
+*
+* Description: Given an array of uniformly random bytes, compute
+* polynomial with coefficients distributed according to
+* a centered binomial distribution with parameter eta=3.
+* This function is only needed for Kyber-512
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *buf: pointer to input byte array
+**************************************************/
+#if KYBER_ETA1 == 3
+static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4])
+{
+ unsigned int i,j;
+ uint32_t t,d;
+ int16_t a,b;
+
+ for(i=0;i>1) & 0x00249249;
+ d += (t>>2) & 0x00249249;
+
+ for(j=0;j<4;j++) {
+ a = (d >> (6*j+0)) & 0x7;
+ b = (d >> (6*j+3)) & 0x7;
+ r->coeffs[4*i+j] = a - b;
+ }
+ }
+}
+#endif
+
+void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4])
+{
+#if KYBER_ETA1 == 2
+ cbd2(r, buf); /* eta1 = 2: centered binomial sampler cbd2 */
+#elif KYBER_ETA1 == 3
+ cbd3(r, buf); /* eta1 = 3: cbd3, which is only compiled in this configuration */
+#else
+#error "This implementation requires eta1 in {2,3}"
+#endif /* compile-time dispatch on KYBER_ETA1 */
+}
+
+void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4])
+{
+#if KYBER_ETA2 == 2
+ cbd2(r, buf); /* eta2 = 2 is the only supported value: centered binomial sampler cbd2 */
+#else
+#error "This implementation requires eta2 = 2"
+#endif /* compile-time check of KYBER_ETA2 */
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.h
new file mode 100644
index 0000000000..7b677d745d
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/cbd.h
@@ -0,0 +1,14 @@
+#ifndef CBD_H
+#define CBD_H
+
+#include
+#include "params.h"
+#include "poly.h"
+
+#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1)
+void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]);
+
+#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2)
+void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/indcpa.c
new file mode 100644
index 0000000000..4a8b4c894f
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/indcpa.c
@@ -0,0 +1,331 @@
+#include
+#include
+#include
+#include "params.h"
+#include "indcpa.h"
+#include "polyvec.h"
+#include "poly.h"
+#include "ntt.h"
+#include "symmetric.h"
+#include "randombytes.h"
+
+/*************************************************
+* Name: pack_pk
+*
+* Description: Serialize the public key as concatenation of the
+* serialized vector of polynomials pk (KYBER_POLYVECBYTES bytes)
+* and the public seed used to generate the matrix A.
+*
+* Arguments: - uint8_t *r: pointer to the output serialized public key
+* - polyvec *pk: pointer to the input public-key polyvec
+* - const uint8_t *seed: pointer to the input public seed (KYBER_SYMBYTES bytes)
+**************************************************/
+static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES],
+ polyvec *pk,
+ const uint8_t seed[KYBER_SYMBYTES])
+{
+ polyvec_tobytes(r, pk);
+ memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES);
+}
+
+/*************************************************
+* Name: unpack_pk
+*
+* Description: De-serialize public key from a byte array;
+* approximate inverse of pack_pk
+*
+* Arguments: - polyvec *pk: pointer to output public-key polynomial vector
+* - uint8_t *seed: pointer to output seed to generate matrix A (KYBER_SYMBYTES bytes)
+* - const uint8_t *packedpk: pointer to input serialized public key (seed at offset KYBER_POLYVECBYTES)
+**************************************************/
+static void unpack_pk(polyvec *pk,
+ uint8_t seed[KYBER_SYMBYTES],
+ const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES])
+{
+ polyvec_frombytes(pk, packedpk);
+ memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES);
+}
+
+/*************************************************
+* Name: pack_sk
+*
+* Description: Serialize the secret key (thin wrapper around polyvec_tobytes)
+*
+* Arguments: - uint8_t *r: pointer to output serialized secret key (KYBER_INDCPA_SECRETKEYBYTES bytes)
+* - polyvec *sk: pointer to input vector of polynomials (secret key)
+**************************************************/
+static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk)
+{
+ polyvec_tobytes(r, sk);
+}
+
+/*************************************************
+* Name: unpack_sk
+*
+* Description: De-serialize the secret key; inverse of pack_sk (thin wrapper around polyvec_frombytes)
+*
+* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key)
+* - const uint8_t *packedsk: pointer to input serialized secret key (KYBER_INDCPA_SECRETKEYBYTES bytes)
+**************************************************/
+static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES])
+{
+ polyvec_frombytes(sk, packedsk);
+}
+
+/*************************************************
+* Name: pack_ciphertext
+*
+* Description: Serialize the ciphertext as concatenation of the
+* compressed and serialized vector of polynomials b
+* and the compressed and serialized polynomial v
+*
+* Arguments: - uint8_t *r: pointer to the output serialized ciphertext
+* - polyvec *b: pointer to the input vector of polynomials b
+* - poly *v: pointer to the input polynomial v
+**************************************************/
+static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v)
+{
+ polyvec_compress(r, b);
+ poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v);
+}
+
+/*************************************************
+* Name: unpack_ciphertext
+*
+* Description: De-serialize and decompress ciphertext from a byte array;
+* approximate inverse of pack_ciphertext
+*
+* Arguments: - polyvec *b: pointer to the output vector of polynomials b
+* - poly *v: pointer to the output polynomial v
+* - const uint8_t *c: pointer to the input serialized ciphertext (v starts at offset KYBER_POLYVECCOMPRESSEDBYTES)
+**************************************************/
+static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES])
+{
+ polyvec_decompress(b, c);
+ poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES);
+}
+
+/*************************************************
+* Name: rej_uniform
+*
+* Description: Run rejection sampling on uniform random bytes: every 3 input bytes
+* yield two 12-bit candidates, each kept only if it is below KYBER_Q
+*
+* Arguments: - int16_t *r: pointer to output buffer
+* - unsigned int len: requested number of 16-bit integers (uniform mod q)
+* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes)
+* - unsigned int buflen: length of input buffer in bytes (a trailing group of fewer than 3 bytes is not read)
+*
+* Returns number of sampled 16-bit integers (at most len)
+**************************************************/
+static unsigned int rej_uniform(int16_t *r,
+ unsigned int len,
+ const uint8_t *buf,
+ unsigned int buflen)
+{
+ unsigned int ctr, pos;
+ uint16_t val0, val1;
+
+ ctr = pos = 0;
+ while(ctr < len && pos + 3 <= buflen) {
+ val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF;
+ val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF;
+ pos += 3;
+
+ if(val0 < KYBER_Q)
+ r[ctr++] = val0;
+ if(ctr < len && val1 < KYBER_Q)
+ r[ctr++] = val1;
+ }
+
+ return ctr;
+}
+
+#define gen_a(A,B) gen_matrix(A,B,0)
+#define gen_at(A,B) gen_matrix(A,B,1)
+
+/*************************************************
+* Name: gen_matrix
+*
+* Description: Deterministically generate matrix A (or the transpose of A)
+* from a seed. Entries of the matrix are polynomials that look
+* uniformly random. Performs rejection sampling on output of
+* a XOF
+*
+* Arguments: - polyvec *a: pointer to output matrix A
+* - const uint8_t *seed: pointer to input seed
+* - int transposed: boolean deciding whether A or A^T is generated
+**************************************************/
+#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES)
+// Not static for benchmarking
+void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
+{
+ unsigned int ctr, i, j, k;
+ unsigned int buflen, off;
+ uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2];
+ xof_state state;
+ xof_init(&state, seed);
+
+ for(i=0;i
+#include "params.h"
+#include "polyvec.h"
+
+#define gen_matrix KYBER_NAMESPACE(gen_matrix)
+void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed);
+
+#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand)
+void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]);
+
+#define indcpa_enc KYBER_NAMESPACE(indcpa_enc)
+void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]);
+
+#define indcpa_dec KYBER_NAMESPACE(indcpa_dec)
+void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.c
new file mode 100644
index 0000000000..63abc1029c
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.c
@@ -0,0 +1,169 @@
+#include
+#include
+#include
+#include "params.h"
+#include "kem.h"
+#include "indcpa.h"
+#include "verify.h"
+#include "symmetric.h"
+#include "randombytes.h"
+/*************************************************
+* Name: crypto_kem_keypair_derand
+*
+* Description: Generates public and private key deterministically from coins
+* for CCA-secure Kyber key encapsulation mechanism
+*
+* Arguments: - uint8_t *pk: pointer to output public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+* - uint8_t *sk: pointer to output private key
+* (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+* - const uint8_t *coins: pointer to input randomness
+* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_keypair_derand(uint8_t *pk,
+ uint8_t *sk,
+ const uint8_t *coins)
+{
+ indcpa_keypair_derand(pk, sk, coins);
+ memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES);
+ hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES);
+ /* Value z for pseudo-random output on reject: second half of coins, stored in the last KYBER_SYMBYTES bytes of sk */
+ memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES);
+ return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_keypair
+*
+* Description: Generates public and private key for CCA-secure Kyber key encapsulation
+* mechanism; draws 2*KYBER_SYMBYTES bytes from randombytes and calls crypto_kem_keypair_derand
+*
+* Arguments: - uint8_t *pk: pointer to output public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+* - uint8_t *sk: pointer to output private key
+* (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_keypair(uint8_t *pk,
+ uint8_t *sk)
+{
+ uint8_t coins[2*KYBER_SYMBYTES];
+ randombytes(coins, 2*KYBER_SYMBYTES);
+ crypto_kem_keypair_derand(pk, sk, coins);
+ return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_enc_derand
+*
+* Description: Generates cipher text and shared
+* secret for given public key, deterministically from coins
+*
+* Arguments: - uint8_t *ct: pointer to output cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *pk: pointer to input public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+* - const uint8_t *coins: pointer to input randomness
+* (an already allocated array filled with KYBER_SYMBYTES random bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_enc_derand(uint8_t *ct,
+ uint8_t *ss,
+ const uint8_t *pk,
+ const uint8_t *coins)
+{
+ uint8_t buf[2*KYBER_SYMBYTES];
+ /* Will contain key, coins: key in the first KYBER_SYMBYTES bytes, coins in the second */
+ uint8_t kr[2*KYBER_SYMBYTES];
+
+ memcpy(buf, coins, KYBER_SYMBYTES);
+
+ /* Multitarget countermeasure for coins + contributory KEM */
+ hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES);
+ hash_g(kr, buf, 2*KYBER_SYMBYTES);
+
+ /* coins are in kr+KYBER_SYMBYTES */
+ indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES);
+
+ memcpy(ss,kr,KYBER_SYMBYTES);
+ return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_enc
+*
+* Description: Generates cipher text and shared secret for given public key;
+* draws KYBER_SYMBYTES bytes from randombytes and calls crypto_kem_enc_derand
+*
+* Arguments: - uint8_t *ct: pointer to output cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *pk: pointer to input public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_enc(uint8_t *ct,
+ uint8_t *ss,
+ const uint8_t *pk)
+{
+ uint8_t coins[KYBER_SYMBYTES];
+ randombytes(coins, KYBER_SYMBYTES);
+ crypto_kem_enc_derand(ct, ss, pk, coins);
+ return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_dec
+*
+* Description: Generates shared secret for given
+* cipher text and private key
+*
+* Arguments: - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *ct: pointer to input cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - const uint8_t *sk: pointer to input private key
+* (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0, also when the re-encryption check fails.
+*
+* On failure, ss will contain a pseudo-random value.
+**************************************************/
+int crypto_kem_dec(uint8_t *ss,
+ const uint8_t *ct,
+ const uint8_t *sk)
+{
+ int fail;
+ uint8_t buf[2*KYBER_SYMBYTES];
+ /* Will contain key, coins: key in the first KYBER_SYMBYTES bytes, coins in the second */
+ uint8_t kr[2*KYBER_SYMBYTES];
+ uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES];
+ const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES;
+
+ indcpa_dec(buf, ct, sk);
+
+ /* Multitarget countermeasure for coins + contributory KEM */
+ memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES);
+ hash_g(kr, buf, 2*KYBER_SYMBYTES);
+
+ /* coins are in kr+KYBER_SYMBYTES; re-encrypt the decrypted message into cmp */
+ indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES);
+
+ fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES);
+
+ /* Compute rejection key from the last KYBER_SYMBYTES bytes of sk and ct */
+ rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct);
+
+ /* Copy true key to return buffer if fail is false */
+ cmov(ss,kr,KYBER_SYMBYTES,!fail);
+
+ return 0;
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.h
new file mode 100644
index 0000000000..234f11966b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/kem.h
@@ -0,0 +1,35 @@
+#ifndef KEM_H
+#define KEM_H
+
+#include
+#include "params.h"
+
+#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES
+#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES
+#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES
+#define CRYPTO_BYTES KYBER_SSBYTES
+
+#if (KYBER_K == 2)
+#define CRYPTO_ALGNAME "Kyber512"
+#elif (KYBER_K == 3)
+#define CRYPTO_ALGNAME "Kyber768"
+#elif (KYBER_K == 4)
+#define CRYPTO_ALGNAME "Kyber1024"
+#endif
+
+#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand)
+int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+
+#define crypto_kem_keypair KYBER_NAMESPACE(keypair)
+int crypto_kem_keypair(uint8_t *pk, uint8_t *sk);
+
+#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand)
+int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+
+#define crypto_kem_enc KYBER_NAMESPACE(enc)
+int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+
+#define crypto_kem_dec KYBER_NAMESPACE(dec)
+int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.c
new file mode 100644
index 0000000000..2f2eb10b2f
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.c
@@ -0,0 +1,146 @@
+#include
+#include "params.h"
+#include "ntt.h"
+#include "reduce.h"
+
+/* Code that generates the zetas table used in the number-theoretic transform
+   (kept for reference only; it is commented out and never compiled):
+
+#define KYBER_ROOT_OF_UNITY 17
+
+static const uint8_t tree[128] = {
+ 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120,
+ 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124,
+ 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122,
+ 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126,
+ 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121,
+ 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125,
+ 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123,
+ 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127
+};
+
+void init_ntt() {
+ unsigned int i;
+ int16_t tmp[128];
+
+ tmp[0] = MONT;
+ for(i=1;i<128;i++)
+ tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q);
+
+ for(i=0;i<128;i++) {
+ zetas[i] = tmp[tree[i]];
+ if(zetas[i] > KYBER_Q/2)
+ zetas[i] -= KYBER_Q;
+ if(zetas[i] < -KYBER_Q/2)
+ zetas[i] += KYBER_Q;
+ }
+}
+*/
+
+/* Table of 128 twiddle factors. ntt() reads entries 1..127 in ascending
+ * order, invntt() reads them in descending order starting at 127, and
+ * poly_basemul_montgomery() in poly.c reads entries 64..127. The
+ * commented-out init_ntt() above shows how the values are derived. */
+const int16_t zetas[128] = {
+ -1044, -758, -359, -1517, 1493, 1422, 287, 202,
+ -171, 622, 1577, 182, 962, -1202, -1474, 1468,
+ 573, -1325, 264, 383, -829, 1458, -1602, -130,
+ -681, 1017, 732, 608, -1542, 411, -205, -1571,
+ 1223, 652, -552, 1015, -1293, 1491, -282, -1544,
+ 516, -8, -320, -666, -1618, -1162, 126, 1469,
+ -853, -90, -271, 830, 107, -1421, -247, -951,
+ -398, 961, -1508, -725, 448, -1065, 677, -1275,
+ -1103, 430, 555, 843, -1251, 871, 1550, 105,
+ 422, 587, 177, -235, -291, -460, 1574, 1653,
+ -246, 778, 1159, -147, -777, 1483, -602, 1119,
+ -1590, 644, -872, 349, 418, 329, -156, -75,
+ 817, 1097, 603, 610, 1322, -1285, -1465, 384,
+ -1215, -136, 1218, -1335, -874, 220, -1187, -1659,
+ -1185, -1530, -1278, 794, -1510, -854, -870, 478,
+ -108, -308, 996, 991, 958, -1460, 1522, 1628
+};
+
+/*************************************************
+* Name:        fqmul
+*
+* Description: Forms the 32-bit product of two 16-bit values and hands it
+*              to montgomery_reduce.
+*
+* Arguments:   - int16_t a: first factor
+*              - int16_t b: second factor
+*
+* Returns the result of montgomery_reduce on a*b, i.e. a 16-bit integer
+* congruent to a*b*R^{-1} mod q
+**************************************************/
+static int16_t fqmul(int16_t a, int16_t b) {
+  /* Widen first so the multiplication is carried out in 32 bits. */
+  const int32_t product = (int32_t)a * b;
+
+  return montgomery_reduce(product);
+}
+
+/*************************************************
+* Name:        ntt
+*
+* Description: In-place forward number-theoretic transform (NTT) in Rq.
+*              Input is in standard order, output is in bitreversed order.
+*              Runs seven layers (half-widths 128 down to 2); every group
+*              of 2*span coefficients consumes the next entry of zetas,
+*              starting at index 1.
+*
+* Arguments:   - int16_t r[256]: pointer to input/output vector of elements of Zq
+**************************************************/
+void ntt(int16_t r[256]) {
+  unsigned int span, base, idx;
+  unsigned int next_zeta = 1;
+
+  for(span = 128; span >= 2; span >>= 1) {
+    /* Each group covers 2*span coefficients: a lower and an upper half. */
+    for(base = 0; base < 256; base += 2*span) {
+      const int16_t twiddle = zetas[next_zeta++];
+
+      for(idx = base; idx < base + span; idx++) {
+        const int16_t lo = r[idx];
+        const int16_t hi = fqmul(twiddle, r[idx + span]);
+
+        r[idx + span] = lo - hi;
+        r[idx]        = lo + hi;
+      }
+    }
+  }
+}
+
+/*************************************************
+* Name:        invntt
+*
+* Description: In-place inverse number-theoretic transform in Rq and
+*              multiplication by Montgomery factor 2^16.
+*              Input is in bitreversed order, output is in standard order.
+*              Walks the layers with half-widths 2 up to 128, reading
+*              zetas backwards from index 127, then scales every
+*              coefficient by the constant 1441.
+*
+* Arguments:   - int16_t r[256]: pointer to input/output vector of elements of Zq
+**************************************************/
+void invntt(int16_t r[256]) {
+  unsigned int span, base, idx;
+  unsigned int next_zeta = 127;
+  const int16_t scale = 1441; // mont^2/128
+
+  for(span = 2; span <= 128; span <<= 1) {
+    for(base = 0; base < 256; base += 2*span) {
+      const int16_t twiddle = zetas[next_zeta--];
+
+      for(idx = base; idx < base + span; idx++) {
+        const int16_t lo = r[idx];
+        const int16_t hi = r[idx + span];
+
+        /* The sum is Barrett-reduced; the difference is multiplied by the
+         * twiddle factor. */
+        r[idx]        = barrett_reduce(lo + hi);
+        r[idx + span] = fqmul(twiddle, hi - lo);
+      }
+    }
+  }
+
+  for(idx = 0; idx < 256; idx++)
+    r[idx] = fqmul(r[idx], scale);
+}
+
+/*************************************************
+* Name:        basemul
+*
+* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta)
+*              used for multiplication of elements in Rq in NTT domain.
+*              Computes r[0] = a[1]*b[1]*zeta + a[0]*b[0] and
+*              r[1] = a[0]*b[1] + a[1]*b[0], with every product taken
+*              through fqmul.
+*
+* Arguments:   - int16_t r[2]: pointer to the output polynomial
+*              - const int16_t a[2]: pointer to the first factor
+*              - const int16_t b[2]: pointer to the second factor
+*              - int16_t zeta: integer defining the reduction polynomial
+**************************************************/
+void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta)
+{
+ /* r[0] is written before a[0], b[0] are read, so the statement order
+    matters if r overlaps an input. */
+ r[0] = fqmul(a[1], b[1]);
+ r[0] = fqmul(r[0], zeta);
+ r[0] += fqmul(a[0], b[0]);
+ /* Cross terms form the coefficient of X. */
+ r[1] = fqmul(a[0], b[1]);
+ r[1] += fqmul(a[1], b[0]);
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.h
new file mode 100644
index 0000000000..227ea74f08
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/ntt.h
@@ -0,0 +1,19 @@
+#ifndef NTT_H
+#define NTT_H
+
+/* Declarations for the NTT routines in ntt.c.
+ * int16_t in the prototypes below comes from <stdint.h>. */
+#include <stdint.h>
+#include "params.h"
+
+/* Each name is redefined through KYBER_NAMESPACE so that the symbol carries
+ * the parameter-set-specific prefix chosen in params.h. */
+
+/* Twiddle-factor table defined in ntt.c. */
+#define zetas KYBER_NAMESPACE(zetas)
+extern const int16_t zetas[128];
+
+/* In-place forward transform of 256 coefficients. */
+#define ntt KYBER_NAMESPACE(ntt)
+void ntt(int16_t poly[256]);
+
+/* In-place inverse transform of 256 coefficients. */
+#define invntt KYBER_NAMESPACE(invntt)
+void invntt(int16_t poly[256]);
+
+/* Product of two degree-1 polynomials modulo X^2 - zeta. */
+#define basemul KYBER_NAMESPACE(basemul)
+void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/params.h
new file mode 100644
index 0000000000..36b2b987f3
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/params.h
@@ -0,0 +1,55 @@
+#ifndef PARAMS_H
+#define PARAMS_H
+
+/* Compile-time parameters of one Kyber/ML-KEM parameter set. Everything
+ * below is derived from KYBER_K, which may be predefined by the build. */
+
+/* NOTE(review): this copy sits in the ml-kem-1024 directory but falls back
+ * to 3 when KYBER_K is not predefined; presumably the build always passes
+ * KYBER_K=4 for this directory -- confirm. */
+#ifndef KYBER_K
+#define KYBER_K 3 /* Change this for different security strengths */
+#endif
+
+
+/* Don't change parameters below this line */
+/* Symbol prefix applied to every exported name, one prefix per KYBER_K. */
+#if (KYBER_K == 2)
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_ref_##s
+#elif (KYBER_K == 3)
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_ref_##s
+#elif (KYBER_K == 4)
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_ref_##s
+#else
+#error "KYBER_K must be in {2,3,4}"
+#endif
+
+/* Number of coefficients per polynomial and the modulus. */
+#define KYBER_N 256
+#define KYBER_Q 3329
+
+#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */
+#define KYBER_SSBYTES 32 /* size in bytes of shared key */
+
+/* Serialized polynomial: poly_tobytes packs two coefficients into three
+ * bytes, i.e. 256 * 12 / 8 = 384 bytes. */
+#define KYBER_POLYBYTES 384
+#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES)
+
+/* Per-parameter-set noise parameter and compressed sizes.
+ * 128/160 bytes are 4/5 bits per coefficient (see poly_compress);
+ * 320/352 bytes per polynomial are 10/11 bits per coefficient
+ * (see polyvec_compress). */
+#if KYBER_K == 2
+#define KYBER_ETA1 3
+#define KYBER_POLYCOMPRESSEDBYTES 128
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320)
+#elif KYBER_K == 3
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 128
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320)
+#elif KYBER_K == 4
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 160
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352)
+#endif
+
+#define KYBER_ETA2 2
+
+/* Sizes of the IND-CPA layer objects. */
+#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES)
+#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES)
+#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES)
+#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES)
+
+/* Sizes of the KEM-level objects. */
+#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES)
+/* 32 bytes of additional space to save H(pk) */
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES)
+#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES)
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.c
new file mode 100644
index 0000000000..0fe5a20f63
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.c
@@ -0,0 +1,360 @@
+#include
+#include "params.h"
+#include "poly.h"
+#include "ntt.h"
+#include "reduce.h"
+#include "cbd.h"
+#include "symmetric.h"
+
+/*************************************************
+* Name:        poly_compress
+*
+* Description: Compression and subsequent serialization of a polynomial.
+*              Coefficients are handled in groups of eight: each one is
+*              mapped to its non-negative representative, scaled down to
+*              4 bits (128-byte output) or 5 bits (160-byte output), and
+*              the eight results are bit-packed into 4 or 5 output bytes.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (of length KYBER_POLYCOMPRESSEDBYTES)
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
+{
+  unsigned int i,j;
+  int32_t u;
+  uint32_t d0;
+  uint8_t t[8];
+
+#if (KYBER_POLYCOMPRESSEDBYTES == 128)
+
+  for(i=0;i<KYBER_N/8;i++) {
+    for(j=0;j<8;j++) {
+      // map to positive standard representatives
+      u  = a->coeffs[8*i+j];
+      u += (u >> 15) & KYBER_Q;
+/*    t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
+      // multiply-and-shift form of the division shown in the formula above
+      d0 = u << 4;
+      d0 += 1665;
+      d0 *= 80635;
+      d0 >>= 28;
+      t[j] = d0 & 0xf;
+    }
+
+    // two 4-bit values per output byte
+    r[0] = t[0] | (t[1] << 4);
+    r[1] = t[2] | (t[3] << 4);
+    r[2] = t[4] | (t[5] << 4);
+    r[3] = t[6] | (t[7] << 4);
+    r += 4;
+  }
+#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
+  for(i=0;i<KYBER_N/8;i++) {
+    for(j=0;j<8;j++) {
+      // map to positive standard representatives
+      u  = a->coeffs[8*i+j];
+      u += (u >> 15) & KYBER_Q;
+/*    t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
+      // multiply-and-shift form of the division shown in the formula above
+      d0 = u << 5;
+      d0 += 1664;
+      d0 *= 40318;
+      d0 >>= 27;
+      t[j] = d0 & 0x1f;
+    }
+
+    // eight 5-bit values spread over five output bytes
+    r[0] = (t[0] >> 0) | (t[1] << 5);
+    r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7);
+    r[2] = (t[3] >> 1) | (t[4] << 4);
+    r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6);
+    r[4] = (t[6] >> 2) | (t[7] << 3);
+    r += 5;
+  }
+#else
+#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
+#endif
+}
+
+/*************************************************
+* Name:        poly_decompress
+*
+* Description: De-serialization and subsequent decompression of a polynomial;
+*              approximate inverse of poly_compress.
+*              Unpacks 4-bit (128-byte input) or 5-bit (160-byte input)
+*              values and scales each back up by KYBER_Q with rounding.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of length KYBER_POLYCOMPRESSEDBYTES bytes)
+**************************************************/
+void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES])
+{
+  unsigned int i;
+
+#if (KYBER_POLYCOMPRESSEDBYTES == 128)
+  // one input byte holds two 4-bit values
+  for(i=0;i<KYBER_N/2;i++) {
+    r->coeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4;
+    r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4;
+    a += 1;
+  }
+#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
+  unsigned int j;
+  uint8_t t[8];
+  // five input bytes hold eight 5-bit values; the upper bits left in t[j]
+  // are masked off with & 31 below
+  for(i=0;i<KYBER_N/8;i++) {
+    t[0] = (a[0] >> 0);
+    t[1] = (a[0] >> 5) | (a[1] << 3);
+    t[2] = (a[1] >> 2);
+    t[3] = (a[1] >> 7) | (a[2] << 1);
+    t[4] = (a[2] >> 4) | (a[3] << 4);
+    t[5] = (a[3] >> 1);
+    t[6] = (a[3] >> 6) | (a[4] << 2);
+    t[7] = (a[4] >> 3);
+    a += 5;
+
+    for(j=0;j<8;j++)
+      r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5;
+  }
+#else
+#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
+#endif
+}
+
+/*************************************************
+* Name:        poly_tobytes
+*
+* Description: Serialization of a polynomial.
+*              Each pair of coefficients is mapped to non-negative
+*              representatives and packed as two 12-bit values into
+*              three bytes.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (needs space for KYBER_POLYBYTES bytes)
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a)
+{
+  unsigned int i;
+  uint16_t t0, t1;
+
+  for(i=0;i<KYBER_N/2;i++) {
+    // map to positive standard representatives
+    t0  = a->coeffs[2*i];
+    t0 += ((int16_t)t0 >> 15) & KYBER_Q;
+    t1 = a->coeffs[2*i+1];
+    t1 += ((int16_t)t1 >> 15) & KYBER_Q;
+    r[3*i+0] = (t0 >> 0);
+    r[3*i+1] = (t0 >> 8) | (t1 << 4);
+    r[3*i+2] = (t1 >> 4);
+  }
+}
+
+/*************************************************
+* Name:        poly_frombytes
+*
+* Description: De-serialization of a polynomial;
+*              inverse of poly_tobytes.
+*              Every three input bytes yield two coefficients, each
+*              masked to 12 bits; no reduction modulo KYBER_Q is applied.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of KYBER_POLYBYTES bytes)
+**************************************************/
+void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES])
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N/2;i++) {
+    r->coeffs[2*i]   = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF;
+    r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF;
+  }
+}
+
+/*************************************************
+* Name:        poly_frommsg
+*
+* Description: Convert 32-byte message to polynomial.
+*              Bit j of msg[i] selects coefficient 8*i+j: a set bit gives
+*              (KYBER_Q+1)/2, a clear bit gives 0. The selection is done
+*              with an all-ones/all-zeros mask rather than a branch.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *msg: pointer to input message
+**************************************************/
+void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES])
+{
+  unsigned int i,j;
+  int16_t mask;
+
+#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8)
+#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!"
+#endif
+
+  for(i=0;i<KYBER_N/8;i++) {
+    for(j=0;j<8;j++) {
+      // 0 when the bit is clear, -1 (all ones) when it is set
+      mask = -(int16_t)((msg[i] >> j)&1);
+      r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2);
+    }
+  }
+}
+
+/*************************************************
+* Name:        poly_tomsg
+*
+* Description: Convert polynomial to 32-byte message.
+*              Each coefficient is turned into one bit with a
+*              multiply-and-shift sequence (the commented-out lines show
+*              the division-based formula), and eight bits are collected
+*              per output byte.
+*
+* Arguments:   - uint8_t *msg: pointer to output message
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a)
+{
+  unsigned int i,j;
+  uint32_t t;
+
+  for(i=0;i<KYBER_N/8;i++) {
+    // bits are OR-ed in below, so the byte must start out cleared
+    msg[i] = 0;
+    for(j=0;j<8;j++) {
+      t  = a->coeffs[8*i+j];
+      // t += ((int16_t)t >> 15) & KYBER_Q;
+      // t  = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1;
+      t <<= 1;
+      t += 1665;
+      t *= 80635;
+      t >>= 28;
+      t &= 1;
+      msg[i] |= t << j;
+    }
+  }
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta1
+*
+* Description: Derives a polynomial deterministically from a seed and a
+*              nonce: prf fills a buffer of KYBER_ETA1*KYBER_N/4 bytes,
+*              which poly_cbd_eta1 converts into coefficients (close to a
+*              centered binomial distribution with parameter KYBER_ETA1).
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+  uint8_t prf_out[KYBER_ETA1*KYBER_N/4];
+
+  prf(prf_out, sizeof(prf_out), seed, nonce);
+  poly_cbd_eta1(r, prf_out);
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta2
+*
+* Description: Derives a polynomial deterministically from a seed and a
+*              nonce: prf fills a buffer of KYBER_ETA2*KYBER_N/4 bytes,
+*              which poly_cbd_eta2 converts into coefficients (close to a
+*              centered binomial distribution with parameter KYBER_ETA2).
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+  uint8_t prf_out[KYBER_ETA2*KYBER_N/4];
+
+  prf(prf_out, sizeof(prf_out), seed, nonce);
+  poly_cbd_eta2(r, prf_out);
+}
+
+
+/*************************************************
+* Name:        poly_ntt
+*
+* Description: Applies ntt() to the coefficient array of a polynomial in
+*              place and then reduces every coefficient with poly_reduce;
+*              input assumed to be in normal order, output in bitreversed
+*              order
+*
+* Arguments:   - poly *r: pointer to in/output polynomial
+**************************************************/
+void poly_ntt(poly *r)
+{
+  int16_t *const coeffs = r->coeffs;
+
+  ntt(coeffs);
+  poly_reduce(r);
+}
+
+/*************************************************
+* Name:        poly_invntt_tomont
+*
+* Description: Applies invntt() to the coefficient array of a polynomial
+*              in place; input assumed to be in bitreversed order, output
+*              in normal order
+*
+* Arguments:   - poly *r: pointer to in/output polynomial
+**************************************************/
+void poly_invntt_tomont(poly *r)
+{
+  int16_t *const coeffs = r->coeffs;
+
+  invntt(coeffs);
+}
+
+/*************************************************
+* Name:        poly_basemul_montgomery
+*
+* Description: Multiplication of two polynomials in NTT domain.
+*              Coefficients are processed four at a time as two pairs;
+*              the first pair is multiplied with zetas[64+i], the second
+*              with its negation.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_basemul_montgomery(poly *r, const poly *a, const poly *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N/4;i++) {
+    basemul(&r->coeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]);
+    basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]);
+  }
+}
+
+/*************************************************
+* Name:        poly_tomont
+*
+* Description: Inplace conversion of all coefficients of a polynomial
+*              from normal domain to Montgomery domain.
+*              Each coefficient is multiplied by 2^32 mod KYBER_Q and
+*              passed through montgomery_reduce.
+*
+* Arguments:   - poly *r: pointer to input/output polynomial
+**************************************************/
+void poly_tomont(poly *r)
+{
+  unsigned int i;
+  const int16_t f = (1ULL << 32) % KYBER_Q;
+  for(i=0;i<KYBER_N;i++)
+    r->coeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f);
+}
+
+/*************************************************
+* Name:        poly_reduce
+*
+* Description: Applies Barrett reduction to all coefficients of a polynomial
+*              for details of the Barrett reduction see comments in reduce.c
+*
+* Arguments:   - poly *r: pointer to input/output polynomial
+**************************************************/
+void poly_reduce(poly *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N;i++)
+    r->coeffs[i] = barrett_reduce(r->coeffs[i]);
+}
+
+/*************************************************
+* Name:        poly_add
+*
+* Description: Add two polynomials coefficient by coefficient;
+*              no modular reduction is performed
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_add(poly *r, const poly *a, const poly *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N;i++)
+    r->coeffs[i] = a->coeffs[i] + b->coeffs[i];
+}
+
+/*************************************************
+* Name:        poly_sub
+*
+* Description: Subtract two polynomials coefficient by coefficient;
+*              no modular reduction is performed
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_sub(poly *r, const poly *a, const poly *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N;i++)
+    r->coeffs[i] = a->coeffs[i] - b->coeffs[i];
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.h
new file mode 100644
index 0000000000..9a99c7cdad
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/poly.h
@@ -0,0 +1,53 @@
+#ifndef POLY_H
+#define POLY_H
+
+/* Polynomial type and the operations implemented in poly.c.
+ * int16_t / uint8_t below come from <stdint.h>. */
+#include <stdint.h>
+#include "params.h"
+
+/*
+ * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
+ * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
+ */
+typedef struct{
+  int16_t coeffs[KYBER_N];
+} poly;
+
+/* Every function name is redefined through KYBER_NAMESPACE so the symbol
+ * carries the parameter-set-specific prefix chosen in params.h. */
+
+/* Lossy (de)serialization to KYBER_POLYCOMPRESSEDBYTES bytes. */
+#define poly_compress KYBER_NAMESPACE(poly_compress)
+void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a);
+#define poly_decompress KYBER_NAMESPACE(poly_decompress)
+void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]);
+
+/* (De)serialization to KYBER_POLYBYTES bytes. */
+#define poly_tobytes KYBER_NAMESPACE(poly_tobytes)
+void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a);
+#define poly_frombytes KYBER_NAMESPACE(poly_frombytes)
+void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]);
+
+/* Conversion between a KYBER_INDCPA_MSGBYTES-byte message and a polynomial. */
+#define poly_frommsg KYBER_NAMESPACE(poly_frommsg)
+void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]);
+#define poly_tomsg KYBER_NAMESPACE(poly_tomsg)
+void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r);
+
+/* Deterministic noise sampling from a seed and a one-byte nonce. */
+#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1)
+void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2)
+void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+/* NTT-domain transforms and arithmetic. */
+#define poly_ntt KYBER_NAMESPACE(poly_ntt)
+void poly_ntt(poly *r);
+#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont)
+void poly_invntt_tomont(poly *r);
+#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery)
+void poly_basemul_montgomery(poly *r, const poly *a, const poly *b);
+#define poly_tomont KYBER_NAMESPACE(poly_tomont)
+void poly_tomont(poly *r);
+
+/* Barrett reduction of every coefficient. */
+#define poly_reduce KYBER_NAMESPACE(poly_reduce)
+void poly_reduce(poly *r);
+
+/* Coefficient-wise addition / subtraction without reduction. */
+#define poly_add KYBER_NAMESPACE(poly_add)
+void poly_add(poly *r, const poly *a, const poly *b);
+#define poly_sub KYBER_NAMESPACE(poly_sub)
+void poly_sub(poly *r, const poly *a, const poly *b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.c
new file mode 100644
index 0000000000..661c71ec32
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.c
@@ -0,0 +1,247 @@
+#include
+#include "params.h"
+#include "poly.h"
+#include "polyvec.h"
+
+/*************************************************
+* Name:        polyvec_compress
+*
+* Description: Compress and serialize vector of polynomials.
+*              Every coefficient is mapped to its non-negative
+*              representative and scaled to 11 bits (352 bytes per
+*              polynomial) or 10 bits (320 bytes per polynomial) with a
+*              multiply-and-shift sequence; the commented-out lines show
+*              the division-based formula.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (needs space for KYBER_POLYVECCOMPRESSEDBYTES)
+*              - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
+{
+  unsigned int i,j,k;
+  uint64_t d0;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+  uint16_t t[8];
+  for(i=0;i<KYBER_K;i++) {
+    for(j=0;j<KYBER_N/8;j++) {
+      for(k=0;k<8;k++) {
+        t[k]  = a->vec[i].coeffs[8*j+k];
+        t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
+/*      t[k]  = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
+        d0 = t[k];
+        d0 <<= 11;
+        d0 += 1664;
+        d0 *= 645084;
+        d0 >>= 31;
+        t[k] = d0 & 0x7ff;
+
+      }
+
+      // eight 11-bit values spread over eleven output bytes
+      r[ 0] = (t[0] >>  0);
+      r[ 1] = (t[0] >>  8) | (t[1] << 3);
+      r[ 2] = (t[1] >>  5) | (t[2] << 6);
+      r[ 3] = (t[2] >>  2);
+      r[ 4] = (t[2] >> 10) | (t[3] << 1);
+      r[ 5] = (t[3] >>  7) | (t[4] << 4);
+      r[ 6] = (t[4] >>  4) | (t[5] << 7);
+      r[ 7] = (t[5] >>  1);
+      r[ 8] = (t[5] >>  9) | (t[6] << 2);
+      r[ 9] = (t[6] >>  6) | (t[7] << 5);
+      r[10] = (t[7] >>  3);
+      r += 11;
+    }
+  }
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+  uint16_t t[4];
+  for(i=0;i<KYBER_K;i++) {
+    for(j=0;j<KYBER_N/4;j++) {
+      for(k=0;k<4;k++) {
+        t[k]  = a->vec[i].coeffs[4*j+k];
+        t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
+/*      t[k]  = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
+        d0 = t[k];
+        d0 <<= 10;
+        d0 += 1665;
+        d0 *= 1290167;
+        d0 >>= 32;
+        t[k] = d0 & 0x3ff;
+      }
+
+      // four 10-bit values spread over five output bytes
+      r[0] = (t[0] >> 0);
+      r[1] = (t[0] >> 8) | (t[1] << 2);
+      r[2] = (t[1] >> 6) | (t[2] << 4);
+      r[3] = (t[2] >> 4) | (t[3] << 6);
+      r[4] = (t[3] >> 2);
+      r += 5;
+    }
+  }
+#else
+#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}"
+#endif
+}
+
+/*************************************************
+* Name:        polyvec_decompress
+*
+* Description: De-serialize and decompress vector of polynomials;
+*              approximate inverse of polyvec_compress.
+*              Unpacks 11-bit or 10-bit values and scales each back up by
+*              KYBER_Q with rounding.
+*
+* Arguments:   - polyvec *r:       pointer to output vector of polynomials
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of length KYBER_POLYVECCOMPRESSEDBYTES)
+**************************************************/
+void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES])
+{
+  unsigned int i,j,k;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+  uint16_t t[8];
+  for(i=0;i<KYBER_K;i++) {
+    for(j=0;j<KYBER_N/8;j++) {
+      // eleven input bytes hold eight 11-bit values; surplus upper bits
+      // are masked off with & 0x7FF below
+      t[0] = (a[0] >> 0) | ((uint16_t)a[ 1] << 8);
+      t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5);
+      t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10);
+      t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7);
+      t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4);
+      t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9);
+      t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6);
+      t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3);
+      a += 11;
+
+      for(k=0;k<8;k++)
+        r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) >> 11;
+    }
+  }
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+  uint16_t t[4];
+  for(i=0;i<KYBER_K;i++) {
+    for(j=0;j<KYBER_N/4;j++) {
+      // five input bytes hold four 10-bit values; surplus upper bits
+      // are masked off with & 0x3FF below
+      t[0] = (a[0] >> 0) | ((uint16_t)a[1] << 8);
+      t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6);
+      t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4);
+      t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2);
+      a += 5;
+
+      for(k=0;k<4;k++)
+        r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10;
+    }
+  }
+#else
+#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}"
+#endif
+}
+
+/*************************************************
+* Name:        polyvec_tobytes
+*
+* Description: Serialize vector of polynomials by calling poly_tobytes on
+*              each element; element i is written at byte offset
+*              i*KYBER_POLYBYTES.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (needs space for KYBER_POLYVECBYTES)
+*              - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_tobytes(r+i*KYBER_POLYBYTES, &a->vec[i]);
+}
+
+/*************************************************
+* Name:        polyvec_frombytes
+*
+* Description: De-serialize vector of polynomials;
+*              inverse of polyvec_tobytes. Element i is read from byte
+*              offset i*KYBER_POLYBYTES with poly_frombytes.
+*
+* Arguments:   - polyvec *r:       pointer to output vector of polynomials
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of length KYBER_POLYVECBYTES)
+**************************************************/
+void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES])
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_frombytes(&r->vec[i], a+i*KYBER_POLYBYTES);
+}
+
+/*************************************************
+* Name:        polyvec_ntt
+*
+* Description: Apply forward NTT (poly_ntt) to all elements of a vector
+*              of polynomials
+*
+* Arguments:   - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+void polyvec_ntt(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_ntt(&r->vec[i]);
+}
+
+/*************************************************
+* Name:        polyvec_invntt_tomont
+*
+* Description: Apply inverse NTT (poly_invntt_tomont) to all elements of
+*              a vector of polynomials and multiply by Montgomery factor 2^16
+*
+* Arguments:   - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+void polyvec_invntt_tomont(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_invntt_tomont(&r->vec[i]);
+}
+
+/*************************************************
+* Name:        polyvec_basemul_acc_montgomery
+*
+* Description: Multiply elements of a and b in NTT domain, accumulate into r,
+*              and multiply by 2^-16.
+*              The first product is written straight into r; each further
+*              product goes through a temporary and is added with
+*              poly_add. A single poly_reduce is applied at the end.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const polyvec *a: pointer to first input vector of polynomials
+*              - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
+{
+  unsigned int i;
+  poly t;
+
+  poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]);
+  for(i=1;i<KYBER_K;i++) {
+    poly_basemul_montgomery(&t, &a->vec[i], &b->vec[i]);
+    poly_add(r, r, &t);
+  }
+
+  poly_reduce(r);
+}
+
+/*************************************************
+* Name:        polyvec_reduce
+*
+* Description: Applies Barrett reduction to each coefficient
+*              of each element of a vector of polynomials;
+*              for details of the Barrett reduction see comments in reduce.c
+*
+* Arguments:   - polyvec *r: pointer to input/output vector of polynomials
+**************************************************/
+void polyvec_reduce(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_reduce(&r->vec[i]);
+}
+
+/*************************************************
+* Name:        polyvec_add
+*
+* Description: Add vectors of polynomials element by element with poly_add
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+*            - const polyvec *a: pointer to first input vector of polynomials
+*            - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_add(&r->vec[i], &a->vec[i], &b->vec[i]);
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.h
new file mode 100644
index 0000000000..57b605494e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/polyvec.h
@@ -0,0 +1,36 @@
+#ifndef POLYVEC_H
+#define POLYVEC_H
+
+/* Vector-of-polynomials type and the operations implemented in polyvec.c.
+ * uint8_t below comes from <stdint.h>. */
+#include <stdint.h>
+#include "params.h"
+#include "poly.h"
+
+/* A vector of KYBER_K polynomials. */
+typedef struct{
+  poly vec[KYBER_K];
+} polyvec;
+
+/* Every function name is redefined through KYBER_NAMESPACE so the symbol
+ * carries the parameter-set-specific prefix chosen in params.h. */
+
+/* Lossy (de)serialization to KYBER_POLYVECCOMPRESSEDBYTES bytes. */
+#define polyvec_compress KYBER_NAMESPACE(polyvec_compress)
+void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a);
+#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress)
+void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]);
+
+/* (De)serialization to KYBER_POLYVECBYTES bytes. */
+#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes)
+void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a);
+#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes)
+void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]);
+
+/* Element-wise forward / inverse NTT. */
+#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt)
+void polyvec_ntt(polyvec *r);
+#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont)
+void polyvec_invntt_tomont(polyvec *r);
+
+/* Inner product of two vectors in the NTT domain. */
+#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery)
+void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b);
+
+/* Barrett reduction of every coefficient of every element. */
+#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce)
+void polyvec_reduce(polyvec *r);
+
+/* Element-wise addition. */
+#define polyvec_add KYBER_NAMESPACE(polyvec_add)
+void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.c
new file mode 100644
index 0000000000..9d8e7edf83
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.c
@@ -0,0 +1,42 @@
+#include
+#include "params.h"
+#include "reduce.h"
+
+/*************************************************
+* Name:        montgomery_reduce
+*
+* Description: Montgomery reduction; given a 32-bit integer a, computes
+*              16-bit integer congruent to a * R^-1 mod q, where R=2^16.
+*              The low 16 bits of a are multiplied by QINV to obtain a
+*              16-bit factor m; m*KYBER_Q is subtracted from a and the
+*              difference is shifted right by 16.
+*
+* Arguments:   - int32_t a: input integer to be reduced;
+*                           has to be in {-q2^15,...,q2^15-1}
+*
+* Returns:     integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q.
+**************************************************/
+int16_t montgomery_reduce(int32_t a)
+{
+  const int16_t m = (int16_t)a*QINV;
+
+  return (a - (int32_t)m*KYBER_Q) >> 16;
+}
+
+/*************************************************
+* Name:        barrett_reduce
+*
+* Description: Barrett reduction; given a 16-bit integer a, computes
+*              centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2}.
+*              A rounded quotient is obtained by multiplying with
+*              round(2^26/q) and shifting right by 26; that quotient
+*              times q is then subtracted from a.
+*
+* Arguments:   - int16_t a: input integer to be reduced
+*
+* Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
+**************************************************/
+int16_t barrett_reduce(int16_t a) {
+  const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q;
+  const int16_t quotient = ((int32_t)v*a + (1<<25)) >> 26;
+  const int16_t multiple = quotient * KYBER_Q;
+
+  return a - multiple;
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.h
new file mode 100644
index 0000000000..c1bc1e4c7b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/reduce.h
@@ -0,0 +1,16 @@
+#ifndef REDUCE_H
+#define REDUCE_H
+
+/* Constants and prototypes for the modular reductions in reduce.c.
+ * int16_t / int32_t below come from <stdint.h>. */
+#include <stdint.h>
+#include "params.h"
+
+#define MONT -1044 // 2^16 mod q
+#define QINV -3327 // q^-1 mod 2^16
+
+/* Both names are redefined through KYBER_NAMESPACE so the symbols carry the
+ * parameter-set-specific prefix chosen in params.h. */
+#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce)
+int16_t montgomery_reduce(int32_t a);
+
+#define barrett_reduce KYBER_NAMESPACE(barrett_reduce)
+int16_t barrett_reduce(int16_t a);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric-shake.c
new file mode 100644
index 0000000000..20f451882e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric-shake.c
@@ -0,0 +1,74 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include "params.h"
+#include "symmetric.h"
+#include "fips202.h"
+
+/*************************************************
+* Name: kyber_shake128_absorb
+*
+* Description: Absorb step of the SHAKE128 specialized for the Kyber context.
+*
+* Arguments: - shake128incctx *state: pointer to the SHAKE128 state that absorbs the input
+* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state
+* - uint8_t x: additional byte of input, placed right after the seed
+* - uint8_t y: additional byte of input, placed after x
+**************************************************/
+void kyber_shake128_absorb(shake128incctx *state,
+ const uint8_t seed[KYBER_SYMBYTES],
+ uint8_t x,
+ uint8_t y)
+{
+ uint8_t extseed[KYBER_SYMBYTES+2]; /* seed || x || y */
+
+ memcpy(extseed, seed, KYBER_SYMBYTES);
+ extseed[KYBER_SYMBYTES+0] = x;
+ extseed[KYBER_SYMBYTES+1] = y;
+
+ shake128_absorb_once(state, extseed, sizeof(extseed)); /* whole buffer in a single call */
+}
+
+/*************************************************
+* Name:        kyber_shake256_prf
+*
+* Description: SHAKE256-based PRF: hashes the secret key followed by a
+*              one-byte public nonce and writes outlen bytes of output.
+*
+* Arguments:   - uint8_t *out: pointer to output
+*              - size_t outlen: number of requested output bytes
+*              - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
+*              - uint8_t nonce: single-byte nonce (public PRF input)
+**************************************************/
+void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce)
+{
+  uint8_t keyed_input[KYBER_SYMBYTES+1]; /* key || nonce */
+
+  memcpy(keyed_input, key, KYBER_SYMBYTES);
+  keyed_input[KYBER_SYMBYTES] = nonce;
+
+  shake256(out, outlen, keyed_input, sizeof keyed_input);
+}
+
+/*************************************************
+* Name: kyber_shake256_rkprf
+*
+* Description: Usage of SHAKE256 as a PRF, absorbs the key followed by the
+* input and then generates KYBER_SSBYTES bytes of SHAKE256 output
+*
+* Arguments: - uint8_t *out: pointer to output (of length KYBER_SSBYTES)
+* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
+* - const uint8_t *input: pointer to the input (of length KYBER_CIPHERTEXTBYTES)
+* The incremental SHAKE256 context is released before returning.
+**************************************************/
+void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES])
+{
+ shake256incctx s;
+
+ shake256_inc_init(&s);
+ shake256_inc_absorb(&s, key, KYBER_SYMBYTES);
+ shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES);
+ shake256_inc_finalize(&s);
+ shake256_inc_squeeze(out, KYBER_SSBYTES, &s);
+ shake256_inc_ctx_release(&s);
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric.h
new file mode 100644
index 0000000000..2acc66f98d
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/symmetric.h
@@ -0,0 +1,35 @@
+#ifndef SYMMETRIC_H
+#define SYMMETRIC_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "params.h"
+
+#include "fips202.h"
+
+typedef shake128incctx xof_state; /* state type handled by the xof_* macros below */
+
+#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb)
+void kyber_shake128_absorb(shake128incctx *s,
+                           const uint8_t seed[KYBER_SYMBYTES],
+                           uint8_t x,
+                           uint8_t y);
+
+#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf)
+void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce);
+
+#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf)
+void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]);
+
+#define XOF_BLOCKBYTES SHAKE128_RATE /* bytes produced per squeezed XOF block */
+
+#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES)
+#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES)
+#define xof_init(STATE, SEED) shake128_inc_init(STATE) /* SEED is not used by this expansion */
+#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y)
+#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE)
+#define xof_release(STATE) shake128_inc_ctx_release(STATE)
+#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE)
+#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT)
+
+#endif /* SYMMETRIC_H */
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.c
new file mode 100644
index 0000000000..ed4a6541f8
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.c
@@ -0,0 +1,47 @@
+#include <stddef.h>
+#include <stdint.h>
+#include "verify.h"
+
+/*************************************************
+* Name: verify
+*
+* Description: Compare two arrays for equality in constant time.
+*
+* Arguments: const uint8_t *a: pointer to first byte array
+* const uint8_t *b: pointer to second byte array
+* size_t len: length of the byte arrays
+*
+* Returns 0 if the byte arrays are equal, 1 otherwise
+**************************************************/
+int verify(const uint8_t *a, const uint8_t *b, size_t len)
+{
+  size_t i;
+  uint8_t r = 0;
+
+  for(i=0;i<len;i++)
+    r |= a[i] ^ b[i]; /* accumulate every difference; no early exit */
+
+  return (-(uint64_t)r) >> 63; /* 1 if r != 0, else 0, without branching */
+}
+
+/*************************************************
+* Name: cmov
+*
+* Description: Copy len bytes from x to r if b is 1;
+* don't modify r if b is 0. Requires b to be in {0,1};
+* assumes two's complement representation of negative integers.
+* Runs in constant time.
+*
+* Arguments: uint8_t *r: pointer to output byte array
+* const uint8_t *x: pointer to input byte array
+* size_t len: Amount of bytes to be copied
+* uint8_t b: Condition bit; has to be in {0,1}
+**************************************************/
+void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+{
+  size_t i;
+
+  b = -b; /* 0x00 when b was 0, 0xFF when b was 1 */
+  for(i=0;i<len;i++)
+    r[i] ^= b & (r[i] ^ x[i]); /* becomes x[i] under the 0xFF mask, stays r[i] otherwise */
+}
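diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.h
new file mode 100644
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024-ipd_ref/verify.h
@@ -0,0 +1,14 @@
+#ifndef VERIFY_H
+#define VERIFY_H
+
+#include <stddef.h>
+#include <stdint.h>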
+#include "params.h"
+
+#define verify KYBER_NAMESPACE(verify)
+int verify(const uint8_t *a, const uint8_t *b, size_t len);
+
+#define cmov KYBER_NAMESPACE(cmov)
+void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/LICENSE
new file mode 100644
index 0000000000..7922ab8007
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/LICENSE
@@ -0,0 +1,6 @@
+Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/);
+or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html).
+
+For Keccak and AES we are using public-domain
+code from sources and by authors listed in
+comments on top of the respective files.
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/align.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/align.h
new file mode 100644
index 0000000000..3463866f37
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/align.h
@@ -0,0 +1,19 @@
+#ifndef ALIGN_H
+#define ALIGN_H
+
+#include <stdint.h>
+#include <immintrin.h>
+/* N bytes overlaid with ceil(N/32) __m256i vectors, so coeffs shares the vectors' storage */
+#define ALIGNED_UINT8(N) \
+  union { \
+    uint8_t coeffs[N]; \
+    __m256i vec[(N+31)/32]; \
+  }
+/* N int16_t values overlaid with ceil(N/16) __m256i vectors */
+#define ALIGNED_INT16(N) \
+  union { \
+    int16_t coeffs[N]; \
+    __m256i vec[(N+15)/16]; \
+  }
+
+#endif /* ALIGN_H */
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/api.h
new file mode 100644
index 0000000000..a154e80f1d
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/api.h
@@ -0,0 +1,66 @@
+#ifndef API_H
+#define API_H
+
+#include <stdint.h>
+
+#define pqcrystals_kyber512_SECRETKEYBYTES 1632
+#define pqcrystals_kyber512_PUBLICKEYBYTES 800
+#define pqcrystals_kyber512_CIPHERTEXTBYTES 768
+#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber512_ENCCOINBYTES 32
+#define pqcrystals_kyber512_BYTES 32
+
+#define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES
+#define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES
+#define pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES
+#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES
+#define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES
+#define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES
+
+int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber768_SECRETKEYBYTES 2400
+#define pqcrystals_kyber768_PUBLICKEYBYTES 1184
+#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088
+#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber768_ENCCOINBYTES 32
+#define pqcrystals_kyber768_BYTES 32
+
+#define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES
+#define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES
+#define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES
+#define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES
+#define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES
+#define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES
+
+int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber1024_SECRETKEYBYTES 3168
+#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568
+#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568
+#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber1024_ENCCOINBYTES 32
+#define pqcrystals_kyber1024_BYTES 32
+
+#define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES
+#define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES
+#define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES
+#define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES
+#define pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES
+#define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES
+
+int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/basemul.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/basemul.S
new file mode 100644
index 0000000000..36990639b2
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/basemul.S
@@ -0,0 +1,105 @@
+#include "consts.h"
+
+.macro schoolbook off
+vmovdqa _16XQINV*2(%rcx),%ymm0 # ymm0 = qinv constants from the table at rcx
+vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0
+vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0
+vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1
+vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1
+
+vpmullw %ymm0,%ymm1,%ymm9 # a0.lo
+vpmullw %ymm0,%ymm2,%ymm10 # b0.lo
+vpmullw %ymm0,%ymm3,%ymm11 # a1.lo
+vpmullw %ymm0,%ymm4,%ymm12 # b1.lo
+
+vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0
+vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0
+
+vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi
+vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi
+vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi
+vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi
+
+vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1
+vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1
+
+vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi
+vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi
+vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi
+vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi
+
+vmovdqa %ymm13,(%rsp) # spill a0c0.hi to the stack slot, ymm13 is reused next
+
+vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo
+vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo
+vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo
+vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo
+
+vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo
+vpmullw %ymm8,%ymm11,%ymm11 # a1d1.lo
+vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo
+vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo
+
+vmovdqa _16XQ*2(%rcx),%ymm8 # ymm8 = q constants
+vpmulhw %ymm8,%ymm13,%ymm13 # high half of (lo product times q), for each of the eight terms
+vpmulhw %ymm8,%ymm9,%ymm9
+vpmulhw %ymm8,%ymm5,%ymm5
+vpmulhw %ymm8,%ymm10,%ymm10
+vpmulhw %ymm8,%ymm6,%ymm6
+vpmulhw %ymm8,%ymm11,%ymm11
+vpmulhw %ymm8,%ymm7,%ymm7
+vpmulhw %ymm8,%ymm12,%ymm12
+
+vpsubw (%rsp),%ymm13,%ymm13 # -a0c0
+vpsubw %ymm9,%ymm1,%ymm9 # a0d0
+vpsubw %ymm5,%ymm14,%ymm5 # b0c0
+vpsubw %ymm10,%ymm2,%ymm10 # b0d0
+
+vpsubw %ymm6,%ymm15,%ymm6 # a1c1
+vpsubw %ymm11,%ymm3,%ymm11 # a1d1
+vpsubw %ymm7,%ymm0,%ymm7 # b1c1
+vpsubw %ymm12,%ymm4,%ymm12 # b1d1
+
+vmovdqa (%r9),%ymm0 # multiplier used with vpmullw below, pointer prepared by the caller
+vmovdqa 32(%r9),%ymm1 # multiplier used with vpmulhw below, 32 bytes further on
+vpmullw %ymm0,%ymm10,%ymm2
+vpmullw %ymm0,%ymm12,%ymm3
+vpmulhw %ymm1,%ymm10,%ymm10
+vpmulhw %ymm1,%ymm12,%ymm12
+vpmulhw %ymm8,%ymm2,%ymm2
+vpmulhw %ymm8,%ymm3,%ymm3
+vpsubw %ymm2,%ymm10,%ymm10 # rb0d0
+vpsubw %ymm3,%ymm12,%ymm12 # rb1d1
+
+vpaddw %ymm5,%ymm9,%ymm9 # a0d0 + b0c0
+vpaddw %ymm7,%ymm11,%ymm11 # a1d1 + b1c1
+vpsubw %ymm13,%ymm10,%ymm13 # rb0d0 - (-a0c0)
+vpsubw %ymm12,%ymm6,%ymm6 # a1c1 - rb1d1
+
+vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) # results go to the same offsets in the output at rdi
+vmovdqa %ymm9,(64*\off+16)*2(%rdi)
+vmovdqa %ymm6,(64*\off+32)*2(%rdi)
+vmovdqa %ymm11,(64*\off+48)*2(%rdi)
+.endm
+
+.text
+.global cdecl(basemul_avx)
+cdecl(basemul_avx):
+mov %rsp,%r8 # keep the incoming stack pointer
+and $-32,%rsp # align the stack to 32 bytes for vmovdqa
+sub $32,%rsp # one 32-byte scratch slot, used by the spill in schoolbook
+
+lea (_ZETAS_EXP+176)*2(%rcx),%r9 # r9 = address of the multiplier vectors read by schoolbook
+schoolbook 0
+
+add $32*2,%r9 # advance r9 by 32 table entries
+schoolbook 1
+
+add $192*2,%r9 # advance r9 by 192 table entries
+schoolbook 2
+
+add $32*2,%r9 # advance r9 by 32 table entries
+schoolbook 3
+
+mov %r8,%rsp # restore the stack pointer saved on entry
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.c
new file mode 100644
index 0000000000..dad473c79e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.c
@@ -0,0 +1,144 @@
+#include <stdint.h>
+#include <immintrin.h>
+#include "params.h"
+#include "cbd.h"
+
+/*************************************************
+* Name: cbd2
+*
+* Description: Given an array of uniformly random bytes, compute
+* polynomial with coefficients distributed according to
+* a centered binomial distribution with parameter eta=2
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const __m256i *buf: pointer to aligned input byte array
+**************************************************/
+static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128])
+{
+ unsigned int i;
+ __m256i f0, f1, f2, f3;
+ const __m256i mask55 = _mm256_set1_epi32(0x55555555);
+ const __m256i mask33 = _mm256_set1_epi32(0x33333333);
+ const __m256i mask03 = _mm256_set1_epi32(0x03030303);
+ const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F);
+
+ for(i = 0; i < KYBER_N/64; i++) { /* one 32-byte vector in, four 16-coefficient vectors out */
+ f0 = _mm256_load_si256(&buf[i]);
+
+ f1 = _mm256_srli_epi16(f0, 1);
+ f0 = _mm256_and_si256(mask55, f0);
+ f1 = _mm256_and_si256(mask55, f1);
+ f0 = _mm256_add_epi8(f0, f1); /* each 2-bit field = number of set bits in that bit pair */
+
+ f1 = _mm256_srli_epi16(f0, 2);
+ f0 = _mm256_and_si256(mask33, f0);
+ f1 = _mm256_and_si256(mask33, f1);
+ f0 = _mm256_add_epi8(f0, mask33);
+ f0 = _mm256_sub_epi8(f0, f1); /* each nibble = 3 + (low pair count) - (high pair count) */
+
+ f1 = _mm256_srli_epi16(f0, 4);
+ f0 = _mm256_and_si256(mask0F, f0); /* low nibble of every byte */
+ f1 = _mm256_and_si256(mask0F, f1); /* high nibble of every byte */
+ f0 = _mm256_sub_epi8(f0, mask03); /* remove the +3 bias: signed value per byte */
+ f1 = _mm256_sub_epi8(f1, mask03);
+
+ f2 = _mm256_unpacklo_epi8(f0, f1); /* interleave low/high-nibble values, per 128-bit lane */
+ f3 = _mm256_unpackhi_epi8(f0, f1);
+
+ f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); /* sign-extend bytes to int16 */
+ f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1));
+ f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3));
+ f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1));
+
+ _mm256_store_si256(&r->vec[4*i+0], f0); /* store order f0,f2,f1,f3 undoes the per-lane split of the unpacks */
+ _mm256_store_si256(&r->vec[4*i+1], f2);
+ _mm256_store_si256(&r->vec[4*i+2], f1);
+ _mm256_store_si256(&r->vec[4*i+3], f3);
+ }
+}
+
+#if KYBER_ETA1 == 3
+/*************************************************
+* Name: cbd3
+*
+* Description: Given an array of uniformly random bytes, compute
+* polynomial with coefficients distributed according to
+* a centered binomial distribution with parameter eta=3
+* This function is only needed for Kyber-512
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *buf: pointer to input byte array (read with unaligned loads)
+**************************************************/
+static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8])
+{
+ unsigned int i;
+ __m256i f0, f1, f2, f3;
+ const __m256i mask249 = _mm256_set1_epi32(0x249249); /* every third bit of the low 24 bits */
+ const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); /* the value 3 in every 3-bit field */
+ const __m256i mask07 = _mm256_set1_epi32(7);
+ const __m256i mask70 = _mm256_set1_epi32(7 << 16);
+ const __m256i mask3 = _mm256_set1_epi16(3);
+ /* -1 entries zero the top byte of each 32-bit lane, so every lane holds 3 input bytes */
+ const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4,
+ -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0);
+
+ for(i = 0; i < KYBER_N/32; i++) {
+ f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); /* 32-byte load at a 24-byte stride; the last load ends at 3*KYBER_N/4+8 */
+ f0 = _mm256_permute4x64_epi64(f0,0x94); /* qwords 0,1,1,2: the high lane starts at input byte 8 */
+ f0 = _mm256_shuffle_epi8(f0,shufbidx);
+
+ f1 = _mm256_srli_epi32(f0,1);
+ f2 = _mm256_srli_epi32(f0,2);
+ f0 = _mm256_and_si256(mask249,f0);
+ f1 = _mm256_and_si256(mask249,f1);
+ f2 = _mm256_and_si256(mask249,f2);
+ f0 = _mm256_add_epi32(f0,f1);
+ f0 = _mm256_add_epi32(f0,f2); /* each 3-bit field = number of set bits among its 3 bits */
+
+ f1 = _mm256_srli_epi32(f0,3);
+ f0 = _mm256_add_epi32(f0,mask6DB);
+ f0 = _mm256_sub_epi32(f0,f1); /* field k = 3 + count k - count k+1 */
+
+ f1 = _mm256_slli_epi32(f0,10); /* field at bit 6 moved to bit 16 */
+ f2 = _mm256_srli_epi32(f0,12); /* field at bit 12 moved to bit 0 */
+ f3 = _mm256_srli_epi32(f0, 2); /* field at bit 18 moved to bit 16 */
+ f0 = _mm256_and_si256(f0,mask07);
+ f1 = _mm256_and_si256(f1,mask70);
+ f2 = _mm256_and_si256(f2,mask07);
+ f3 = _mm256_and_si256(f3,mask70);
+ f0 = _mm256_add_epi16(f0,f1); /* two biased values per 32-bit lane, one per 16-bit half */
+ f1 = _mm256_add_epi16(f2,f3);
+ f0 = _mm256_sub_epi16(f0,mask3); /* remove the +3 bias */
+ f1 = _mm256_sub_epi16(f1,mask3);
+
+ f2 = _mm256_unpacklo_epi32(f0,f1); /* bring the four values of each input triple together */
+ f3 = _mm256_unpackhi_epi32(f0,f1);
+
+ f0 = _mm256_permute2x128_si256(f2,f3,0x20); /* low lanes of f2,f3 */
+ f1 = _mm256_permute2x128_si256(f2,f3,0x31); /* high lanes of f2,f3 */
+
+ _mm256_store_si256(&r->vec[2*i+0], f0);
+ _mm256_store_si256(&r->vec[2*i+1], f1);
+ }
+}
+#endif
+
+/* Sample r with parameter KYBER_ETA1; buf carries one extra __m256i (32 bytes) of slack for cbd3's 32-byte loads */
+void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1])
+{
+#if KYBER_ETA1 == 2
+  cbd2(r, buf);
+#elif KYBER_ETA1 == 3
+  cbd3(r, (const uint8_t *)buf); /* keep the const qualifier; cbd3 only reads the bytes */
+#else
+#error "This implementation requires eta1 in {2,3}"
+#endif
+}
+
+void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128])
+{
+#if KYBER_ETA2 == 2
+ cbd2(r, buf); /* eta2 sampling is the eta=2 sampler applied to buf */
+#else
+#error "This implementation requires eta2 = 2"
+#endif
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.h
new file mode 100644
index 0000000000..05788e06b4
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/cbd.h
@@ -0,0 +1,15 @@
+#ifndef CBD_H
+#define CBD_H
+
+#include <stdint.h>
+#include <immintrin.h>
+#include "params.h"
+#include "poly.h"
+
+#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1)
+void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]); /* +1: one extra vector of slack */
+
+#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2)
+void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]);
+
+#endif /* CBD_H */
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.c
new file mode 100644
index 0000000000..84e596893d
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.c
@@ -0,0 +1,121 @@
+#include "align.h"
+#include "params.h"
+#include "consts.h"
+
+#define Q KYBER_Q // modulus q
+#define MONT -1044 // 2^16 mod q
+#define QINV -3327 // q^-1 mod 2^16
+#define V 20159 // floor(2^26/q + 0.5)
+#define FHI 1441 // mont^2/128 mod q
+#define FLO -10079 // qinv*FHI mod 2^16
+#define MONTSQHI 1353 // mont^2 mod q
+#define MONTSQLO 20553 // qinv*MONTSQHI mod 2^16
+#define MASK 4095 // 2^12 - 1
+#define SHIFT 32 // NOTE(review): the code that reads _16XSHIFT is not in this file; confirm its meaning there
+
+const qdata_t qdata = {{
+#define _16XQ 0
+ Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q,
+
+#define _16XQINV 16
+ QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV,
+ QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV,
+
+#define _16XV 32
+ V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+
+#define _16XFLO 48
+ FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO,
+ FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO,
+
+#define _16XFHI 64
+ FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI,
+ FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI,
+
+#define _16XMONTSQLO 80
+ MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
+ MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
+ MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
+ MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
+
+#define _16XMONTSQHI 96
+ MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
+ MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
+ MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
+ MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
+
+#define _16XMASK 112
+ MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK,
+ MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK,
+
+#define _REVIDXB 128
+ 3854, 3340, 2826, 2312, 1798, 1284, 770, 256,
+ 3854, 3340, 2826, 2312, 1798, 1284, 770, 256,
+
+#define _REVIDXD 144
+ 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0,
+
+#define _ZETAS_EXP 160
+ 31498, 31498, 31498, 31498, -758, -758, -758, -758,
+ 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397,
+ 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745,
+ 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745,
+ -359, -359, -359, -359, -359, -359, -359, -359,
+ -359, -359, -359, -359, -359, -359, -359, -359,
+ 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525,
+ -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402,
+ 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493,
+ 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422,
+ -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758,
+ -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690,
+ -171, -171, -171, -171, 622, 622, 622, 622,
+ 1577, 1577, 1577, 1577, 182, 182, 182, 182,
+ -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057,
+ 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242,
+ 573, 573, -1325, -1325, 264, 264, 383, 383,
+ -829, -829, 1458, 1458, -1602, -1602, -130, -130,
+ -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080,
+ -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837,
+ 1223, 652, -552, 1015, -1293, 1491, -282, -1544,
+ 516, -8, -320, -666, -1618, -1162, 126, 1469,
+ -335, -11477, -32227, 20494, -27738, 945, -14883, 6182,
+ 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276,
+ -1103, 555, -1251, 1550, 422, 177, -291, 1574,
+ -246, 1159, -777, -602, -1590, -872, 418, -156,
+ 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493,
+ -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619,
+ 430, 843, 871, 105, 587, -235, -460, 1653,
+ 778, -147, 1483, 1119, 644, 349, 329, -75,
+ 787, 787, 787, 787, 787, 787, 787, 787,
+ 787, 787, 787, 787, 787, 787, 787, 787,
+ -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517,
+ -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517,
+ 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191,
+ -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694,
+ 287, 287, 287, 287, 287, 287, 287, 287,
+ 202, 202, 202, 202, 202, 202, 202, 202,
+ 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358,
+ -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164,
+ 962, 962, 962, 962, -1202, -1202, -1202, -1202,
+ -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468,
+ -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800,
+ 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163,
+ -681, -681, 1017, 1017, 732, 732, 608, 608,
+ -1542, -1542, 411, 411, -205, -205, -1571, -1571,
+ 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249,
+ 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915,
+ -853, -90, -271, 830, 107, -1421, -247, -951,
+ -398, 961, -1508, -725, 448, -1065, 677, -1275,
+ -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989,
+ 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422,
+ 817, 603, 1322, -1465, -1215, 1218, -874, -1187,
+ -1185, -1278, -1510, -870, -108, 996, 958, 1522,
+ 20297, 2146, 15355, -32384, -6280, -14903, -11044, 14469,
+ -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132,
+ 1097, 610, -1285, 384, -136, -1335, 220, -1659,
+ -1530, 794, -854, 478, -308, 991, -1460, 1628,
+
+#define _16XSHIFT 624
+ SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT,
+ SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT
+}};
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.h
new file mode 100644
index 0000000000..f95899cd8e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/consts.h
@@ -0,0 +1,43 @@
+#ifndef CONSTS_H
+#define CONSTS_H
+
+#include "params.h"
+
+#define _16XQ 0 /* this and the values below are int16_t element offsets into qdata */
+#define _16XQINV 16
+#define _16XV 32
+#define _16XFLO 48
+#define _16XFHI 64
+#define _16XMONTSQLO 80
+#define _16XMONTSQHI 96
+#define _16XMASK 112
+#define _REVIDXB 128
+#define _REVIDXD 144
+#define _ZETAS_EXP 160
+#define _16XSHIFT 624
+
+/* The C ABI on macOS prefixes every exported symbol with an
+ * underscore, while assembly labels are emitted verbatim. Without
+ * the prefix, C code cannot find the assembly routines and the
+ * assembly cannot find symbols defined in C.
+ *
+ * cdecl(s) expands to the namespaced symbol, with the underscore
+ * added when __WIN32__ or __APPLE__ is defined. */
+#ifdef __ASSEMBLER__
+#if defined(__WIN32__) || defined(__APPLE__)
+#define decorate(s) _##s
+#define cdecl2(s) decorate(s)
+#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s))
+#else
+#define cdecl(s) KYBER_NAMESPACE(##s)
+#endif
+#endif
+
+#ifndef __ASSEMBLER__
+#include "align.h"
+typedef ALIGNED_INT16(640) qdata_t; /* 640 = _16XSHIFT + 16 entries */
+#define qdata KYBER_NAMESPACE(qdata)
+extern const qdata_t qdata;
+#endif
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.S
new file mode 100644
index 0000000000..3bb1ebd3d8
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.S
@@ -0,0 +1,88 @@
+#include "consts.h"
+.include "fq.inc"
+
+.text
+reduce128_avx:
+#load eight vectors (128 coefficients of 16 bits) from rdi, with ymm0 and ymm1 already set by the caller
+vmovdqa (%rdi),%ymm2
+vmovdqa 32(%rdi),%ymm3
+vmovdqa 64(%rdi),%ymm4
+vmovdqa 96(%rdi),%ymm5
+vmovdqa 128(%rdi),%ymm6
+vmovdqa 160(%rdi),%ymm7
+vmovdqa 192(%rdi),%ymm8
+vmovdqa 224(%rdi),%ymm9
+
+red16 2
+red16 3
+red16 4
+red16 5
+red16 6
+red16 7
+red16 8
+red16 9
+
+#store the reduced vectors back to the same addresses
+vmovdqa %ymm2,(%rdi)
+vmovdqa %ymm3,32(%rdi)
+vmovdqa %ymm4,64(%rdi)
+vmovdqa %ymm5,96(%rdi)
+vmovdqa %ymm6,128(%rdi)
+vmovdqa %ymm7,160(%rdi)
+vmovdqa %ymm8,192(%rdi)
+vmovdqa %ymm9,224(%rdi)
+
+ret
+
+.global cdecl(reduce_avx)
+cdecl(reduce_avx):
+#consts are read from the table at rsi
+vmovdqa _16XQ*2(%rsi),%ymm0 # ymm0 = modulus vector read by red16
+vmovdqa _16XV*2(%rsi),%ymm1 # ymm1 = reciprocal vector read by red16
+call reduce128_avx # first 128 coefficients at rdi
+add $256,%rdi # advance by 128 coefficients of 2 bytes each
+call reduce128_avx # next 128 coefficients
+ret
+
+tomont128_avx:
+#load eight vectors (128 coefficients of 16 bits) from rdi, with ymm0, ymm1 and ymm2 already set by the caller
+vmovdqa (%rdi),%ymm3
+vmovdqa 32(%rdi),%ymm4
+vmovdqa 64(%rdi),%ymm5
+vmovdqa 96(%rdi),%ymm6
+vmovdqa 128(%rdi),%ymm7
+vmovdqa 160(%rdi),%ymm8
+vmovdqa 192(%rdi),%ymm9
+vmovdqa 224(%rdi),%ymm10
+
+fqmulprecomp 1,2,3,11
+fqmulprecomp 1,2,4,12
+fqmulprecomp 1,2,5,13
+fqmulprecomp 1,2,6,14
+fqmulprecomp 1,2,7,15
+fqmulprecomp 1,2,8,11
+fqmulprecomp 1,2,9,12
+fqmulprecomp 1,2,10,13
+
+#store the products back to the same addresses
+vmovdqa %ymm3,(%rdi)
+vmovdqa %ymm4,32(%rdi)
+vmovdqa %ymm5,64(%rdi)
+vmovdqa %ymm6,96(%rdi)
+vmovdqa %ymm7,128(%rdi)
+vmovdqa %ymm8,160(%rdi)
+vmovdqa %ymm9,192(%rdi)
+vmovdqa %ymm10,224(%rdi)
+
+ret
+
+.global cdecl(tomont_avx)
+cdecl(tomont_avx):
+#consts are read from the table at rsi
+vmovdqa _16XQ*2(%rsi),%ymm0 # ymm0 = modulus vector
+vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 # ymm1 = low-half multiplier passed to fqmulprecomp
+vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 # ymm2 = high-half multiplier passed to fqmulprecomp
+call tomont128_avx # first 128 coefficients at rdi
+add $256,%rdi # advance by 128 coefficients of 2 bytes each
+call tomont128_avx # next 128 coefficients
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.inc
new file mode 100644
index 0000000000..4b7afc3118
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/fq.inc
@@ -0,0 +1,30 @@
+.macro red16 r,rs=0,x=12
+vpmulhw %ymm1,%ymm\r,%ymm\x # x = high 16 bits of r times ymm1
+.if \rs
+vpmulhrsw %ymm\rs,%ymm\x,%ymm\x # nonzero rs: rounding multiply by ymm rs replaces the shift
+.else
+vpsraw $10,%ymm\x,%ymm\x # default: arithmetic shift, 16 + 10 = 26 bits dropped in total
+.endif
+vpmullw %ymm0,%ymm\x,%ymm\x # x = x times ymm0 (low 16 bits)
+vpsubw %ymm\x,%ymm\r,%ymm\r # r = r - x
+.endm
+
+.macro csubq r,x=12
+vpsubw %ymm0,%ymm\r,%ymm\r # r = r - ymm0
+vpsraw $15,%ymm\r,%ymm\x # x = all ones in lanes where r went negative, else zero
+vpand %ymm0,%ymm\x,%ymm\x # x = ymm0 in those lanes only
+vpaddw %ymm\x,%ymm\r,%ymm\r # add ymm0 back where the subtraction went negative
+.endm
+
+.macro caddq r,x=12
+vpsraw $15,%ymm\r,%ymm\x # x = all ones in lanes where r is negative, else zero
+vpand %ymm0,%ymm\x,%ymm\x # x = ymm0 in those lanes only
+vpaddw %ymm\x,%ymm\r,%ymm\r # add ymm0 to the negative lanes
+.endm
+
+.macro fqmulprecomp al,ah,b,x=12
+vpmullw %ymm\al,%ymm\b,%ymm\x # x = low 16 bits of b times al
+vpmulhw %ymm\ah,%ymm\b,%ymm\b # b = high 16 bits of b times ah
+vpmulhw %ymm0,%ymm\x,%ymm\x # x = high 16 bits of x times ymm0
+vpsubw %ymm\x,%ymm\b,%ymm\b # b = b - x, the result stays in register b
+.endm
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/indcpa.c
new file mode 100644
index 0000000000..572ce49007
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/indcpa.c
@@ -0,0 +1,566 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <immintrin.h>
+#include <string.h>
+#include "align.h"
+#include "params.h"
+#include "indcpa.h"
+#include "polyvec.h"
+#include "poly.h"
+#include "ntt.h"
+#include "cbd.h"
+#include "rejsample.h"
+#include "symmetric.h"
+#include "randombytes.h"
+
+/*************************************************
+* Name: pack_pk
+*
+* Description: Serialize the public key as concatenation of the
+* serialized vector of polynomials pk and the
+* public seed used to generate the matrix A.
+* The polynomial coefficients in pk are assumed to
+* lie in the interval [0,q], i.e. pk must be reduced
+* by polyvec_reduce().
+*
+* Arguments: uint8_t *r: pointer to the output serialized public key
+* polyvec *pk: pointer to the input public-key polyvec
+* const uint8_t *seed: pointer to the input public seed
+**************************************************/
+static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES],
+ polyvec *pk,
+ const uint8_t seed[KYBER_SYMBYTES])
+{
+ polyvec_tobytes(r, pk); // serialized polyvec is written starting at r[0]
+ memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); // seed is appended at offset KYBER_POLYVECBYTES
+}
+
+/*************************************************
+* Name: unpack_pk
+*
+* Description: De-serialize public key from a byte array;
+* approximate inverse of pack_pk
+*
+* Arguments: - polyvec *pk: pointer to output public-key polynomial vector
+* - uint8_t *seed: pointer to output seed to generate matrix A
+* - const uint8_t *packedpk: pointer to input serialized public key
+**************************************************/
+static void unpack_pk(polyvec *pk,
+ uint8_t seed[KYBER_SYMBYTES],
+ const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES])
+{
+ polyvec_frombytes(pk, packedpk); // polyvec is parsed from the start of packedpk
+ memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); // seed is read back from offset KYBER_POLYVECBYTES, mirroring pack_pk
+}
+
+/*************************************************
+* Name: pack_sk
+*
+* Description: Serialize the secret key.
+* The polynomial coefficients in sk are assumed to
+* lie in the interval [0,q], i.e. sk must be reduced
+* by polyvec_reduce().
+*
+* Arguments: - uint8_t *r: pointer to output serialized secret key
+* - polyvec *sk: pointer to input vector of polynomials (secret key)
+**************************************************/
+static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk)
+{
+ polyvec_tobytes(r, sk); // the serialized polyvec is the only thing written to r
+}
+
+/*************************************************
+* Name: unpack_sk
+*
+* Description: De-serialize the secret key; inverse of pack_sk
+*
+* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key)
+* - const uint8_t *packedsk: pointer to input serialized secret key
+**************************************************/
+static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES])
+{
+ polyvec_frombytes(sk, packedsk); // thin wrapper: the packed key is just one serialized polyvec
+}
+
+/*************************************************
+* Name: pack_ciphertext
+*
+* Description: Serialize the ciphertext as concatenation of the
+* compressed and serialized vector of polynomials b
+* and the compressed and serialized polynomial v.
+* The polynomial coefficients in b and v are assumed to
+* lie in the interval [0,q], i.e. b and v must be reduced
+* by polyvec_reduce() and poly_reduce(), respectively.
+*
+* Arguments: uint8_t *r: pointer to the output serialized ciphertext
+* polyvec *b: pointer to the input vector of polynomials b
+* poly *v: pointer to the input polynomial v
+**************************************************/
+static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v)
+{
+ polyvec_compress(r, b); // compressed b is written starting at r[0]
+ poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); // compressed v follows at offset KYBER_POLYVECCOMPRESSEDBYTES
+}
+
+/*************************************************
+* Name: unpack_ciphertext
+*
+* Description: De-serialize and decompress ciphertext from a byte array;
+* approximate inverse of pack_ciphertext
+*
+* Arguments: - polyvec *b: pointer to the output vector of polynomials b
+* - poly *v: pointer to the output polynomial v
+* - const uint8_t *c: pointer to the input serialized ciphertext
+**************************************************/
+static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES])
+{
+ polyvec_decompress(b, c); // b is decoded from the start of c
+ poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); // v is decoded from offset KYBER_POLYVECCOMPRESSEDBYTES, mirroring pack_ciphertext
+}
+
+/*************************************************
+* Name: rej_uniform
+*
+* Description: Run rejection sampling on uniform random bytes to generate
+* uniform random integers mod q
+*
+* Arguments: - int16_t *r: pointer to output array
+* - unsigned int len: requested number of 16-bit integers (uniform mod q)
+* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes)
+* - unsigned int buflen: length of input buffer in bytes
+*
+* Returns number of sampled 16-bit integers (at most len)
+**************************************************/
+static unsigned int rej_uniform(int16_t *r,
+ unsigned int len,
+ const uint8_t *buf,
+ unsigned int buflen)
+{
+ unsigned int ctr, pos;
+ uint16_t val0, val1;
+
+ ctr = pos = 0;
+ while(ctr < len && pos + 3 <= buflen) { // pos + 3 form: a buflen below 3 must not wrap the unsigned bound
+ val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; // first 12-bit candidate: byte 0 plus the low nibble of byte 1
+ val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; // second 12-bit candidate: high nibble of byte 1 plus byte 2
+ pos += 3; // every iteration consumes exactly three input bytes
+
+ if(val0 < KYBER_Q) // accept only candidates below q
+ r[ctr++] = val0;
+ if(ctr < len && val1 < KYBER_Q) // re-check ctr: val0 may have filled the last slot
+ r[ctr++] = val1;
+ }
+
+ return ctr; // number of coefficients written, at most len
+}
+
+#define gen_a(A,B) gen_matrix(A,B,0)
+#define gen_at(A,B) gen_matrix(A,B,1)
+
+/*************************************************
+* Name: gen_matrix
+*
+* Description: Deterministically generate matrix A (or the transpose of A)
+* from a seed. Entries of the matrix are polynomials that look
+* uniformly random. Performs rejection sampling on output of
+* a XOF
+*
+* Arguments: - polyvec *a: pointer to output matrix A
+* - const uint8_t *seed: pointer to input seed
+* - int transposed: boolean deciding whether A or A^T is generated
+**************************************************/
+#if KYBER_K == 2
+void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+{
+ unsigned int ctr0, ctr1, ctr2, ctr3;
+ ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+ __m256i f;
+ shake128x4incctx state;
+
+ f = _mm256_loadu_si256((const __m256i *)seed); // keep the const qualifier of seed; unaligned load of its 32 bytes
+ _mm256_store_si256(buf[0].vec, f); // every lane starts with the same 32-byte seed
+ _mm256_store_si256(buf[1].vec, f);
+ _mm256_store_si256(buf[2].vec, f);
+ _mm256_store_si256(buf[3].vec, f);
+
+ if(transposed) { // bytes 32 and 33 are the two index bytes of the entry; the branches swap their order
+ buf[0].coeffs[32] = 0;
+ buf[0].coeffs[33] = 0;
+ buf[1].coeffs[32] = 0;
+ buf[1].coeffs[33] = 1;
+ buf[2].coeffs[32] = 1;
+ buf[2].coeffs[33] = 0;
+ buf[3].coeffs[32] = 1;
+ buf[3].coeffs[33] = 1;
+ }
+ else {
+ buf[0].coeffs[32] = 0;
+ buf[0].coeffs[33] = 0;
+ buf[1].coeffs[32] = 1;
+ buf[1].coeffs[33] = 0;
+ buf[2].coeffs[32] = 0;
+ buf[2].coeffs[33] = 1;
+ buf[3].coeffs[32] = 1;
+ buf[3].coeffs[33] = 1;
+ }
+
+ shake128x4_inc_init(&state);
+ shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); // 34 = 32 seed bytes + 2 index bytes
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+ ctr0 = rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); // bulk pass over the blocks squeezed above
+ ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs);
+ ctr2 = rej_uniform_avx(a[1].vec[0].coeffs, buf[2].coeffs);
+ ctr3 = rej_uniform_avx(a[1].vec[1].coeffs, buf[3].coeffs);
+
+ while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { // top up until all four polynomials are full
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+ ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); // a full lane passes len 0 and adds nothing
+ ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE);
+ ctr2 += rej_uniform(a[1].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE);
+ ctr3 += rej_uniform(a[1].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE);
+ }
+
+ poly_nttunpack(&a[0].vec[0]); // reorder each sampled polynomial with poly_nttunpack
+ poly_nttunpack(&a[0].vec[1]);
+ poly_nttunpack(&a[1].vec[0]);
+ poly_nttunpack(&a[1].vec[1]);
+ shake128x4_inc_ctx_release(&state);
+}
+#elif KYBER_K == 3
+void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+{
+ unsigned int ctr0, ctr1, ctr2, ctr3;
+ ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+ __m256i f;
+ shake128x4incctx state;
+ shake128incctx state1x;
+
+ f = _mm256_loadu_si256((const __m256i *)seed); // keep the const qualifier of seed; unaligned load of its 32 bytes
+ _mm256_store_si256(buf[0].vec, f); // every lane starts with the same 32-byte seed
+ _mm256_store_si256(buf[1].vec, f);
+ _mm256_store_si256(buf[2].vec, f);
+ _mm256_store_si256(buf[3].vec, f);
+
+ if(transposed) { // first batch of four entries: a[0].vec[0..2] and a[1].vec[0]
+ buf[0].coeffs[32] = 0;
+ buf[0].coeffs[33] = 0;
+ buf[1].coeffs[32] = 0;
+ buf[1].coeffs[33] = 1;
+ buf[2].coeffs[32] = 0;
+ buf[2].coeffs[33] = 2;
+ buf[3].coeffs[32] = 1;
+ buf[3].coeffs[33] = 0;
+ }
+ else {
+ buf[0].coeffs[32] = 0;
+ buf[0].coeffs[33] = 0;
+ buf[1].coeffs[32] = 1;
+ buf[1].coeffs[33] = 0;
+ buf[2].coeffs[32] = 2;
+ buf[2].coeffs[33] = 0;
+ buf[3].coeffs[32] = 0;
+ buf[3].coeffs[33] = 1;
+ }
+
+ shake128x4_inc_init(&state);
+ shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); // 34 = 32 seed bytes + 2 index bytes
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+ ctr0 = rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); // bulk pass over the blocks squeezed above
+ ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs);
+ ctr2 = rej_uniform_avx(a[0].vec[2].coeffs, buf[2].coeffs);
+ ctr3 = rej_uniform_avx(a[1].vec[0].coeffs, buf[3].coeffs);
+
+ while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { // top up until all four polynomials are full
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+ ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); // a full lane passes len 0 and adds nothing
+ ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE);
+ ctr2 += rej_uniform(a[0].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE);
+ ctr3 += rej_uniform(a[1].vec[0].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE);
+ }
+
+ poly_nttunpack(&a[0].vec[0]);
+ poly_nttunpack(&a[0].vec[1]);
+ poly_nttunpack(&a[0].vec[2]);
+ poly_nttunpack(&a[1].vec[0]);
+
+ f = _mm256_loadu_si256((const __m256i *)seed); // buf was overwritten by the squeezed output, so the seed is copied in again
+ _mm256_store_si256(buf[0].vec, f);
+ _mm256_store_si256(buf[1].vec, f);
+ _mm256_store_si256(buf[2].vec, f);
+ _mm256_store_si256(buf[3].vec, f);
+
+ if(transposed) { // second batch: a[1].vec[1..2] and a[2].vec[0..1]
+ buf[0].coeffs[32] = 1;
+ buf[0].coeffs[33] = 1;
+ buf[1].coeffs[32] = 1;
+ buf[1].coeffs[33] = 2;
+ buf[2].coeffs[32] = 2;
+ buf[2].coeffs[33] = 0;
+ buf[3].coeffs[32] = 2;
+ buf[3].coeffs[33] = 1;
+ }
+ else {
+ buf[0].coeffs[32] = 1;
+ buf[0].coeffs[33] = 1;
+ buf[1].coeffs[32] = 2;
+ buf[1].coeffs[33] = 1;
+ buf[2].coeffs[32] = 0;
+ buf[2].coeffs[33] = 2;
+ buf[3].coeffs[32] = 1;
+ buf[3].coeffs[33] = 2;
+ }
+
+ shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); // NOTE(review): state is reused without a new init - presumably absorb_once resets it; confirm in the x4 wrapper
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+ ctr0 = rej_uniform_avx(a[1].vec[1].coeffs, buf[0].coeffs);
+ ctr1 = rej_uniform_avx(a[1].vec[2].coeffs, buf[1].coeffs);
+ ctr2 = rej_uniform_avx(a[2].vec[0].coeffs, buf[2].coeffs);
+ ctr3 = rej_uniform_avx(a[2].vec[1].coeffs, buf[3].coeffs);
+
+ while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) {
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+ ctr0 += rej_uniform(a[1].vec[1].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE);
+ ctr1 += rej_uniform(a[1].vec[2].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE);
+ ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE);
+ ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE);
+ }
+ shake128x4_inc_ctx_release(&state); // the four-way context is not used past this point
+
+ poly_nttunpack(&a[1].vec[1]);
+ poly_nttunpack(&a[1].vec[2]);
+ poly_nttunpack(&a[2].vec[0]);
+ poly_nttunpack(&a[2].vec[1]);
+
+ f = _mm256_loadu_si256((const __m256i *)seed); // ninth entry a[2].vec[2] is generated alone with the single-lane SHAKE128
+ _mm256_store_si256(buf[0].vec, f);
+ buf[0].coeffs[32] = 2;
+ buf[0].coeffs[33] = 2;
+
+ shake128_inc_init(&state1x);
+ shake128_absorb_once(&state1x, buf[0].coeffs, 34);
+ shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x);
+ ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs);
+ while(ctr0 < KYBER_N) {
+ shake128_squeezeblocks(buf[0].coeffs, 1, &state1x);
+ ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE);
+ }
+ shake128_inc_ctx_release(&state1x);
+
+ poly_nttunpack(&a[2].vec[2]);
+}
+#elif KYBER_K == 4
+void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed)
+{
+ unsigned int i, ctr0, ctr1, ctr2, ctr3;
+ ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4];
+ __m256i f;
+ shake128x4incctx state;
+ shake128x4_inc_init(&state);
+
+ for(i=0;i<4;i++) { // one iteration per row a[i]; its four entries are sampled in parallel
+ f = _mm256_loadu_si256((const __m256i *)seed); // keep the const qualifier of seed; unaligned load of its 32 bytes
+ _mm256_store_si256(buf[0].vec, f); // buf holds squeezed output from the previous row, so the seed is copied in again
+ _mm256_store_si256(buf[1].vec, f);
+ _mm256_store_si256(buf[2].vec, f);
+ _mm256_store_si256(buf[3].vec, f);
+
+ if(transposed) { // bytes 32 and 33 are the two index bytes of the entry; the branches swap their order
+ buf[0].coeffs[32] = i;
+ buf[0].coeffs[33] = 0;
+ buf[1].coeffs[32] = i;
+ buf[1].coeffs[33] = 1;
+ buf[2].coeffs[32] = i;
+ buf[2].coeffs[33] = 2;
+ buf[3].coeffs[32] = i;
+ buf[3].coeffs[33] = 3;
+ }
+ else {
+ buf[0].coeffs[32] = 0;
+ buf[0].coeffs[33] = i;
+ buf[1].coeffs[32] = 1;
+ buf[1].coeffs[33] = i;
+ buf[2].coeffs[32] = 2;
+ buf[2].coeffs[33] = i;
+ buf[3].coeffs[32] = 3;
+ buf[3].coeffs[33] = i;
+ }
+
+ shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); // NOTE(review): state is reused across rows - presumably absorb_once resets it; confirm in the x4 wrapper
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state);
+
+ ctr0 = rej_uniform_avx(a[i].vec[0].coeffs, buf[0].coeffs); // bulk pass over the blocks squeezed above
+ ctr1 = rej_uniform_avx(a[i].vec[1].coeffs, buf[1].coeffs);
+ ctr2 = rej_uniform_avx(a[i].vec[2].coeffs, buf[2].coeffs);
+ ctr3 = rej_uniform_avx(a[i].vec[3].coeffs, buf[3].coeffs);
+
+ while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { // top up until all four polynomials are full
+ shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state);
+
+ ctr0 += rej_uniform(a[i].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); // a full lane passes len 0 and adds nothing
+ ctr1 += rej_uniform(a[i].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE);
+ ctr2 += rej_uniform(a[i].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE);
+ ctr3 += rej_uniform(a[i].vec[3].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE);
+ }
+
+ poly_nttunpack(&a[i].vec[0]);
+ poly_nttunpack(&a[i].vec[1]);
+ poly_nttunpack(&a[i].vec[2]);
+ poly_nttunpack(&a[i].vec[3]);
+ }
+ shake128x4_inc_ctx_release(&state);
+}
+#endif
+
+/*************************************************
+* Name: indcpa_keypair_derand
+*
+* Description: Generates public and private key for the CPA-secure
+* public-key encryption scheme underlying Kyber
+*
+* Arguments: - uint8_t *pk: pointer to output public key
+* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes)
+* - uint8_t *sk: pointer to output private key
+* (of length KYBER_INDCPA_SECRETKEYBYTES bytes)
+* - const uint8_t *coins: pointer to input randomness
+* (of length KYBER_SYMBYTES bytes)
+**************************************************/
+void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES])
+{
+ unsigned int i;
+ uint8_t buf[2*KYBER_SYMBYTES];
+ const uint8_t *publicseed = buf;
+ const uint8_t *noiseseed = buf + KYBER_SYMBYTES;
+ polyvec a[KYBER_K], e, pkpv, skpv;
+
+ hash_g(buf, coins, KYBER_SYMBYTES);
+
+ gen_a(a, publicseed);
+
+#if KYBER_K == 2
+ poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3);
+#elif KYBER_K == 3
+ poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3);
+ poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7);
+#elif KYBER_K == 4
+ poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3);
+ poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7);
+#endif
+
+ polyvec_ntt(&skpv);
+ polyvec_reduce(&skpv);
+ polyvec_ntt(&e);
+
+ // matrix-vector multiplication
+ for(i=0;i
+#include "params.h"
+#include "polyvec.h"
+
+#define gen_matrix KYBER_NAMESPACE(gen_matrix) // exported symbol gets the parameter-set prefix from params.h
+void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); // fills matrix a from seed; nonzero transposed selects the transpose
+
+#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand)
+void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]); // deterministic: all randomness comes from coins
+
+#define indcpa_enc KYBER_NAMESPACE(indcpa_enc)
+void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]); // c is the output; m, pk and coins are inputs
+
+#define indcpa_dec KYBER_NAMESPACE(indcpa_dec)
+void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); // m is the output; c and sk are inputs
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/invntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/invntt.S
new file mode 100644
index 0000000000..76d4189996
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/invntt.S
@@ -0,0 +1,193 @@
+#include "consts.h"
+.include "shuffle.inc"
+.include "fq.inc"
+
+.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3
+vpsubw %ymm\rl0,%ymm\rh0,%ymm12 /* ymm12..15 hold the differences rh - rl for the four pairs */
+vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0 /* each rl becomes the sum rl + rh, with no reduction here */
+vpsubw %ymm\rl1,%ymm\rh1,%ymm13
+
+vpmullw %ymm\zl0,%ymm12,%ymm\rh0 /* rh = low 16 bits of difference*zl; pairs 0,1 use zl0 and pairs 2,3 use zl1 */
+vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1
+vpsubw %ymm\rl2,%ymm\rh2,%ymm14
+
+vpmullw %ymm\zl0,%ymm13,%ymm\rh1
+vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2
+vpsubw %ymm\rl3,%ymm\rh3,%ymm15
+
+vpmullw %ymm\zl1,%ymm14,%ymm\rh2
+vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3
+vpmullw %ymm\zl1,%ymm15,%ymm\rh3
+
+vpmulhw %ymm\zh0,%ymm12,%ymm12 /* ymm12..15 = high 16 bits of difference*zh */
+vpmulhw %ymm\zh0,%ymm13,%ymm13
+
+vpmulhw %ymm\zh1,%ymm14,%ymm14
+vpmulhw %ymm\zh1,%ymm15,%ymm15
+
+vpmulhw %ymm0,%ymm\rh0,%ymm\rh0 /* rh = high 16 bits of rh*ymm0 */
+
+vpmulhw %ymm0,%ymm\rh1,%ymm\rh1
+
+vpmulhw %ymm0,%ymm\rh2,%ymm\rh2
+vpmulhw %ymm0,%ymm\rh3,%ymm\rh3
+
+#
+
+#
+
+vpsubw %ymm\rh0,%ymm12,%ymm\rh0 /* final rh = high product minus the ymm0 term, the same sequence as fqmulprecomp */
+
+vpsubw %ymm\rh1,%ymm13,%ymm\rh1
+
+vpsubw %ymm\rh2,%ymm14,%ymm\rh2
+vpsubw %ymm\rh3,%ymm15,%ymm\rh3 /* ymm12..15 are clobbered as scratch */
+.endm
+
+.macro intt_levels0t5 off
+/* level 0 */
+vmovdqa _16XFLO*2(%rsi),%ymm2 /* ymm2/ymm3 = _16XFLO/_16XFHI, the al/ah pair for the fqmulprecomp calls below */
+vmovdqa _16XFHI*2(%rsi),%ymm3
+
+vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 /* off selects which 128-coefficient half of the array at rdi is processed */
+vmovdqa (128*\off+ 32)*2(%rdi),%ymm6
+vmovdqa (128*\off+ 16)*2(%rdi),%ymm5
+vmovdqa (128*\off+ 48)*2(%rdi),%ymm7
+
+fqmulprecomp 2,3,4 /* scale every loaded vector by the F constant pair before the first butterfly */
+fqmulprecomp 2,3,6
+fqmulprecomp 2,3,5
+fqmulprecomp 2,3,7
+
+vmovdqa (128*\off+ 64)*2(%rdi),%ymm8
+vmovdqa (128*\off+ 96)*2(%rdi),%ymm10
+vmovdqa (128*\off+ 80)*2(%rdi),%ymm9
+vmovdqa (128*\off+112)*2(%rdi),%ymm11
+
+fqmulprecomp 2,3,8
+fqmulprecomp 2,3,10
+fqmulprecomp 2,3,9
+fqmulprecomp 2,3,11
+
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15 /* (1-off) picks the zeta block opposite to the half being processed; ymm2/ymm3 are reused from here on */
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3
+vmovdqa _REVIDXB*2(%rsi),%ymm12 /* byte shuffle mask applied to all four zeta vectors */
+vpshufb %ymm12,%ymm15,%ymm15
+vpshufb %ymm12,%ymm1,%ymm1
+vpshufb %ymm12,%ymm2,%ymm2
+vpshufb %ymm12,%ymm3,%ymm3
+
+butterfly 4,5,8,9,6,7,10,11,15,1,2,3
+
+/* level 1 */
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3
+vmovdqa _REVIDXB*2(%rsi),%ymm1
+vpshufb %ymm1,%ymm2,%ymm2
+vpshufb %ymm1,%ymm3,%ymm3
+
+butterfly 4,5,6,7,8,9,10,11,2,2,3,3
+
+shuffle1 4,5,3,5 /* shuffles between levels regroup lanes; register roles change after each group */
+shuffle1 6,7,4,7
+shuffle1 8,9,6,9
+shuffle1 10,11,8,11
+
+/* level 2 */
+vmovdqa _REVIDXD*2(%rsi),%ymm12
+vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2
+vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10
+
+butterfly 3,4,6,8,5,7,9,11,2,2,10,10
+
+vmovdqa _16XV*2(%rsi),%ymm1 /* ymm1 = _16XV, the multiplier red16 expects; it stays loaded for the later red16 uses */
+red16 3
+
+shuffle2 3,4,10,4
+shuffle2 6,8,3,8
+shuffle2 5,7,6,7
+shuffle2 9,11,5,11
+
+/* level 3 */
+vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2
+vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9
+
+butterfly 10,3,6,5,4,8,7,11,2,2,9,9
+
+shuffle4 10,3,9,3
+shuffle4 6,5,10,5
+shuffle4 4,8,6,8
+shuffle4 7,11,4,11
+
+/* level 4 */
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2
+vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7
+
+butterfly 9,10,6,4,3,5,8,11,2,2,7,7
+
+red16 9 /* only this one register is reduced at this level */
+
+shuffle8 9,10,7,10
+shuffle8 6,4,9,4
+shuffle8 3,5,6,5
+shuffle8 8,11,3,11
+
+/* level 5 */
+vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2
+vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8
+
+butterfly 7,9,6,3,10,4,5,11,2,2,8,8
+
+vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) /* write the eight result vectors back over the same half */
+vmovdqa %ymm9,(128*\off+ 16)*2(%rdi)
+vmovdqa %ymm6,(128*\off+ 32)*2(%rdi)
+vmovdqa %ymm3,(128*\off+ 48)*2(%rdi)
+vmovdqa %ymm10,(128*\off+ 64)*2(%rdi)
+vmovdqa %ymm4,(128*\off+ 80)*2(%rdi)
+vmovdqa %ymm5,(128*\off+ 96)*2(%rdi)
+vmovdqa %ymm11,(128*\off+112)*2(%rdi)
+.endm
+
+.macro intt_level6 off
+/* level 6 */
+vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 /* low operands: four vectors starting at coefficient 64*off */
+vmovdqa (64*\off+128)*2(%rdi),%ymm8 /* high operands: the matching vectors 128 coefficients further on */
+vmovdqa (64*\off+ 16)*2(%rdi),%ymm5
+vmovdqa (64*\off+144)*2(%rdi),%ymm9
+vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 /* ymm2 = default zl operand of butterfly */
+
+vmovdqa (64*\off+ 32)*2(%rdi),%ymm6
+vmovdqa (64*\off+160)*2(%rdi),%ymm10
+vmovdqa (64*\off+ 48)*2(%rdi),%ymm7
+vmovdqa (64*\off+176)*2(%rdi),%ymm11
+vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 /* ymm3 = default zh operand of butterfly */
+
+butterfly 4,5,6,7,8,9,10,11
+
+.if \off == 0
+red16 4 /* only the first vector of the off == 0 pass gets an extra reduction */
+.endif
+
+vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) /* results are stored back to the addresses they were loaded from */
+vmovdqa %ymm5,(64*\off+ 16)*2(%rdi)
+vmovdqa %ymm6,(64*\off+ 32)*2(%rdi)
+vmovdqa %ymm7,(64*\off+ 48)*2(%rdi)
+vmovdqa %ymm8,(64*\off+128)*2(%rdi)
+vmovdqa %ymm9,(64*\off+144)*2(%rdi)
+vmovdqa %ymm10,(64*\off+160)*2(%rdi)
+vmovdqa %ymm11,(64*\off+176)*2(%rdi)
+.endm
+
+.text
+.global cdecl(invntt_avx)
+cdecl(invntt_avx):
+vmovdqa _16XQ*2(%rsi),%ymm0 /* ymm0 = _16XQ for the whole routine; every macro below multiplies by ymm0 when reducing */
+
+intt_levels0t5 0 /* levels 0 to 5 on coefficients 0..127 */
+intt_levels0t5 1 /* levels 0 to 5 on coefficients 128..255 */
+
+intt_level6 0 /* level 6 pairs vectors that are 128 coefficients apart */
+intt_level6 1
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.c
new file mode 100644
index 0000000000..63abc1029c
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.c
@@ -0,0 +1,169 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include "params.h"
+#include "kem.h"
+#include "indcpa.h"
+#include "verify.h"
+#include "symmetric.h"
+#include "randombytes.h"
+/*************************************************
+* Name: crypto_kem_keypair_derand
+*
+* Description: Generates public and private key
+* for CCA-secure Kyber key encapsulation mechanism
+*
+* Arguments: - uint8_t *pk: pointer to output public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+* - uint8_t *sk: pointer to output private key
+* (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+* - const uint8_t *coins: pointer to input randomness
+* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_keypair_derand(uint8_t *pk,
+ uint8_t *sk,
+ const uint8_t *coins)
+{
+ indcpa_keypair_derand(pk, sk, coins); /* takes KYBER_SYMBYTES of coins per its prototype */
+ memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); /* public key is stored right after the IND-CPA secret key */
+ hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); /* hash of pk goes in the second-to-last KYBER_SYMBYTES slot */
+ /* Value z for pseudo-random output on reject */
+ memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); /* z is the second half of coins, stored in the last slot */
+ return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_keypair
+*
+* Description: Generates public and private key
+* for CCA-secure Kyber key encapsulation mechanism
+*
+* Arguments: - uint8_t *pk: pointer to output public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+* - uint8_t *sk: pointer to output private key
+* (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_keypair(uint8_t *pk,
+ uint8_t *sk)
+{
+ uint8_t coins[2*KYBER_SYMBYTES]; /* first half feeds indcpa key generation, second half becomes z */
+ randombytes(coins, 2*KYBER_SYMBYTES);
+ crypto_kem_keypair_derand(pk, sk, coins); /* its return value is ignored; the derand variant above always returns 0 */
+ return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_enc_derand
+*
+* Description: Generates cipher text and shared
+* secret for given public key
+*
+* Arguments: - uint8_t *ct: pointer to output cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *pk: pointer to input public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+* - const uint8_t *coins: pointer to input randomness
+* (an already allocated array filled with KYBER_SYMBYTES random bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_enc_derand(uint8_t *ct,
+ uint8_t *ss,
+ const uint8_t *pk,
+ const uint8_t *coins)
+{
+ uint8_t buf[2*KYBER_SYMBYTES]; /* layout: coins (used as the message) followed by the hash of pk */
+ /* Will contain key, coins */
+ uint8_t kr[2*KYBER_SYMBYTES];
+
+ memcpy(buf, coins, KYBER_SYMBYTES); /* caller randomness is used directly as the message */
+
+ /* Multitarget countermeasure for coins + contributory KEM */
+ hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES);
+ hash_g(kr, buf, 2*KYBER_SYMBYTES); /* kr = shared key followed by encryption coins */
+
+ /* coins are in kr+KYBER_SYMBYTES */
+ indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); /* message argument is the first KYBER_SYMBYTES of buf */
+
+ memcpy(ss,kr,KYBER_SYMBYTES); /* shared secret is the first half of kr */
+ return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_enc
+*
+* Description: Generates cipher text and shared
+* secret for given public key
+*
+* Arguments: - uint8_t *ct: pointer to output cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *pk: pointer to input public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_enc(uint8_t *ct,
+ uint8_t *ss,
+ const uint8_t *pk)
+{
+ uint8_t coins[KYBER_SYMBYTES]; /* fresh randomness that the derand variant uses as the message */
+ randombytes(coins, KYBER_SYMBYTES);
+ crypto_kem_enc_derand(ct, ss, pk, coins); /* its return value is ignored; the derand variant above always returns 0 */
+ return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_dec
+*
+* Description: Generates shared secret for given
+* cipher text and private key
+*
+* Arguments: - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *ct: pointer to input cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - const uint8_t *sk: pointer to input private key
+* (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0.
+*
+* On failure, ss will contain a pseudo-random value.
+**************************************************/
+int crypto_kem_dec(uint8_t *ss,
+ const uint8_t *ct,
+ const uint8_t *sk)
+{
+ int fail;
+ uint8_t buf[2*KYBER_SYMBYTES]; /* layout: decrypted message followed by the stored hash of pk */
+ /* Will contain key, coins */
+ uint8_t kr[2*KYBER_SYMBYTES];
+ uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; /* re-encryption output; only KYBER_CIPHERTEXTBYTES are compared below */
+ const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; /* public key copy embedded in sk by keypair_derand */
+
+ indcpa_dec(buf, ct, sk);
+
+ /* Multitarget countermeasure for coins + contributory KEM */
+ memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); /* reuse the pk hash stored at key generation */
+ hash_g(kr, buf, 2*KYBER_SYMBYTES);
+
+ /* coins are in kr+KYBER_SYMBYTES */
+ indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); /* re-encrypt with the derived coins */
+
+ fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); /* presumably nonzero when the ciphertexts differ - verify() is defined elsewhere */
+
+ /* Compute rejection key */
+ rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); /* always computed from z and ct, whether or not it ends up being used */
+
+ /* Copy true key to return buffer if fail is false */
+ cmov(ss,kr,KYBER_SYMBYTES,!fail);
+
+ return 0; /* the return value does not signal rejection; on failure ss keeps the rejection key */
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.h
new file mode 100644
index 0000000000..234f11966b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/kem.h
@@ -0,0 +1,35 @@
+#ifndef KEM_H
+#define KEM_H
+
+#include <stdint.h>
+#include "params.h"
+
+#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES /* CRYPTO_* names are plain aliases of the KYBER_* sizes */
+#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES
+#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES
+#define CRYPTO_BYTES KYBER_SSBYTES /* length of the shared secret */
+
+#if (KYBER_K == 2) /* NOTE(review): the names below still say Kyber although params.h namespaces these builds as ml_kem_*_ipd - confirm intended */
+#define CRYPTO_ALGNAME "Kyber512"
+#elif (KYBER_K == 3)
+#define CRYPTO_ALGNAME "Kyber768"
+#elif (KYBER_K == 4)
+#define CRYPTO_ALGNAME "Kyber1024"
+#endif
+
+#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) /* exported symbol drops the crypto_kem_ prefix and gains the namespace prefix */
+int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); /* coins: 2*KYBER_SYMBYTES bytes */
+
+#define crypto_kem_keypair KYBER_NAMESPACE(keypair)
+int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); /* draws its own randomness, then calls the derand variant */
+
+#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand)
+int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); /* coins: KYBER_SYMBYTES bytes */
+
+#define crypto_kem_enc KYBER_NAMESPACE(enc)
+int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); /* draws its own randomness, then calls the derand variant */
+
+#define crypto_kem_dec KYBER_NAMESPACE(dec)
+int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); /* always returns 0; a rejected ct yields a pseudo-random ss */
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.S
new file mode 100644
index 0000000000..0ce7b41297
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.S
@@ -0,0 +1,189 @@
+#include "consts.h"
+.include "shuffle.inc"
+
+.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2
+vpmullw %ymm\zl0,%ymm\rh0,%ymm12 /* ymm12..15 = low 16 bits of rh*zl; with the default zl0=15 ymm15 is read here before it is overwritten below */
+vpmullw %ymm\zl0,%ymm\rh1,%ymm13
+
+vpmullw %ymm\zl1,%ymm\rh2,%ymm14
+vpmullw %ymm\zl1,%ymm\rh3,%ymm15
+
+vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 /* rh = high 16 bits of rh*zh, in place */
+vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1
+
+vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2
+vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3
+.endm
+
+.macro reduce
+vpmulhw %ymm0,%ymm12,%ymm12 /* ymm12..15 = high 16 bits of (low products left by mul) * ymm0 */
+vpmulhw %ymm0,%ymm13,%ymm13
+
+vpmulhw %ymm0,%ymm14,%ymm14
+vpmulhw %ymm0,%ymm15,%ymm15 /* the results are the correction terms consumed by update */
+.endm
+
+.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3
+vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln /* sums are written one register name down: rln, rl0, rl1, rl2; rl3 is only read */
+vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 /* differences rl - rh replace rh in place */
+vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 /* rl0 can be overwritten here because both of its uses are above */
+
+vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1
+vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1
+vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2
+
+vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2
+vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3
+
+vpsubw %ymm12,%ymm\rln,%ymm\rln /* apply the correction terms from reduce: subtracted from each sum */
+vpaddw %ymm12,%ymm\rh0,%ymm\rh0 /* and added to the matching difference */
+vpsubw %ymm13,%ymm\rl0,%ymm\rl0
+
+vpaddw %ymm13,%ymm\rh1,%ymm\rh1
+vpsubw %ymm14,%ymm\rl1,%ymm\rl1
+vpaddw %ymm14,%ymm\rh2,%ymm\rh2
+
+vpsubw %ymm15,%ymm\rl2,%ymm\rl2
+vpaddw %ymm15,%ymm\rh3,%ymm\rh3
+.endm
+
+.macro level0 off
+vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 /* ymm15 = default zl operand of mul */
+vmovdqa (64*\off+128)*2(%rdi),%ymm8 /* high operands: four vectors 128 coefficients above the low ones */
+vmovdqa (64*\off+144)*2(%rdi),%ymm9
+vmovdqa (64*\off+160)*2(%rdi),%ymm10
+vmovdqa (64*\off+176)*2(%rdi),%ymm11
+vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm2 /* ymm2 = default zh operand of mul */
+
+mul 8,9,10,11
+
+vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 /* low operands are loaded between mul and reduce */
+vmovdqa (64*\off+ 16)*2(%rdi),%ymm5
+vmovdqa (64*\off+ 32)*2(%rdi),%ymm6
+vmovdqa (64*\off+ 48)*2(%rdi),%ymm7
+
+reduce
+update 3,4,5,6,7,8,9,10,11
+
+vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) /* update shifted the sums down one register, so ymm3..6 hold the low outputs */
+vmovdqa %ymm4,(64*\off+ 16)*2(%rdi)
+vmovdqa %ymm5,(64*\off+ 32)*2(%rdi)
+vmovdqa %ymm6,(64*\off+ 48)*2(%rdi)
+vmovdqa %ymm8,(64*\off+128)*2(%rdi)
+vmovdqa %ymm9,(64*\off+144)*2(%rdi)
+vmovdqa %ymm10,(64*\off+160)*2(%rdi)
+vmovdqa %ymm11,(64*\off+176)*2(%rdi)
+.endm
+
+.macro levels1t6 off
+/* level 1 */
+vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 /* each half has its own block of 224 zeta entries, selected by off */
+vmovdqa (128*\off+ 64)*2(%rdi),%ymm8
+vmovdqa (128*\off+ 80)*2(%rdi),%ymm9
+vmovdqa (128*\off+ 96)*2(%rdi),%ymm10
+vmovdqa (128*\off+112)*2(%rdi),%ymm11
+vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2
+
+mul 8,9,10,11
+
+vmovdqa (128*\off+ 0)*2(%rdi),%ymm4
+vmovdqa (128*\off+ 16)*2(%rdi),%ymm5
+vmovdqa (128*\off+ 32)*2(%rdi),%ymm6
+vmovdqa (128*\off+ 48)*2(%rdi),%ymm7
+
+reduce
+update 3,4,5,6,7,8,9,10,11
+
+/* level 2 */
+shuffle8 5,10,7,10 /* from here on data stays in registers; shuffles regroup lanes between levels */
+shuffle8 6,11,5,11
+
+vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2
+
+mul 7,10,5,11
+
+shuffle8 3,8,6,8 /* the other half of the shuffles is issued after mul */
+shuffle8 4,9,3,9
+
+reduce
+update 4,6,8,3,9,7,10,5,11
+
+/* level 3 */
+shuffle4 8,5,9,5
+shuffle4 3,11,8,11
+
+vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2
+
+mul 9,5,8,11
+
+shuffle4 4,7,3,7
+shuffle4 6,10,4,10
+
+reduce
+update 6,3,7,4,10,9,5,8,11
+
+/* level 4 */
+shuffle2 7,8,10,8
+shuffle2 4,11,7,11
+
+vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2
+
+mul 10,8,7,11
+
+shuffle2 6,9,4,9
+shuffle2 3,5,6,5
+
+reduce
+update 3,4,9,6,5,10,8,7,11
+
+/* level 5 */
+shuffle1 9,7,5,7
+shuffle1 6,11,9,11
+
+vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2
+
+mul 5,7,9,11
+
+shuffle1 3,10,6,10
+shuffle1 4,8,3,8
+
+reduce
+update 4,6,10,3,8,5,7,9,11
+
+/* level 6 */
+vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 /* this level uses two distinct zeta pairs, so all four mul zeta operands are given explicitly */
+vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15
+vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8
+vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2
+
+mul 10,3,9,11,14,15,8,2
+
+reduce
+update 8,4,6,5,7,10,3,9,11
+
+vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) /* write the eight result vectors back over the same half */
+vmovdqa %ymm4,(128*\off+ 16)*2(%rdi)
+vmovdqa %ymm10,(128*\off+ 32)*2(%rdi)
+vmovdqa %ymm3,(128*\off+ 48)*2(%rdi)
+vmovdqa %ymm6,(128*\off+ 64)*2(%rdi)
+vmovdqa %ymm5,(128*\off+ 80)*2(%rdi)
+vmovdqa %ymm9,(128*\off+ 96)*2(%rdi)
+vmovdqa %ymm11,(128*\off+112)*2(%rdi)
+.endm
+
+.text
+.global cdecl(ntt_avx)
+cdecl(ntt_avx):
+vmovdqa _16XQ*2(%rsi),%ymm0 /* ymm0 = _16XQ for the whole routine; reduce multiplies by ymm0 */
+
+level0 0 /* level 0 pairs vectors that are 128 coefficients apart, in two passes of 64 */
+level0 1
+
+levels1t6 0 /* levels 1 to 6 on coefficients 0..127 */
+levels1t6 1 /* levels 1 to 6 on coefficients 128..255 */
+
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.h
new file mode 100644
index 0000000000..a4f48e343b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/ntt.h
@@ -0,0 +1,28 @@
+#ifndef NTT_H
+#define NTT_H
+
+#include <stdint.h> /* uint8_t */
+#include <immintrin.h> /* __m256i */
+
+#define ntt_avx KYBER_NAMESPACE(ntt_avx)
+void ntt_avx(__m256i *r, const __m256i *qdata); /* r: in/out vectors; qdata: constant table */
+#define invntt_avx KYBER_NAMESPACE(invntt_avx)
+void invntt_avx(__m256i *r, const __m256i *qdata);
+
+#define nttpack_avx KYBER_NAMESPACE(nttpack_avx)
+void nttpack_avx(__m256i *r, const __m256i *qdata);
+#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx)
+void nttunpack_avx(__m256i *r, const __m256i *qdata);
+
+#define basemul_avx KYBER_NAMESPACE(basemul_avx)
+void basemul_avx(__m256i *r,
+ const __m256i *a,
+ const __m256i *b,
+ const __m256i *qdata);
+
+#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx)
+void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); /* vectors -> bytes */
+#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx)
+void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); /* bytes -> vectors */
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/params.h
new file mode 100644
index 0000000000..fdc688ea2b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/params.h
@@ -0,0 +1,68 @@
+#ifndef PARAMS_H
+#define PARAMS_H
+
+#ifndef KYBER_K
+#define KYBER_K 3 /* Default only; a KYBER_K defined before this header is included takes precedence */
+#endif
+
+//#define KYBER_90S /* Uncomment this if you want the 90S variant */
+
+/* Don't change parameters below this line */
+#if (KYBER_K == 2) /* KYBER_NAMESPACE(s) prefixes every symbol with a parameter-set specific tag */
+#ifdef KYBER_90S
+#define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s
+#else
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_avx2_##s
+#endif
+#elif (KYBER_K == 3)
+#ifdef KYBER_90S
+#define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s
+#else
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_avx2_##s
+#endif
+#elif (KYBER_K == 4)
+#ifdef KYBER_90S
+#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s
+#else
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_avx2_##s
+#endif
+#else
+#error "KYBER_K must be in {2,3,4}"
+#endif
+
+#define KYBER_N 256 /* element count of a poly: poly.h declares ALIGNED_INT16(KYBER_N) */
+#define KYBER_Q 3329 /* the q referred to by the comments in poly.c */
+
+#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */
+#define KYBER_SSBYTES 32 /* size in bytes of shared key */
+
+#define KYBER_POLYBYTES 384 /* size of the poly_tobytes / poly_frombytes byte array */
+#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES)
+
+#if KYBER_K == 2 /* per-K noise parameter and compressed sizes */
+#define KYBER_ETA1 3
+#define KYBER_POLYCOMPRESSEDBYTES 128
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320)
+#elif KYBER_K == 3
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 128
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320)
+#elif KYBER_K == 4
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 160
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352)
+#endif
+
+#define KYBER_ETA2 2
+
+#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES)
+#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES)
+#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES)
+#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES)
+
+#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES)
+/* 32 bytes of additional space to save H(pk) */
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES)
+#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES)
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.c
new file mode 100644
index 0000000000..681fd6d23e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.c
@@ -0,0 +1,519 @@
+#include <stdint.h>
+#include <immintrin.h>
+#include <string.h>
+#include "align.h"
+#include "fips202x4.h"
+#include "params.h"
+#include "poly.h"
+#include "ntt.h"
+#include "consts.h"
+#include "reduce.h"
+#include "cbd.h"
+#include "symmetric.h"
+
+/*************************************************
+* Name: poly_compress
+*
+* Description: Compression and subsequent serialization of a polynomial.
+* The coefficients of the input polynomial are assumed to
+* lie in the interval [0,q], i.e. the polynomial must be reduced
+* by poly_reduce().
+*
+* Arguments: - uint8_t *r: pointer to output byte array
+* (of length KYBER_POLYCOMPRESSEDBYTES)
+* - const poly *a: pointer to input polynomial
+**************************************************/
+#if (KYBER_POLYCOMPRESSEDBYTES == 128)
+void poly_compress(uint8_t r[128], const poly * restrict a)
+{
+ unsigned int i;
+ __m256i f0, f1, f2, f3;
+ const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
+ const __m256i shift1 = _mm256_set1_epi16(1 << 9);
+ const __m256i mask = _mm256_set1_epi16(15); /* keep 4 bits per coefficient */
+ const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); /* maddubs weights: even byte + 16*odd byte */
+ const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0);
+
+ for(i=0;i<KYBER_N/64;i++) { /* 4 vectors (64 coefficients) in, 32 bytes out per pass */
+ f0 = _mm256_load_si256(&a->vec[4*i+0]);
+ f1 = _mm256_load_si256(&a->vec[4*i+1]);
+ f2 = _mm256_load_si256(&a->vec[4*i+2]);
+ f3 = _mm256_load_si256(&a->vec[4*i+3]);
+ f0 = _mm256_mulhi_epi16(f0,v);
+ f1 = _mm256_mulhi_epi16(f1,v);
+ f2 = _mm256_mulhi_epi16(f2,v);
+ f3 = _mm256_mulhi_epi16(f3,v);
+ f0 = _mm256_mulhrs_epi16(f0,shift1);
+ f1 = _mm256_mulhrs_epi16(f1,shift1);
+ f2 = _mm256_mulhrs_epi16(f2,shift1);
+ f3 = _mm256_mulhrs_epi16(f3,shift1);
+ f0 = _mm256_and_si256(f0,mask);
+ f1 = _mm256_and_si256(f1,mask);
+ f2 = _mm256_and_si256(f2,mask);
+ f3 = _mm256_and_si256(f3,mask);
+ f0 = _mm256_packus_epi16(f0,f1);
+ f2 = _mm256_packus_epi16(f2,f3);
+ f0 = _mm256_maddubs_epi16(f0,shift2);
+ f2 = _mm256_maddubs_epi16(f2,shift2);
+ f0 = _mm256_packus_epi16(f0,f2);
+ f0 = _mm256_permutevar8x32_epi32(f0,permdidx); /* undo the per-lane interleaving of the packs */
+ _mm256_storeu_si256((__m256i *)&r[32*i],f0); /* r need not be 32-byte aligned */
+ }
+}
+
+void poly_decompress(poly * restrict r, const uint8_t a[128])
+{
+ unsigned int i;
+ __m128i t;
+ __m256i f;
+ const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]);
+ const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4,
+ 3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); /* each input byte replicated four times */
+ const __m256i mask = _mm256_set1_epi32(0x00F0000F); /* low nibble in even words, high nibble in odd words */
+ const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); /* moves either nibble to bits 11..14 */
+
+ for(i=0;i<KYBER_N/16;i++) { /* 8 bytes in, one vector (16 coefficients) out per pass */
+ t = _mm_loadl_epi64((__m128i *)&a[8*i]);
+ f = _mm256_broadcastsi128_si256(t);
+ f = _mm256_shuffle_epi8(f,shufbidx);
+ f = _mm256_and_si256(f,mask);
+ f = _mm256_mullo_epi16(f,shift);
+ f = _mm256_mulhrs_epi16(f,q); /* rounded (nibble << 11) * q / 2^15 */
+ _mm256_store_si256(&r->vec[i],f);
+ }
+}
+
+#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
+void poly_compress(uint8_t r[160], const poly * restrict a)
+{
+ unsigned int i;
+ __m256i f0, f1;
+ __m128i t0, t1;
+ const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
+ const __m256i shift1 = _mm256_set1_epi16(1 << 10);
+ const __m256i mask = _mm256_set1_epi16(31); /* keep 5 bits per coefficient */
+ const __m256i shift2 = _mm256_set1_epi16((32 << 8) + 1); /* maddubs weights: even byte + 32*odd byte */
+ const __m256i shift3 = _mm256_set1_epi32((1024 << 16) + 1); /* madd weights: even word + 1024*odd word */
+ const __m256i sllvdidx = _mm256_set1_epi64x(12);
+ const __m256i shufbidx = _mm256_set_epi8( 8,-1,-1,-1,-1,-1, 4, 3, 2, 1, 0,-1,12,11,10, 9,
+ -1,12,11,10, 9, 8,-1,-1,-1,-1,-1 ,4, 3, 2, 1, 0);
+
+ for(i=0;i<KYBER_N/32;i++) { /* 2 vectors (32 coefficients) in, 20 bytes out per pass */
+ f0 = _mm256_load_si256(&a->vec[2*i+0]);
+ f1 = _mm256_load_si256(&a->vec[2*i+1]);
+ f0 = _mm256_mulhi_epi16(f0,v);
+ f1 = _mm256_mulhi_epi16(f1,v);
+ f0 = _mm256_mulhrs_epi16(f0,shift1);
+ f1 = _mm256_mulhrs_epi16(f1,shift1);
+ f0 = _mm256_and_si256(f0,mask);
+ f1 = _mm256_and_si256(f1,mask);
+ f0 = _mm256_packus_epi16(f0,f1);
+ f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7
+ f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3
+ f0 = _mm256_sllv_epi32(f0,sllvdidx);
+ f0 = _mm256_srlv_epi64(f0,sllvdidx);
+ f0 = _mm256_shuffle_epi8(f0,shufbidx);
+ t0 = _mm256_castsi256_si128(f0);
+ t1 = _mm256_extracti128_si256(f0,1);
+ t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); /* low half of shufbidx reused as blend selector */
+ _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); /* bytes 0..15 of this 20-byte group */
+ memcpy(&r[20*i+16],&t1,4); /* bytes 16..19 of this 20-byte group */
+ }
+}
+
+void poly_decompress(poly * restrict r, const uint8_t a[160])
+{
+ unsigned int i;
+ __m128i t;
+ __m256i f;
+ int16_t ti;
+ const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]);
+ const __m256i shufbidx = _mm256_set_epi8(9,9,9,8,8,8,8,7,7,6,6,6,6,5,5,5,
+ 4,4,4,3,3,3,3,2,2,1,1,1,1,0,0,0);
+ const __m256i mask = _mm256_set_epi16(248,1984,62,496,3968,124,992,31,
+ 248,1984,62,496,3968,124,992,31); /* selects the 5-bit field inside each word */
+ const __m256i shift = _mm256_set_epi16(128,16,512,64,8,256,32,1024,
+ 128,16,512,64,8,256,32,1024); /* moves each 5-bit field to bits 10..14 */
+
+ for(i=0;i<KYBER_N/16;i++) { /* 10 bytes in, one vector (16 coefficients) out per pass */
+ t = _mm_loadl_epi64((__m128i *)&a[10*i+0]);
+ memcpy(&ti,&a[10*i+8],2); /* bytes 8..9 are fetched separately so nothing past the 10-byte group is read */
+ t = _mm_insert_epi16(t,ti,4);
+ f = _mm256_broadcastsi128_si256(t);
+ f = _mm256_shuffle_epi8(f,shufbidx);
+ f = _mm256_and_si256(f,mask);
+ f = _mm256_mullo_epi16(f,shift);
+ f = _mm256_mulhrs_epi16(f,q); /* rounded (field << 10) * q / 2^15 */
+ _mm256_store_si256(&r->vec[i],f);
+ }
+}
+
+#endif
+
+/*************************************************
+* Name: poly_tobytes
+*
+* Description: Serialization of a polynomial in NTT representation.
+* The coefficients of the input polynomial are assumed to
+* lie in the interval [0,q], i.e. the polynomial must be reduced
+* by poly_reduce(). The coefficients are ordered as output by
+* poly_ntt(); the serialized output coefficients are in bitreversed
+* order.
+*
+* Arguments: - uint8_t *r: pointer to output byte array
+* (needs space for KYBER_POLYBYTES bytes)
+* - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a)
+{
+ ntttobytes_avx(r, a->vec, qdata.vec);
+}
+
+/*************************************************
+* Name: poly_frombytes
+*
+* Description: De-serialization of a polynomial;
+* inverse of poly_tobytes (forwards to nttfrombytes_avx)
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *a: pointer to input byte array
+* (of KYBER_POLYBYTES bytes)
+**************************************************/
+void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES])
+{
+ nttfrombytes_avx(r->vec, a, qdata.vec);
+}
+
+/*************************************************
+* Name: poly_frommsg
+*
+* Description: Convert 32-byte message to polynomial
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *msg: pointer to input message
+**************************************************/
+void poly_frommsg(poly * restrict r, const uint8_t msg[KYBER_INDCPA_MSGBYTES])
+{
+#if (KYBER_INDCPA_MSGBYTES != 32)
+#error "KYBER_INDCPA_MSGBYTES must be equal to 32!"
+#endif
+ __m256i f, g0, g1, g2, g3, h0, h1, h2, h3;
+ const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3)); /* per-dword shift counts, same in both lanes */
+ const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0));
+ const __m256i hqs = _mm256_set1_epi16((KYBER_Q+1)/2); /* (q+1)/2 in every 16-bit lane */
+
+#define FROMMSG64(i) \
+ g3 = _mm256_shuffle_epi32(f,0x55*i); \
+ g3 = _mm256_sllv_epi32(g3,shift); \
+ g3 = _mm256_shuffle_epi8(g3,idx); \
+ g0 = _mm256_slli_epi16(g3,12); \
+ g1 = _mm256_slli_epi16(g3,8); \
+ g2 = _mm256_slli_epi16(g3,4); \
+ g0 = _mm256_srai_epi16(g0,15); \
+ g1 = _mm256_srai_epi16(g1,15); \
+ g2 = _mm256_srai_epi16(g2,15); \
+ g3 = _mm256_srai_epi16(g3,15); \
+ g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \
+ g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \
+ g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \
+ g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \
+ h0 = _mm256_unpacklo_epi64(g0,g1); \
+ h2 = _mm256_unpackhi_epi64(g0,g1); \
+ h1 = _mm256_unpacklo_epi64(g2,g3); \
+ h3 = _mm256_unpackhi_epi64(g2,g3); \
+ g0 = _mm256_permute2x128_si256(h0,h1,0x20); \
+ g2 = _mm256_permute2x128_si256(h0,h1,0x31); \
+ g1 = _mm256_permute2x128_si256(h2,h3,0x20); \
+ g3 = _mm256_permute2x128_si256(h2,h3,0x31); \
+ _mm256_store_si256(&r->vec[0+2*i+0],g0); \
+ _mm256_store_si256(&r->vec[0+2*i+1],g1); \
+ _mm256_store_si256(&r->vec[8+2*i+0],g2); \
+ _mm256_store_si256(&r->vec[8+2*i+1],g3)
+
+ f = _mm256_loadu_si256((__m256i *)msg); /* unaligned load of all 32 message bytes */
+ FROMMSG64(0); /* each expansion stores r->vec[2*i], [2*i+1], [8+2*i] and [8+2*i+1] */
+ FROMMSG64(1);
+ FROMMSG64(2);
+ FROMMSG64(3);
+}
+
+/*************************************************
+* Name: poly_tomsg
+*
+* Description: Convert polynomial to 32-byte message.
+* The coefficients of the input polynomial are assumed to
+* lie in the interval [0,q], i.e. the polynomial must be reduced
+* by poly_reduce().
+*
+* Arguments: - uint8_t *msg: pointer to output message
+* - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly * restrict a)
+{
+ unsigned int i;
+ uint32_t small;
+ __m256i f0, f1, g0, g1;
+ const __m256i hq = _mm256_set1_epi16((KYBER_Q - 1)/2);
+ const __m256i hhq = _mm256_set1_epi16((KYBER_Q - 1)/4);
+
+ for(i=0;i<KYBER_N/32;i++) { /* 2 vectors (32 coefficients) in, 4 message bytes out per pass */
+ f0 = _mm256_load_si256(&a->vec[2*i+0]);
+ f1 = _mm256_load_si256(&a->vec[2*i+1]);
+ f0 = _mm256_sub_epi16(hq, f0);
+ f1 = _mm256_sub_epi16(hq, f1);
+ g0 = _mm256_srai_epi16(f0, 15); /* all-ones where the difference is negative */
+ g1 = _mm256_srai_epi16(f1, 15);
+ f0 = _mm256_xor_si256(f0, g0);
+ f1 = _mm256_xor_si256(f1, g1);
+ f0 = _mm256_sub_epi16(f0, hhq);
+ f1 = _mm256_sub_epi16(f1, hhq);
+ f0 = _mm256_packs_epi16(f0, f1);
+ f0 = _mm256_permute4x64_epi64(f0, 0xD8); /* restore coefficient order after the per-lane pack */
+ small = _mm256_movemask_epi8(f0); /* one sign bit per coefficient -> 32 message bits */
+ memcpy(&msg[4*i], &small, 4);
+ }
+}
+
+/*************************************************
+* Name: poly_getnoise_eta1
+*
+* Description: Sample a polynomial deterministically from a seed and a nonce,
+* with output polynomial close to centered binomial distribution
+* with parameter KYBER_ETA1
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *seed: pointer to input seed
+* (of length KYBER_SYMBYTES bytes)
+* - uint8_t nonce: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+ ALIGNED_UINT8(KYBER_ETA1*KYBER_N/4+32) buf; // +32 bytes as required by poly_cbd_eta1
+ prf(buf.coeffs, KYBER_ETA1*KYBER_N/4, seed, nonce); // requests KYBER_ETA1*KYBER_N/4 bytes; the +32 tail is not requested
+ poly_cbd_eta1(r, buf.vec); // buf is handed over through its vector view
+}
+
+/*************************************************
+* Name: poly_getnoise_eta2
+*
+* Description: Sample a polynomial deterministically from a seed and a nonce,
+* with output polynomial close to centered binomial distribution
+* with parameter KYBER_ETA2
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *seed: pointer to input seed
+* (of length KYBER_SYMBYTES bytes)
+* - uint8_t nonce: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+ ALIGNED_UINT8(KYBER_ETA2*KYBER_N/4) buf; // no extra tail here, unlike poly_getnoise_eta1
+ prf(buf.coeffs, KYBER_ETA2*KYBER_N/4, seed, nonce); // requests KYBER_ETA2*KYBER_N/4 bytes
+ poly_cbd_eta2(r, buf.vec);
+}
+
+#ifndef KYBER_90S
+#define NOISE_NBLOCKS ((KYBER_ETA1*KYBER_N/4+SHAKE256_RATE-1)/SHAKE256_RATE)
+void poly_getnoise_eta1_4x(poly *r0, /* four-way variant: fills r0..r3 from one seed and four nonces */
+ poly *r1,
+ poly *r2,
+ poly *r3,
+ const uint8_t seed[32],
+ uint8_t nonce0,
+ uint8_t nonce1,
+ uint8_t nonce2,
+ uint8_t nonce3)
+{
+ ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; /* one whole-block output buffer per lane */
+ __m256i f;
+ shake256x4incctx state;
+
+ f = _mm256_loadu_si256((__m256i *)seed); /* seed may be unaligned */
+ _mm256_store_si256(buf[0].vec, f); /* copy the 32 seed bytes to the start of every buffer */
+ _mm256_store_si256(buf[1].vec, f);
+ _mm256_store_si256(buf[2].vec, f);
+ _mm256_store_si256(buf[3].vec, f);
+
+ buf[0].coeffs[32] = nonce0; /* byte 32 holds the nonce; assumes .coeffs and .vec alias the same storage - confirm in align.h */
+ buf[1].coeffs[32] = nonce1;
+ buf[2].coeffs[32] = nonce2;
+ buf[3].coeffs[32] = nonce3;
+
+ shake256x4_inc_init(&state);
+ shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); /* 33 = 32 seed bytes + 1 nonce byte */
+ shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); /* output reuses the input buffers */
+ shake256x4_inc_ctx_release(&state);
+
+ poly_cbd_eta1(r0, buf[0].vec);
+ poly_cbd_eta1(r1, buf[1].vec);
+ poly_cbd_eta1(r2, buf[2].vec);
+ poly_cbd_eta1(r3, buf[3].vec);
+}
+
+#if KYBER_K == 2
+void poly_getnoise_eta1122_4x(poly *r0, /* like poly_getnoise_eta1_4x, but r2 and r3 use poly_cbd_eta2 */
+ poly *r1,
+ poly *r2,
+ poly *r3,
+ const uint8_t seed[32],
+ uint8_t nonce0,
+ uint8_t nonce1,
+ uint8_t nonce2,
+ uint8_t nonce3)
+{
+ ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; /* sized from KYBER_ETA1 via NOISE_NBLOCKS for all four lanes */
+ __m256i f;
+ shake256x4incctx state;
+
+ f = _mm256_loadu_si256((__m256i *)seed);
+ _mm256_store_si256(buf[0].vec, f); /* copy the 32 seed bytes to the start of every buffer */
+ _mm256_store_si256(buf[1].vec, f);
+ _mm256_store_si256(buf[2].vec, f);
+ _mm256_store_si256(buf[3].vec, f);
+
+ buf[0].coeffs[32] = nonce0; /* nonce byte placed at index 32 of each buffer */
+ buf[1].coeffs[32] = nonce1;
+ buf[2].coeffs[32] = nonce2;
+ buf[3].coeffs[32] = nonce3;
+
+ shake256x4_inc_init(&state);
+ shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); /* 33 = 32 seed bytes + 1 nonce byte */
+ shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state);
+ shake256x4_inc_ctx_release(&state);
+
+ poly_cbd_eta1(r0, buf[0].vec); /* first two outputs: eta1 sampler */
+ poly_cbd_eta1(r1, buf[1].vec);
+ poly_cbd_eta2(r2, buf[2].vec); /* last two outputs: eta2 sampler */
+ poly_cbd_eta2(r3, buf[3].vec);
+}
+#endif
+#endif
+
+/*************************************************
+* Name: poly_ntt
+*
+* Description: Computes negacyclic number-theoretic transform (NTT) of
+* a polynomial in place (forwards to ntt_avx).
+* Input coefficients assumed to be in normal order,
+* output coefficients are in special order that is natural
+* for the vectorization. Input coefficients are assumed to be
+* bounded by q in absolute value, output coefficients are bounded
+* by 16118 in absolute value.
+*
+* Arguments: - poly *r: pointer to in/output polynomial
+**************************************************/
+void poly_ntt(poly *r)
+{
+ ntt_avx(r->vec, qdata.vec);
+}
+
+/*************************************************
+* Name: poly_invntt_tomont
+*
+* Description: Computes inverse of negacyclic number-theoretic transform (NTT)
+* of a polynomial in place (forwards to invntt_avx);
+* Input coefficients assumed to be in special order from vectorized
+* forward ntt, output in normal order. Input coefficients can be
+* arbitrary 16-bit integers, output coefficients are bounded by 14870
+* in absolute value.
+*
+* Arguments: - poly *r: pointer to in/output polynomial
+**************************************************/
+void poly_invntt_tomont(poly *r)
+{
+ invntt_avx(r->vec, qdata.vec);
+}
+
+void poly_nttunpack(poly *r) /* in-place wrapper around nttunpack_avx */
+{
+ nttunpack_avx(r->vec, qdata.vec); /* r is both input and output; qdata is the shared constant table */
+}
+
+/*************************************************
+* Name: poly_basemul_montgomery
+*
+* Description: Multiplication of two polynomials in NTT domain
+* (forwards to basemul_avx).
+* One of the input polynomials needs to have coefficients
+* bounded by q, the other polynomial can have arbitrary
+* coefficients. Output coefficients are bounded by 6656.
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const poly *a: pointer to first input polynomial
+* - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_basemul_montgomery(poly *r, const poly *a, const poly *b)
+{
+ basemul_avx(r->vec, a->vec, b->vec, qdata.vec);
+}
+
+/*************************************************
+* Name: poly_tomont
+*
+* Description: Inplace conversion of all coefficients of a polynomial
+* from normal domain to Montgomery domain (forwards to tomont_avx)
+*
+* Arguments: - poly *r: pointer to input/output polynomial
+**************************************************/
+void poly_tomont(poly *r)
+{
+ tomont_avx(r->vec, qdata.vec);
+}
+
+/*************************************************
+* Name: poly_reduce
+*
+* Description: Applies Barrett reduction to all coefficients of a polynomial
+* in place (forwards to reduce_avx); for details see reduce.c
+*
+* Arguments: - poly *r: pointer to input/output polynomial
+**************************************************/
+void poly_reduce(poly *r)
+{
+ reduce_avx(r->vec, qdata.vec);
+}
+
+/*************************************************
+* Name: poly_add
+*
+* Description: Add two polynomials. No modular reduction
+* is performed.
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const poly *a: pointer to first input polynomial
+* - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_add(poly *r, const poly *a, const poly *b)
+{
+ unsigned int i;
+ __m256i f0, f1;
+
+ for(i=0;i<KYBER_N/16;i++) { /* one vector (16 coefficients) per pass */
+ f0 = _mm256_load_si256(&a->vec[i]);
+ f1 = _mm256_load_si256(&b->vec[i]);
+ f0 = _mm256_add_epi16(f0, f1); /* wrapping 16-bit add */
+ _mm256_store_si256(&r->vec[i], f0); /* r may alias a or b: each vector is loaded before it is stored */
+ }
+}
+
+/*************************************************
+* Name: poly_sub
+*
+* Description: Subtract two polynomials. No modular reduction
+* is performed.
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const poly *a: pointer to first input polynomial
+* - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_sub(poly *r, const poly *a, const poly *b)
+{
+ unsigned int i;
+ __m256i f0, f1;
+
+ for(i=0;i<KYBER_N/16;i++) { /* one vector (16 coefficients) per pass */
+ f0 = _mm256_load_si256(&a->vec[i]);
+ f1 = _mm256_load_si256(&b->vec[i]);
+ f0 = _mm256_sub_epi16(f0, f1); /* a - b, wrapping 16-bit subtract */
+ _mm256_store_si256(&r->vec[i], f0);
+ }
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.h
new file mode 100644
index 0000000000..6a9cf71c70
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/poly.h
@@ -0,0 +1,77 @@
+#ifndef POLY_H
+#define POLY_H
+
+#include <stdint.h> /* uint8_t */
+#include "align.h"
+#include "params.h"
+
+typedef ALIGNED_INT16(KYBER_N) poly; /* KYBER_N 16-bit elements; the sources access it through ->vec[] */
+
+#define poly_compress KYBER_NAMESPACE(poly_compress)
+void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a);
+#define poly_decompress KYBER_NAMESPACE(poly_decompress)
+void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]);
+
+#define poly_tobytes KYBER_NAMESPACE(poly_tobytes)
+void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a);
+#define poly_frombytes KYBER_NAMESPACE(poly_frombytes)
+void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]);
+
+#define poly_frommsg KYBER_NAMESPACE(poly_frommsg)
+void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]);
+#define poly_tomsg KYBER_NAMESPACE(poly_tomsg)
+void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r);
+
+#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1)
+void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2)
+void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#ifndef KYBER_90S
+#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) /* NOTE(review): link name says eta2_4x, not eta1_4x - confirm intended */
+void poly_getnoise_eta1_4x(poly *r0,
+ poly *r1,
+ poly *r2,
+ poly *r3,
+ const uint8_t seed[32],
+ uint8_t nonce0,
+ uint8_t nonce1,
+ uint8_t nonce2,
+ uint8_t nonce3);
+
+#if KYBER_K == 2
+#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x)
+void poly_getnoise_eta1122_4x(poly *r0,
+ poly *r1,
+ poly *r2,
+ poly *r3,
+ const uint8_t seed[32],
+ uint8_t nonce0,
+ uint8_t nonce1,
+ uint8_t nonce2,
+ uint8_t nonce3);
+#endif
+#endif
+
+
+#define poly_ntt KYBER_NAMESPACE(poly_ntt)
+void poly_ntt(poly *r);
+#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont)
+void poly_invntt_tomont(poly *r);
+#define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack)
+void poly_nttunpack(poly *r);
+#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery)
+void poly_basemul_montgomery(poly *r, const poly *a, const poly *b);
+#define poly_tomont KYBER_NAMESPACE(poly_tomont)
+void poly_tomont(poly *r);
+
+#define poly_reduce KYBER_NAMESPACE(poly_reduce)
+void poly_reduce(poly *r);
+
+#define poly_add KYBER_NAMESPACE(poly_add)
+void poly_add(poly *r, const poly *a, const poly *b);
+#define poly_sub KYBER_NAMESPACE(poly_sub)
+void poly_sub(poly *r, const poly *a, const poly *b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.c
new file mode 100644
index 0000000000..a0174b7b3f
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.c
@@ -0,0 +1,307 @@
+#include <stdint.h>
+#include <immintrin.h>
+#include <string.h>
+#include "params.h"
+#include "polyvec.h"
+#include "poly.h"
+#include "ntt.h"
+#include "consts.h"
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+static void poly_compress10(uint8_t r[320], const poly * restrict a)
+{
+ unsigned int i;
+ __m256i f0, f1, f2;
+ __m128i t0, t1;
+ const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
+ const __m256i v8 = _mm256_slli_epi16(v,3);
+ const __m256i off = _mm256_set1_epi16(15);
+ const __m256i shift1 = _mm256_set1_epi16(1 << 12);
+ const __m256i mask = _mm256_set1_epi16(1023); /* keep 10 bits per coefficient */
+ const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); /* madd weights: even word + 1024*odd word */
+ const __m256i sllvdidx = _mm256_set1_epi64x(12);
+ const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9,
+ -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0);
+
+ for(i=0;i<KYBER_N/16;i++) { /* one vector (16 coefficients) in, 20 bytes out per pass */
+ f0 = _mm256_load_si256(&a->vec[i]);
+ f1 = _mm256_mullo_epi16(f0,v8);
+ f2 = _mm256_add_epi16(f0,off);
+ f0 = _mm256_slli_epi16(f0,3);
+ f0 = _mm256_mulhi_epi16(f0,v);
+ f2 = _mm256_sub_epi16(f1,f2);
+ f1 = _mm256_andnot_si256(f1,f2);
+ f1 = _mm256_srli_epi16(f1,15);
+ f0 = _mm256_sub_epi16(f0,f1);
+ f0 = _mm256_mulhrs_epi16(f0,shift1);
+ f0 = _mm256_and_si256(f0,mask);
+ f0 = _mm256_madd_epi16(f0,shift2);
+ f0 = _mm256_sllv_epi32(f0,sllvdidx);
+ f0 = _mm256_srli_epi64(f0,12);
+ f0 = _mm256_shuffle_epi8(f0,shufbidx);
+ t0 = _mm256_castsi256_si128(f0);
+ t1 = _mm256_extracti128_si256(f0,1);
+ t0 = _mm_blend_epi16(t0,t1,0xE0); /* words 5..7 come from the high lane */
+ _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); /* bytes 0..15 of this 20-byte group */
+ memcpy(&r[20*i+16],&t1,4); /* bytes 16..19 of this 20-byte group */
+ }
+}
+
+static void poly_decompress10(poly * restrict r, const uint8_t a[320+12])
+{
+ unsigned int i;
+ __m256i f;
+ const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q); /* even words: 4q, odd words: q */
+ const __m256i shufbidx = _mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7,
+ 6, 5, 5, 4, 4, 3, 3, 2,
+ 9, 8, 8, 7, 7, 6, 6, 5,
+ 4, 3, 3, 2, 2, 1, 1, 0);
+ const __m256i sllvdidx = _mm256_set1_epi64x(4);
+ const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); /* even words: bits 3..12, odd words: bits 5..14 */
+
+ for(i=0;i<KYBER_N/16;i++) { /* 20 bytes in, one vector (16 coefficients) out per pass */
+ f = _mm256_loadu_si256((__m256i *)&a[20*i]); /* 32-byte load; the last pass reads up to a[331], hence a[320+12] */
+ f = _mm256_permute4x64_epi64(f,0x94); /* low lane = bytes 0..15, high lane = bytes 8..23 */
+ f = _mm256_shuffle_epi8(f,shufbidx);
+ f = _mm256_sllv_epi32(f,sllvdidx);
+ f = _mm256_srli_epi16(f,1);
+ f = _mm256_and_si256(f,mask);
+ f = _mm256_mulhrs_epi16(f,q);
+ _mm256_store_si256(&r->vec[i],f);
+ }
+}
+
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+static void poly_compress11(uint8_t r[352+2], const poly * restrict a)
+{
+ unsigned int i;
+ __m256i f0, f1, f2;
+ __m128i t0, t1;
+ const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]);
+ const __m256i v8 = _mm256_slli_epi16(v,3);
+ const __m256i off = _mm256_set1_epi16(36);
+ const __m256i shift1 = _mm256_set1_epi16(1 << 13);
+ const __m256i mask = _mm256_set1_epi16(2047); /* keep 11 bits per coefficient */
+ const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1); /* madd weights: even word + 2048*odd word */
+ const __m256i sllvdidx = _mm256_set1_epi64x(10);
+ const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10);
+ const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5,
+ -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+
+ for(i=0;i<KYBER_N/16;i++) { /* one vector (16 coefficients) in, 22 bytes out per pass */
+ f0 = _mm256_load_si256(&a->vec[i]);
+ f1 = _mm256_mullo_epi16(f0,v8);
+ f2 = _mm256_add_epi16(f0,off);
+ f0 = _mm256_slli_epi16(f0,3);
+ f0 = _mm256_mulhi_epi16(f0,v);
+ f2 = _mm256_sub_epi16(f1,f2);
+ f1 = _mm256_andnot_si256(f1,f2);
+ f1 = _mm256_srli_epi16(f1,15);
+ f0 = _mm256_sub_epi16(f0,f1);
+ f0 = _mm256_mulhrs_epi16(f0,shift1);
+ f0 = _mm256_and_si256(f0,mask);
+ f0 = _mm256_madd_epi16(f0,shift2);
+ f0 = _mm256_sllv_epi32(f0,sllvdidx);
+ f1 = _mm256_bsrli_epi128(f0,8);
+ f0 = _mm256_srlv_epi64(f0,srlvqidx);
+ f1 = _mm256_slli_epi64(f1,34);
+ f0 = _mm256_add_epi64(f0,f1);
+ f0 = _mm256_shuffle_epi8(f0,shufbidx);
+ t0 = _mm256_castsi256_si128(f0);
+ t1 = _mm256_extracti128_si256(f0,1);
+ t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); /* low half of shufbidx reused as blend selector */
+ _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0);
+ _mm_storel_epi64((__m128i *)&r[22*i+16],t1); /* 8-byte store touching r[22*i+16..22*i+23], hence the +2 slack in r[352+2] */
+ }
+}
+
+static void poly_decompress11(poly * restrict r, const uint8_t a[352+10])
+{
+ unsigned int i;
+ __m256i f;
+ const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]);
+ const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8,
+ 8, 7, 6, 5, 5, 4, 4, 3,
+ 10, 9, 9, 8, 7, 6, 6, 5,
+ 5, 4, 3, 2, 2, 1, 1, 0);
+ const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0);
+ const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0);
+ const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32);
+ const __m256i mask = _mm256_set1_epi16(32752); /* bits 4..14 of every word */
+
+ for(i=0;i<KYBER_N/16;i++) { /* 22 bytes in, one vector (16 coefficients) out per pass */
+ f = _mm256_loadu_si256((__m256i *)&a[22*i]); /* 32-byte load; the last pass reads up to a[361], hence a[352+10] */
+ f = _mm256_permute4x64_epi64(f,0x94); /* low lane = bytes 0..15, high lane = bytes 8..23 */
+ f = _mm256_shuffle_epi8(f,shufbidx);
+ f = _mm256_srlv_epi32(f,srlvdidx);
+ f = _mm256_srlv_epi64(f,srlvqidx);
+ f = _mm256_mullo_epi16(f,shift);
+ f = _mm256_srli_epi16(f,1);
+ f = _mm256_and_si256(f,mask);
+ f = _mm256_mulhrs_epi16(f,q);
+ _mm256_store_si256(&r->vec[i],f);
+ }
+}
+
+#endif
+
+/*************************************************
+* Name: polyvec_compress
+*
+* Description: Compress and serialize vector of polynomials
+*
+* Arguments: - uint8_t *r: pointer to output byte array
+* (needs space for KYBER_POLYVECCOMPRESSEDBYTES plus 2 bytes of slack)
+* - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a)
+{
+ unsigned int i;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+ for(i=0;i<KYBER_K;i++) /* 320 output bytes per polynomial */
+ poly_compress10(&r[320*i],&a->vec[i]);
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+ for(i=0;i<KYBER_K;i++) /* 352 output bytes per polynomial; poly_compress11 is declared with r[352+2] */
+ poly_compress11(&r[352*i],&a->vec[i]);
+#endif
+}
+
+/*************************************************
+* Name: polyvec_decompress
+*
+* Description: De-serialize and decompress vector of polynomials;
+* approximate inverse of polyvec_compress
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+* - const uint8_t *a: pointer to input byte array
+* (of length KYBER_POLYVECCOMPRESSEDBYTES plus 12 readable bytes of slack)
+**************************************************/
+void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12])
+{
+ unsigned int i;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+ for(i=0;i<KYBER_K;i++) /* 320 input bytes per polynomial; poly_decompress10 is declared with a[320+12] */
+ poly_decompress10(&r->vec[i],&a[320*i]);
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+ for(i=0;i<KYBER_K;i++) /* 352 input bytes per polynomial; poly_decompress11 is declared with a[352+10] */
+ poly_decompress11(&r->vec[i],&a[352*i]);
+#endif
+}
+
+/*************************************************
+* Name: polyvec_tobytes
+*
+* Description: Serialize vector of polynomials
+*
+* Arguments: - uint8_t *r: pointer to output byte array
+* (needs space for KYBER_POLYVECBYTES)
+* - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a)
+{
+ unsigned int i;
+ for(i=0;i<KYBER_K;i++) /* polynomial i occupies bytes [i*KYBER_POLYBYTES, (i+1)*KYBER_POLYBYTES) */
+ poly_tobytes(r+i*KYBER_POLYBYTES, &a->vec[i]);
+}
+
+/*************************************************
+* Name: polyvec_frombytes
+*
+* Description: De-serialize vector of polynomials;
+* inverse of polyvec_tobytes
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+* - const uint8_t *a: pointer to input byte array
+* (of length KYBER_POLYVECBYTES)
+**************************************************/
+void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES])
+{
+ unsigned int i;
+ for(i=0;i<KYBER_K;i++) /* polynomial i is read from byte offset i*KYBER_POLYBYTES */
+ poly_frombytes(&r->vec[i], a+i*KYBER_POLYBYTES);
+}
+
+/*************************************************
+* Name: polyvec_ntt
+*
+* Description: Apply forward NTT to all elements of a vector of polynomials
+*
+* Arguments: - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+void polyvec_ntt(polyvec *r)
+{
+ unsigned int i;
+ for(i=0;i<KYBER_K;i++) /* each of the KYBER_K polynomials is transformed in place */
+ poly_ntt(&r->vec[i]);
+}
+
+/*************************************************
+* Name: polyvec_invntt_tomont
+*
+* Description: Apply inverse NTT to all elements of a vector of polynomials
+* and multiply by Montgomery factor 2^16
+*
+* Arguments: - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+void polyvec_invntt_tomont(polyvec *r)
+{
+ unsigned int i;
+ for(i=0;i<KYBER_K;i++) /* each of the KYBER_K polynomials is transformed in place */
+ poly_invntt_tomont(&r->vec[i]);
+}
+
+/*************************************************
+* Name: polyvec_basemul_acc_montgomery
+*
+* Description: Multiply elements in a and b in NTT domain, accumulate into r,
+* and multiply by 2^-16.
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const polyvec *a: pointer to first input vector of polynomials
+* - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
+{
+ unsigned int i;
+ poly tmp;
+
+ poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); /* the first product initializes r */
+ for(i=1;i<KYBER_K;i++) { /* remaining products are formed in tmp and added on */
+ poly_basemul_montgomery(&tmp,&a->vec[i],&b->vec[i]);
+ poly_add(r,r,&tmp); /* poly_add performs no modular reduction */
+ }
+}
+
+/*************************************************
+* Name: polyvec_reduce
+*
+* Description: Applies Barrett reduction to each coefficient
+* of each element of a vector of polynomials;
+* for details of the Barrett reduction see comments in reduce.c
+*
+* Arguments: - polyvec *r: pointer to input/output vector of polynomials
+**************************************************/
+void polyvec_reduce(polyvec *r)
+{
+ unsigned int i;
+ for(i=0;i<KYBER_K;i++) /* each of the KYBER_K polynomials is reduced in place */
+ poly_reduce(&r->vec[i]);
+}
+
+/*************************************************
+* Name: polyvec_add
+*
+* Description: Add vectors of polynomials (element-wise poly_add; no modular reduction)
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+* - const polyvec *a: pointer to first input vector of polynomials
+* - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b)
+{
+ unsigned int i;
+ for(i=0;i<KYBER_K;i++) /* r->vec[i] = a->vec[i] + b->vec[i] for every index */
+ poly_add(&r->vec[i], &a->vec[i], &b->vec[i]);
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.h
new file mode 100644
index 0000000000..2ce23c31ff
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/polyvec.h
@@ -0,0 +1,36 @@
+#ifndef POLYVEC_H
+#define POLYVEC_H
+
+#include <stdint.h>
+#include "params.h"
+#include "poly.h"
+
+typedef struct{
+ poly vec[KYBER_K];
+} polyvec;
+
+#define polyvec_compress KYBER_NAMESPACE(polyvec_compress)
+void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a);
+#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress)
+void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]);
+
+#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes)
+void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a);
+#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes)
+void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]);
+
+#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt)
+void polyvec_ntt(polyvec *r);
+#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont)
+void polyvec_invntt_tomont(polyvec *r);
+
+#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery)
+void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b);
+
+#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce)
+void polyvec_reduce(polyvec *r);
+
+#define polyvec_add KYBER_NAMESPACE(polyvec_add)
+void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/reduce.h
new file mode 100644
index 0000000000..5368185b5f
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/reduce.h
@@ -0,0 +1,12 @@
+#ifndef REDUCE_H
+#define REDUCE_H
+
+#include "params.h"
+#include <immintrin.h>
+
+#define reduce_avx KYBER_NAMESPACE(reduce_avx)
+void reduce_avx(__m256i *r, const __m256i *qdata);
+#define tomont_avx KYBER_NAMESPACE(tomont_avx)
+void tomont_avx(__m256i *r, const __m256i *qdata);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.c
new file mode 100644
index 0000000000..9060a44cb9
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.c
@@ -0,0 +1,398 @@
+#include <stdint.h>
+#include <immintrin.h>
+#include <string.h>
+#include "params.h"
+#include "consts.h"
+#include "rejsample.h"
+
+//#define BMI /* uncomment to compute shuffle indices with _pdep_u64/_pext_u64 instead of loading them from idx[] */
+
+#ifndef BMI /* the idx[] lookup table is only compiled when the BMI path is disabled */
+static const uint8_t idx[256][8] = {
+ {-1, -1, -1, -1, -1, -1, -1, -1},
+ { 0, -1, -1, -1, -1, -1, -1, -1},
+ { 2, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 2, -1, -1, -1, -1, -1, -1},
+ { 4, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 4, -1, -1, -1, -1, -1, -1},
+ { 2, 4, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 4, -1, -1, -1, -1, -1},
+ { 6, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 6, -1, -1, -1, -1, -1, -1},
+ { 2, 6, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 6, -1, -1, -1, -1, -1},
+ { 4, 6, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 6, -1, -1, -1, -1, -1},
+ { 2, 4, 6, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 6, -1, -1, -1, -1},
+ { 8, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 8, -1, -1, -1, -1, -1, -1},
+ { 2, 8, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 8, -1, -1, -1, -1, -1},
+ { 4, 8, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 8, -1, -1, -1, -1, -1},
+ { 2, 4, 8, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 8, -1, -1, -1, -1},
+ { 6, 8, -1, -1, -1, -1, -1, -1},
+ { 0, 6, 8, -1, -1, -1, -1, -1},
+ { 2, 6, 8, -1, -1, -1, -1, -1},
+ { 0, 2, 6, 8, -1, -1, -1, -1},
+ { 4, 6, 8, -1, -1, -1, -1, -1},
+ { 0, 4, 6, 8, -1, -1, -1, -1},
+ { 2, 4, 6, 8, -1, -1, -1, -1},
+ { 0, 2, 4, 6, 8, -1, -1, -1},
+ {10, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 10, -1, -1, -1, -1, -1, -1},
+ { 2, 10, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 10, -1, -1, -1, -1, -1},
+ { 4, 10, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 10, -1, -1, -1, -1, -1},
+ { 2, 4, 10, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 10, -1, -1, -1, -1},
+ { 6, 10, -1, -1, -1, -1, -1, -1},
+ { 0, 6, 10, -1, -1, -1, -1, -1},
+ { 2, 6, 10, -1, -1, -1, -1, -1},
+ { 0, 2, 6, 10, -1, -1, -1, -1},
+ { 4, 6, 10, -1, -1, -1, -1, -1},
+ { 0, 4, 6, 10, -1, -1, -1, -1},
+ { 2, 4, 6, 10, -1, -1, -1, -1},
+ { 0, 2, 4, 6, 10, -1, -1, -1},
+ { 8, 10, -1, -1, -1, -1, -1, -1},
+ { 0, 8, 10, -1, -1, -1, -1, -1},
+ { 2, 8, 10, -1, -1, -1, -1, -1},
+ { 0, 2, 8, 10, -1, -1, -1, -1},
+ { 4, 8, 10, -1, -1, -1, -1, -1},
+ { 0, 4, 8, 10, -1, -1, -1, -1},
+ { 2, 4, 8, 10, -1, -1, -1, -1},
+ { 0, 2, 4, 8, 10, -1, -1, -1},
+ { 6, 8, 10, -1, -1, -1, -1, -1},
+ { 0, 6, 8, 10, -1, -1, -1, -1},
+ { 2, 6, 8, 10, -1, -1, -1, -1},
+ { 0, 2, 6, 8, 10, -1, -1, -1},
+ { 4, 6, 8, 10, -1, -1, -1, -1},
+ { 0, 4, 6, 8, 10, -1, -1, -1},
+ { 2, 4, 6, 8, 10, -1, -1, -1},
+ { 0, 2, 4, 6, 8, 10, -1, -1},
+ {12, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 12, -1, -1, -1, -1, -1, -1},
+ { 2, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 12, -1, -1, -1, -1, -1},
+ { 4, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 12, -1, -1, -1, -1, -1},
+ { 2, 4, 12, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 12, -1, -1, -1, -1},
+ { 6, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 6, 12, -1, -1, -1, -1, -1},
+ { 2, 6, 12, -1, -1, -1, -1, -1},
+ { 0, 2, 6, 12, -1, -1, -1, -1},
+ { 4, 6, 12, -1, -1, -1, -1, -1},
+ { 0, 4, 6, 12, -1, -1, -1, -1},
+ { 2, 4, 6, 12, -1, -1, -1, -1},
+ { 0, 2, 4, 6, 12, -1, -1, -1},
+ { 8, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 8, 12, -1, -1, -1, -1, -1},
+ { 2, 8, 12, -1, -1, -1, -1, -1},
+ { 0, 2, 8, 12, -1, -1, -1, -1},
+ { 4, 8, 12, -1, -1, -1, -1, -1},
+ { 0, 4, 8, 12, -1, -1, -1, -1},
+ { 2, 4, 8, 12, -1, -1, -1, -1},
+ { 0, 2, 4, 8, 12, -1, -1, -1},
+ { 6, 8, 12, -1, -1, -1, -1, -1},
+ { 0, 6, 8, 12, -1, -1, -1, -1},
+ { 2, 6, 8, 12, -1, -1, -1, -1},
+ { 0, 2, 6, 8, 12, -1, -1, -1},
+ { 4, 6, 8, 12, -1, -1, -1, -1},
+ { 0, 4, 6, 8, 12, -1, -1, -1},
+ { 2, 4, 6, 8, 12, -1, -1, -1},
+ { 0, 2, 4, 6, 8, 12, -1, -1},
+ {10, 12, -1, -1, -1, -1, -1, -1},
+ { 0, 10, 12, -1, -1, -1, -1, -1},
+ { 2, 10, 12, -1, -1, -1, -1, -1},
+ { 0, 2, 10, 12, -1, -1, -1, -1},
+ { 4, 10, 12, -1, -1, -1, -1, -1},
+ { 0, 4, 10, 12, -1, -1, -1, -1},
+ { 2, 4, 10, 12, -1, -1, -1, -1},
+ { 0, 2, 4, 10, 12, -1, -1, -1},
+ { 6, 10, 12, -1, -1, -1, -1, -1},
+ { 0, 6, 10, 12, -1, -1, -1, -1},
+ { 2, 6, 10, 12, -1, -1, -1, -1},
+ { 0, 2, 6, 10, 12, -1, -1, -1},
+ { 4, 6, 10, 12, -1, -1, -1, -1},
+ { 0, 4, 6, 10, 12, -1, -1, -1},
+ { 2, 4, 6, 10, 12, -1, -1, -1},
+ { 0, 2, 4, 6, 10, 12, -1, -1},
+ { 8, 10, 12, -1, -1, -1, -1, -1},
+ { 0, 8, 10, 12, -1, -1, -1, -1},
+ { 2, 8, 10, 12, -1, -1, -1, -1},
+ { 0, 2, 8, 10, 12, -1, -1, -1},
+ { 4, 8, 10, 12, -1, -1, -1, -1},
+ { 0, 4, 8, 10, 12, -1, -1, -1},
+ { 2, 4, 8, 10, 12, -1, -1, -1},
+ { 0, 2, 4, 8, 10, 12, -1, -1},
+ { 6, 8, 10, 12, -1, -1, -1, -1},
+ { 0, 6, 8, 10, 12, -1, -1, -1},
+ { 2, 6, 8, 10, 12, -1, -1, -1},
+ { 0, 2, 6, 8, 10, 12, -1, -1},
+ { 4, 6, 8, 10, 12, -1, -1, -1},
+ { 0, 4, 6, 8, 10, 12, -1, -1},
+ { 2, 4, 6, 8, 10, 12, -1, -1},
+ { 0, 2, 4, 6, 8, 10, 12, -1},
+ {14, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 14, -1, -1, -1, -1, -1, -1},
+ { 2, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 2, 14, -1, -1, -1, -1, -1},
+ { 4, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 4, 14, -1, -1, -1, -1, -1},
+ { 2, 4, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 4, 14, -1, -1, -1, -1},
+ { 6, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 6, 14, -1, -1, -1, -1, -1},
+ { 2, 6, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 6, 14, -1, -1, -1, -1},
+ { 4, 6, 14, -1, -1, -1, -1, -1},
+ { 0, 4, 6, 14, -1, -1, -1, -1},
+ { 2, 4, 6, 14, -1, -1, -1, -1},
+ { 0, 2, 4, 6, 14, -1, -1, -1},
+ { 8, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 8, 14, -1, -1, -1, -1, -1},
+ { 2, 8, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 8, 14, -1, -1, -1, -1},
+ { 4, 8, 14, -1, -1, -1, -1, -1},
+ { 0, 4, 8, 14, -1, -1, -1, -1},
+ { 2, 4, 8, 14, -1, -1, -1, -1},
+ { 0, 2, 4, 8, 14, -1, -1, -1},
+ { 6, 8, 14, -1, -1, -1, -1, -1},
+ { 0, 6, 8, 14, -1, -1, -1, -1},
+ { 2, 6, 8, 14, -1, -1, -1, -1},
+ { 0, 2, 6, 8, 14, -1, -1, -1},
+ { 4, 6, 8, 14, -1, -1, -1, -1},
+ { 0, 4, 6, 8, 14, -1, -1, -1},
+ { 2, 4, 6, 8, 14, -1, -1, -1},
+ { 0, 2, 4, 6, 8, 14, -1, -1},
+ {10, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 10, 14, -1, -1, -1, -1, -1},
+ { 2, 10, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 10, 14, -1, -1, -1, -1},
+ { 4, 10, 14, -1, -1, -1, -1, -1},
+ { 0, 4, 10, 14, -1, -1, -1, -1},
+ { 2, 4, 10, 14, -1, -1, -1, -1},
+ { 0, 2, 4, 10, 14, -1, -1, -1},
+ { 6, 10, 14, -1, -1, -1, -1, -1},
+ { 0, 6, 10, 14, -1, -1, -1, -1},
+ { 2, 6, 10, 14, -1, -1, -1, -1},
+ { 0, 2, 6, 10, 14, -1, -1, -1},
+ { 4, 6, 10, 14, -1, -1, -1, -1},
+ { 0, 4, 6, 10, 14, -1, -1, -1},
+ { 2, 4, 6, 10, 14, -1, -1, -1},
+ { 0, 2, 4, 6, 10, 14, -1, -1},
+ { 8, 10, 14, -1, -1, -1, -1, -1},
+ { 0, 8, 10, 14, -1, -1, -1, -1},
+ { 2, 8, 10, 14, -1, -1, -1, -1},
+ { 0, 2, 8, 10, 14, -1, -1, -1},
+ { 4, 8, 10, 14, -1, -1, -1, -1},
+ { 0, 4, 8, 10, 14, -1, -1, -1},
+ { 2, 4, 8, 10, 14, -1, -1, -1},
+ { 0, 2, 4, 8, 10, 14, -1, -1},
+ { 6, 8, 10, 14, -1, -1, -1, -1},
+ { 0, 6, 8, 10, 14, -1, -1, -1},
+ { 2, 6, 8, 10, 14, -1, -1, -1},
+ { 0, 2, 6, 8, 10, 14, -1, -1},
+ { 4, 6, 8, 10, 14, -1, -1, -1},
+ { 0, 4, 6, 8, 10, 14, -1, -1},
+ { 2, 4, 6, 8, 10, 14, -1, -1},
+ { 0, 2, 4, 6, 8, 10, 14, -1},
+ {12, 14, -1, -1, -1, -1, -1, -1},
+ { 0, 12, 14, -1, -1, -1, -1, -1},
+ { 2, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 2, 12, 14, -1, -1, -1, -1},
+ { 4, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 4, 12, 14, -1, -1, -1, -1},
+ { 2, 4, 12, 14, -1, -1, -1, -1},
+ { 0, 2, 4, 12, 14, -1, -1, -1},
+ { 6, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 6, 12, 14, -1, -1, -1, -1},
+ { 2, 6, 12, 14, -1, -1, -1, -1},
+ { 0, 2, 6, 12, 14, -1, -1, -1},
+ { 4, 6, 12, 14, -1, -1, -1, -1},
+ { 0, 4, 6, 12, 14, -1, -1, -1},
+ { 2, 4, 6, 12, 14, -1, -1, -1},
+ { 0, 2, 4, 6, 12, 14, -1, -1},
+ { 8, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 8, 12, 14, -1, -1, -1, -1},
+ { 2, 8, 12, 14, -1, -1, -1, -1},
+ { 0, 2, 8, 12, 14, -1, -1, -1},
+ { 4, 8, 12, 14, -1, -1, -1, -1},
+ { 0, 4, 8, 12, 14, -1, -1, -1},
+ { 2, 4, 8, 12, 14, -1, -1, -1},
+ { 0, 2, 4, 8, 12, 14, -1, -1},
+ { 6, 8, 12, 14, -1, -1, -1, -1},
+ { 0, 6, 8, 12, 14, -1, -1, -1},
+ { 2, 6, 8, 12, 14, -1, -1, -1},
+ { 0, 2, 6, 8, 12, 14, -1, -1},
+ { 4, 6, 8, 12, 14, -1, -1, -1},
+ { 0, 4, 6, 8, 12, 14, -1, -1},
+ { 2, 4, 6, 8, 12, 14, -1, -1},
+ { 0, 2, 4, 6, 8, 12, 14, -1},
+ {10, 12, 14, -1, -1, -1, -1, -1},
+ { 0, 10, 12, 14, -1, -1, -1, -1},
+ { 2, 10, 12, 14, -1, -1, -1, -1},
+ { 0, 2, 10, 12, 14, -1, -1, -1},
+ { 4, 10, 12, 14, -1, -1, -1, -1},
+ { 0, 4, 10, 12, 14, -1, -1, -1},
+ { 2, 4, 10, 12, 14, -1, -1, -1},
+ { 0, 2, 4, 10, 12, 14, -1, -1},
+ { 6, 10, 12, 14, -1, -1, -1, -1},
+ { 0, 6, 10, 12, 14, -1, -1, -1},
+ { 2, 6, 10, 12, 14, -1, -1, -1},
+ { 0, 2, 6, 10, 12, 14, -1, -1},
+ { 4, 6, 10, 12, 14, -1, -1, -1},
+ { 0, 4, 6, 10, 12, 14, -1, -1},
+ { 2, 4, 6, 10, 12, 14, -1, -1},
+ { 0, 2, 4, 6, 10, 12, 14, -1},
+ { 8, 10, 12, 14, -1, -1, -1, -1},
+ { 0, 8, 10, 12, 14, -1, -1, -1},
+ { 2, 8, 10, 12, 14, -1, -1, -1},
+ { 0, 2, 8, 10, 12, 14, -1, -1},
+ { 4, 8, 10, 12, 14, -1, -1, -1},
+ { 0, 4, 8, 10, 12, 14, -1, -1},
+ { 2, 4, 8, 10, 12, 14, -1, -1},
+ { 0, 2, 4, 8, 10, 12, 14, -1},
+ { 6, 8, 10, 12, 14, -1, -1, -1},
+ { 0, 6, 8, 10, 12, 14, -1, -1},
+ { 2, 6, 8, 10, 12, 14, -1, -1},
+ { 0, 2, 6, 8, 10, 12, 14, -1},
+ { 4, 6, 8, 10, 12, 14, -1, -1},
+ { 0, 4, 6, 8, 10, 12, 14, -1},
+ { 2, 4, 6, 8, 10, 12, 14, -1},
+ { 0, 2, 4, 6, 8, 10, 12, 14}
+};
+#endif
+
+#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a)
+#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a)
+
+unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf) /* returns the number of values written to r; never more than KYBER_N */
+{
+ unsigned int ctr, pos; /* ctr: values accepted so far; pos: byte offset into buf */
+ uint16_t val0, val1;
+ uint32_t good; /* bitmask of candidates that passed the bound comparison */
+#ifdef BMI
+ uint64_t idx0, idx1, idx2, idx3;
+#endif
+ const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]); /* rejection bound; presumably 16 copies of KYBER_Q - TODO confirm against consts.h */
+ const __m256i ones = _mm256_set1_epi8(1);
+ const __m256i mask = _mm256_set1_epi16(0xFFF); /* keeps the low 12 bits of every 16-bit lane */
+ const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10, /* byte shuffle: each 16-bit lane receives the two bytes that hold one 12-bit candidate */
+ 9, 8, 8, 7, 6, 5, 5, 4,
+ 11,10,10, 9, 8, 7, 7, 6,
+ 5, 4, 4, 3, 2, 1, 1, 0);
+ __m256i f0, f1, g0, g1, g2, g3;
+ __m128i f, t, pilo, pihi;
+
+ ctr = pos = 0;
+ while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 56) { /* 32 candidates per pass; the two 32-byte loads touch buf[pos .. pos+55] */
+ f0 = _mm256_loadu_si256((__m256i *)&buf[pos]);
+ f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]);
+ f0 = _mm256_permute4x64_epi64(f0, 0x94); /* qword order 0,1,1,2: the upper 128-bit lane now starts at loaded byte 8 */
+ f1 = _mm256_permute4x64_epi64(f1, 0x94);
+ f0 = _mm256_shuffle_epi8(f0, idx8);
+ f1 = _mm256_shuffle_epi8(f1, idx8);
+ g0 = _mm256_srli_epi16(f0, 4);
+ g1 = _mm256_srli_epi16(f1, 4);
+ f0 = _mm256_blend_epi16(f0, g0, 0xAA); /* odd lanes take the copy shifted right by 4 (candidate starts mid-byte) */
+ f1 = _mm256_blend_epi16(f1, g1, 0xAA);
+ f0 = _mm256_and_si256(f0, mask);
+ f1 = _mm256_and_si256(f1, mask);
+ pos += 48; /* 32 candidates * 12 bits */
+
+ g0 = _mm256_cmpgt_epi16(bound, f0); /* all-ones in lanes where candidate < bound */
+ g1 = _mm256_cmpgt_epi16(bound, f1);
+
+ g0 = _mm256_packs_epi16(g0, g1); /* per 128-bit lane: 8 flag bytes from f0, then 8 flag bytes from f1 */
+ good = _mm256_movemask_epi8(g0); /* bits 0-7: f0 low, 8-15: f1 low, 16-23: f0 high, 24-31: f1 high */
+
+#ifdef BMI
+ idx0 = _pdep_u64(good >> 0, 0x0101010101010101);
+ idx1 = _pdep_u64(good >> 8, 0x0101010101010101);
+ idx2 = _pdep_u64(good >> 16, 0x0101010101010101);
+ idx3 = _pdep_u64(good >> 24, 0x0101010101010101);
+ idx0 = (idx0 << 8) - idx0;
+ idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
+ idx1 = (idx1 << 8) - idx1;
+ idx1 = _pext_u64(0x0E0C0A0806040200, idx1);
+ idx2 = (idx2 << 8) - idx2;
+ idx2 = _pext_u64(0x0E0C0A0806040200, idx2);
+ idx3 = (idx3 << 8) - idx3;
+ idx3 = _pext_u64(0x0E0C0A0806040200, idx3);
+
+ g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0));
+ g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1));
+ g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1);
+ g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1);
+#else
+ g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF])); /* idx row = byte offsets of the accepted lanes, in order */
+ g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF]));
+ g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1);
+ g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1);
+#endif
+
+ g2 = _mm256_add_epi8(g0, ones); /* offset of the high byte of each accepted lane */
+ g3 = _mm256_add_epi8(g1, ones);
+ g0 = _mm256_unpacklo_epi8(g0, g2); /* interleave into (low byte, high byte) shuffle pairs */
+ g1 = _mm256_unpacklo_epi8(g1, g3);
+
+ f0 = _mm256_shuffle_epi8(f0, g0); /* accepted values move to the front of each 128-bit lane */
+ f1 = _mm256_shuffle_epi8(f1, g1);
+
+ _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); /* always stores 8 lanes; ctr advances only by the accepted count, so the next store overwrites the rest */
+ ctr += _mm_popcnt_u32((good >> 0) & 0xFF);
+ _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1));
+ ctr += _mm_popcnt_u32((good >> 16) & 0xFF); /* f0 high lane is reported in mask bits 16-23 */
+ _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1));
+ ctr += _mm_popcnt_u32((good >> 8) & 0xFF); /* f1 low lane is reported in mask bits 8-15 */
+ _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1));
+ ctr += _mm_popcnt_u32((good >> 24) & 0xFF);
+ }
+
+ while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 16) { /* 8 candidates per pass from one 16-byte load */
+ f = _mm_loadu_si128((__m128i *)&buf[pos]);
+ f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8));
+ t = _mm_srli_epi16(f, 4);
+ f = _mm_blend_epi16(f, t, 0xAA);
+ f = _mm_and_si128(f, _mm256_castsi256_si128(mask));
+ pos += 12; /* 8 candidates * 12 bits */
+
+ t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f);
+ good = _mm_movemask_epi8(t); /* two identical bits per 16-bit lane */
+
+#ifdef BMI
+ good &= 0x5555;
+ idx0 = _pdep_u64(good, 0x1111111111111111);
+ idx0 = (idx0 << 8) - idx0;
+ idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
+ pilo = _mm_cvtsi64_si128(idx0);
+#else
+ good = _pext_u32(good, 0x5555); /* keep one bit per lane, giving an 8-bit idx[] row number */
+ pilo = _mm_loadl_epi64((__m128i *)&idx[good]);
+#endif
+
+ pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones));
+ pilo = _mm_unpacklo_epi8(pilo, pihi);
+ f = _mm_shuffle_epi8(f, pilo);
+ _mm_storeu_si128((__m128i *)&r[ctr], f);
+ ctr += _mm_popcnt_u32(good);
+ }
+
+ while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) { /* scalar tail: two 12-bit candidates per 3 bytes */
+ val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF;
+ val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4));
+ pos += 3;
+
+ if(val0 < KYBER_Q)
+ r[ctr++] = val0;
+ if(val1 < KYBER_Q && ctr < KYBER_N) /* re-check ctr: val0 may have filled the last slot */
+ r[ctr++] = val1;
+ }
+
+ return ctr;
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.h
new file mode 100644
index 0000000000..3be5e2192e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/rejsample.h
@@ -0,0 +1,14 @@
+#ifndef REJSAMPLE_H
+#define REJSAMPLE_H
+
+#include <stdint.h>
+#include "params.h"
+#include "symmetric.h"
+
+#define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) /* bytes for KYBER_N 12-bit candidates scaled by 2^12/KYBER_Q, in whole XOF blocks (one block added before the truncating division) */
+#define REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) /* the same quantity expressed in bytes */
+
+#define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx)
+unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.S
new file mode 100644
index 0000000000..18325ebec0
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.S
@@ -0,0 +1,255 @@
+#include "consts.h"
+.include "fq.inc"
+.include "shuffle.inc"
+
+/*
+nttpack_avx:
+#load
+vmovdqa (%rdi),%ymm4
+vmovdqa 32(%rdi),%ymm5
+vmovdqa 64(%rdi),%ymm6
+vmovdqa 96(%rdi),%ymm7
+vmovdqa 128(%rdi),%ymm8
+vmovdqa 160(%rdi),%ymm9
+vmovdqa 192(%rdi),%ymm10
+vmovdqa 224(%rdi),%ymm11
+
+shuffle1 4,5,3,5
+shuffle1 6,7,4,7
+shuffle1 8,9,6,9
+shuffle1 10,11,8,11
+
+shuffle2 3,4,10,4
+shuffle2 6,8,3,8
+shuffle2 5,7,6,7
+shuffle2 9,11,5,11
+
+shuffle4 10,3,9,3
+shuffle4 6,5,10,5
+shuffle4 4,8,6,8
+shuffle4 7,11,4,11
+
+shuffle8 9,10,7,10
+shuffle8 6,4,9,4
+shuffle8 3,5,6,5
+shuffle8 8,11,3,11
+
+#store
+vmovdqa %ymm7,(%rdi)
+vmovdqa %ymm9,32(%rdi)
+vmovdqa %ymm6,64(%rdi)
+vmovdqa %ymm3,96(%rdi)
+vmovdqa %ymm10,128(%rdi)
+vmovdqa %ymm4,160(%rdi)
+vmovdqa %ymm5,192(%rdi)
+vmovdqa %ymm11,224(%rdi)
+
+ret
+*/
+
+.text
+nttunpack128_avx: /* file-local helper (no .global): permutes the 256 bytes at (%rdi) in place; writes ymm3-ymm11 */
+#load
+vmovdqa (%rdi),%ymm4 /* aligned loads: (%rdi) must be 32-byte aligned */
+vmovdqa 32(%rdi),%ymm5
+vmovdqa 64(%rdi),%ymm6
+vmovdqa 96(%rdi),%ymm7
+vmovdqa 128(%rdi),%ymm8
+vmovdqa 160(%rdi),%ymm9
+vmovdqa 192(%rdi),%ymm10
+vmovdqa 224(%rdi),%ymm11
+
+shuffle8 4,8,3,8
+shuffle8 5,9,4,9
+shuffle8 6,10,5,10
+shuffle8 7,11,6,11
+
+shuffle4 3,5,7,5
+shuffle4 8,10,3,10
+shuffle4 4,6,8,6
+shuffle4 9,11,4,11
+
+shuffle2 7,8,9,8
+shuffle2 5,6,7,6
+shuffle2 3,4,5,4
+shuffle2 10,11,3,11
+
+shuffle1 9,5,10,5
+shuffle1 8,4,9,4
+shuffle1 7,3,8,3
+shuffle1 6,11,7,11
+
+#store
+vmovdqa %ymm10,(%rdi) /* results are written back over the input buffer */
+vmovdqa %ymm5,32(%rdi)
+vmovdqa %ymm9,64(%rdi)
+vmovdqa %ymm4,96(%rdi)
+vmovdqa %ymm8,128(%rdi)
+vmovdqa %ymm3,160(%rdi)
+vmovdqa %ymm7,192(%rdi)
+vmovdqa %ymm11,224(%rdi)
+
+ret
+
+.global cdecl(nttunpack_avx)
+cdecl(nttunpack_avx): /* exported entry: runs nttunpack128_avx on two consecutive 256-byte halves starting at (%rdi) */
+call nttunpack128_avx /* bytes 0..255 */
+add $256,%rdi /* the helper leaves rdi unchanged, so step to the second half here */
+call nttunpack128_avx /* bytes 256..511 */
+ret
+
+ntttobytes128_avx: /* file-local helper: reads 256 bytes at (%rsi), writes 192 packed bytes to (%rdi); expects ymm0 set by the caller */
+#load
+vmovdqa (%rsi),%ymm5 /* aligned loads: (%rsi) must be 32-byte aligned */
+vmovdqa 32(%rsi),%ymm6
+vmovdqa 64(%rsi),%ymm7
+vmovdqa 96(%rsi),%ymm8
+vmovdqa 128(%rsi),%ymm9
+vmovdqa 160(%rsi),%ymm10
+vmovdqa 192(%rsi),%ymm11
+vmovdqa 224(%rsi),%ymm12
+
+#csubq
+csubq 5,13
+csubq 6,13
+csubq 7,13
+csubq 8,13
+csubq 9,13
+csubq 10,13
+csubq 11,13
+csubq 12,13
+
+#bitpack
+vpsllw $12,%ymm6,%ymm4 /* out0 = w0 | (w1 << 12), per 16-bit word */
+vpor %ymm4,%ymm5,%ymm4
+
+vpsrlw $4,%ymm6,%ymm5 /* out1 = (w1 >> 4) | (w2 << 8) */
+vpsllw $8,%ymm7,%ymm6
+vpor %ymm5,%ymm6,%ymm5
+
+vpsrlw $8,%ymm7,%ymm6 /* out2 = (w2 >> 8) | (w3 << 4) */
+vpsllw $4,%ymm8,%ymm7
+vpor %ymm6,%ymm7,%ymm6
+
+vpsllw $12,%ymm10,%ymm7 /* same three-step packing for input vectors 4..7 */
+vpor %ymm7,%ymm9,%ymm7
+
+vpsrlw $4,%ymm10,%ymm8
+vpsllw $8,%ymm11,%ymm9
+vpor %ymm8,%ymm9,%ymm8
+
+vpsrlw $8,%ymm11,%ymm9
+vpsllw $4,%ymm12,%ymm10
+vpor %ymm9,%ymm10,%ymm9
+
+shuffle1 4,5,3,5
+shuffle1 6,7,4,7
+shuffle1 8,9,6,9
+
+shuffle2 3,4,8,4
+shuffle2 6,5,3,5
+shuffle2 7,9,6,9
+
+shuffle4 8,3,7,3
+shuffle4 6,4,8,4
+shuffle4 5,9,6,9
+
+shuffle8 7,8,5,8
+shuffle8 6,3,7,3
+shuffle8 4,9,6,9
+
+#store
+vmovdqu %ymm5,(%rdi) /* unaligned stores: the byte output needs no particular alignment */
+vmovdqu %ymm7,32(%rdi)
+vmovdqu %ymm6,64(%rdi)
+vmovdqu %ymm8,96(%rdi)
+vmovdqu %ymm3,128(%rdi)
+vmovdqu %ymm9,160(%rdi)
+
+ret
+
+.global cdecl(ntttobytes_avx)
+cdecl(ntttobytes_avx): /* exported entry: rdi = byte output, rsi = input vectors, rdx = base of the constants table */
+#consts
+vmovdqa _16XQ*2(%rdx),%ymm0 /* ymm0 = vector at byte offset _16XQ*2; presumably read by the csubq macro - TODO confirm in fq.inc */
+call ntttobytes128_avx /* first 256 input bytes become 192 output bytes */
+add $256,%rsi
+add $192,%rdi
+call ntttobytes128_avx /* second half */
+ret
+
+nttfrombytes128_avx: /* file-local helper: reads 192 packed bytes at (%rsi), writes 256 bytes to (%rdi); expects the field mask in ymm0 */
+#load
+vmovdqu (%rsi),%ymm4 /* unaligned loads: the byte input needs no particular alignment */
+vmovdqu 32(%rsi),%ymm5
+vmovdqu 64(%rsi),%ymm6
+vmovdqu 96(%rsi),%ymm7
+vmovdqu 128(%rsi),%ymm8
+vmovdqu 160(%rsi),%ymm9
+
+shuffle8 4,7,3,7
+shuffle8 5,8,4,8
+shuffle8 6,9,5,9
+
+shuffle4 3,8,6,8
+shuffle4 7,5,3,5
+shuffle4 4,9,7,9
+
+shuffle2 6,5,4,5
+shuffle2 8,7,6,7
+shuffle2 3,9,8,9
+
+shuffle1 4,7,10,7
+shuffle1 5,8,4,8
+shuffle1 6,9,5,9
+
+#bitunpack
+vpsrlw $12,%ymm10,%ymm11 /* second field = (p0 >> 12) | (p1 << 4), masked below */
+vpsllw $4,%ymm7,%ymm12
+vpor %ymm11,%ymm12,%ymm11
+vpand %ymm0,%ymm10,%ymm10 /* first field = p0 & mask */
+vpand %ymm0,%ymm11,%ymm11
+
+vpsrlw $8,%ymm7,%ymm12 /* third field = ((p1 >> 8) | (p2 << 8)) & mask */
+vpsllw $8,%ymm4,%ymm13
+vpor %ymm12,%ymm13,%ymm12
+vpand %ymm0,%ymm12,%ymm12
+
+vpsrlw $4,%ymm4,%ymm13 /* fourth field = (p2 >> 4) & mask */
+vpand %ymm0,%ymm13,%ymm13
+
+vpsrlw $12,%ymm8,%ymm14 /* same four-field unpacking for the remaining three packed vectors */
+vpsllw $4,%ymm5,%ymm15
+vpor %ymm14,%ymm15,%ymm14
+vpand %ymm0,%ymm8,%ymm8
+vpand %ymm0,%ymm14,%ymm14
+
+vpsrlw $8,%ymm5,%ymm15
+vpsllw $8,%ymm9,%ymm1
+vpor %ymm15,%ymm1,%ymm15
+vpand %ymm0,%ymm15,%ymm15
+
+vpsrlw $4,%ymm9,%ymm1
+vpand %ymm0,%ymm1,%ymm1
+
+#store
+vmovdqa %ymm10,(%rdi) /* aligned stores: (%rdi) must be 32-byte aligned */
+vmovdqa %ymm11,32(%rdi)
+vmovdqa %ymm12,64(%rdi)
+vmovdqa %ymm13,96(%rdi)
+vmovdqa %ymm8,128(%rdi)
+vmovdqa %ymm14,160(%rdi)
+vmovdqa %ymm15,192(%rdi)
+vmovdqa %ymm1,224(%rdi)
+
+ret
+
+.global cdecl(nttfrombytes_avx)
+cdecl(nttfrombytes_avx): /* exported entry: rdi = output vectors, rsi = packed byte input, rdx = base of the constants table */
+#consts
+vmovdqa _16XMASK*2(%rdx),%ymm0 /* ymm0 = mask ANDed onto every unpacked field by the helper */
+call nttfrombytes128_avx /* first 192 input bytes become 256 output bytes */
+add $256,%rdi
+add $192,%rsi
+call nttfrombytes128_avx /* second half */
+ret
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.inc
new file mode 100644
index 0000000000..73e9ffe03c
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/shuffle.inc
@@ -0,0 +1,25 @@
+.macro shuffle8 r0,r1,r2,r3
+vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 /* r2 = low 128-bit lane of r0, then low 128-bit lane of r1 */
+vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 /* r3 = high 128-bit lane of r0, then high 128-bit lane of r1 */
+.endm
+
+.macro shuffle4 r0,r1,r2,r3
+vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 /* r2 = low quadwords of r0 and r1 interleaved, per 128-bit lane */
+vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 /* r3 = high quadwords of r0 and r1 interleaved, per 128-bit lane */
+.endm
+
+.macro shuffle2 r0,r1,r2,r3
+#vpsllq $32,%ymm\r1,%ymm\r2
+vmovsldup %ymm\r1,%ymm\r2 /* copy each even dword of r1 into the odd slot next to it */
+vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 /* r2 = even dwords of r0 interleaved with even dwords of r1 */
+vpsrlq $32,%ymm\r0,%ymm\r0 /* overwrites r0: its odd dwords move down to the even slots */
+#vmovshdup %ymm\r0,%ymm\r0
+vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 /* r3 = odd dwords of the original r0 interleaved with odd dwords of r1 */
+.endm
+
+.macro shuffle1 r0,r1,r2,r3
+vpslld $16,%ymm\r1,%ymm\r2 /* move each even word of r1 up into the odd slot of its dword */
+vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 /* r2 = even words of r0 interleaved with even words of r1 */
+vpsrld $16,%ymm\r0,%ymm\r0 /* overwrites r0: its odd words move down to the even slots */
+vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 /* r3 = odd words of the original r0 interleaved with odd words of r1 */
+.endm
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric-shake.c
new file mode 100644
index 0000000000..20f451882e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric-shake.c
@@ -0,0 +1,74 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include "params.h"
+#include "symmetric.h"
+#include "fips202.h"
+
+/*************************************************
+* Name: kyber_shake128_absorb
+*
+* Description: Absorb step of the SHAKE128 specialized for the Kyber context.
+*
+* Arguments: - shake128incctx *state: pointer to (uninitialized) output SHAKE128 state
+* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state
+* - uint8_t x: additional byte of input
+* - uint8_t y: additional byte of input
+**************************************************/
+void kyber_shake128_absorb(shake128incctx *state,
+ const uint8_t seed[KYBER_SYMBYTES],
+ uint8_t x,
+ uint8_t y)
+{
+ uint8_t extseed[KYBER_SYMBYTES+2]; /* seed || x || y */
+
+ memcpy(extseed, seed, KYBER_SYMBYTES);
+ extseed[KYBER_SYMBYTES+0] = x;
+ extseed[KYBER_SYMBYTES+1] = y;
+
+ shake128_absorb_once(state, extseed, sizeof(extseed)); /* the whole extended seed is absorbed in one call */
+}
+
+/*************************************************
+* Name: kyber_shake256_prf
+*
+* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input
+* and then generates outlen bytes of SHAKE256 output
+*
+* Arguments: - uint8_t *out: pointer to output
+* - size_t outlen: number of requested output bytes
+* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
+* - uint8_t nonce: single-byte nonce (public PRF input)
+**************************************************/
+void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce)
+{
+  uint8_t prf_input[KYBER_SYMBYTES+1]; /* key || nonce */
+
+  prf_input[KYBER_SYMBYTES] = nonce; /* trailing byte is the public nonce */
+  memcpy(prf_input, key, KYBER_SYMBYTES);
+
+  shake256(out, outlen, prf_input, sizeof(prf_input));
+}
+
+/*************************************************
+* Name: kyber_shake256_rkprf
+*
+* Description: Usage of SHAKE256 as a PRF: absorbs key followed by input
+* and then generates KYBER_SSBYTES bytes of SHAKE256 output
+*
+* Arguments: - uint8_t *out: pointer to output (KYBER_SSBYTES bytes)
+* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
+* - const uint8_t *input: pointer to the input (of length KYBER_CIPHERTEXTBYTES)
+*
+**************************************************/
+void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES])
+{
+ shake256incctx s;
+
+ shake256_inc_init(&s);
+ shake256_inc_absorb(&s, key, KYBER_SYMBYTES); /* key is absorbed first, then the input */
+ shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES);
+ shake256_inc_finalize(&s);
+ shake256_inc_squeeze(out, KYBER_SSBYTES, &s);
+ shake256_inc_ctx_release(&s); /* context is released before returning */
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric.h
new file mode 100644
index 0000000000..e4941f7a86
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/symmetric.h
@@ -0,0 +1,34 @@
+#ifndef SYMMETRIC_H
+#define SYMMETRIC_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "params.h"
+
+#include "fips202.h"
+#include "fips202x4.h"
+
+typedef shake128incctx xof_state;
+
+#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb)
+void kyber_shake128_absorb(shake128incctx *s,
+ const uint8_t seed[KYBER_SYMBYTES],
+ uint8_t x,
+ uint8_t y);
+
+#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf)
+void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce);
+
+#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf)
+void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]);
+
+#define XOF_BLOCKBYTES SHAKE128_RATE
+
+#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES)
+#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES)
+#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y)
+#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE)
+#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE)
+#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT)
+
+#endif /* SYMMETRIC_H */
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/verify.c
new file mode 100644
index 0000000000..aa8e2850b1
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_avx2/verify.c
@@ -0,0 +1,73 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <immintrin.h>
+#include "verify.h"
+
+/*************************************************
+* Name: verify
+*
+* Description: Compare two arrays for equality in constant time.
+*
+* Arguments: const uint8_t *a: pointer to first byte array
+* const uint8_t *b: pointer to second byte array
+* size_t len: length of the byte arrays
+*
+* Returns 0 if the byte arrays are equal, 1 otherwise
+**************************************************/
+int verify(const uint8_t *a, const uint8_t *b, size_t len)
+{
+  size_t i;
+  uint64_t r;
+  __m256i f, g, h;
+
+  h = _mm256_setzero_si256();
+  for(i=0;i<len/32;i++) { /* accumulate the XOR of every full 32-byte block into h */
+    f = _mm256_loadu_si256((__m256i *)&a[32*i]);
+    g = _mm256_loadu_si256((__m256i *)&b[32*i]);
+    f = _mm256_xor_si256(f,g);
+    h = _mm256_or_si256(h,f);
+  }
+  r = 1 - _mm256_testz_si256(h,h); /* 1 if any bit differed in the vector part, else 0 */
+
+  a += 32*i;
+  b += 32*i;
+  len -= 32*i;
+  for(i=0;i<len;i++) /* remaining tail of fewer than 32 bytes */
+    r |= a[i] ^ b[i];
+
+  r = (-r) >> 63; /* collapse any nonzero r to exactly 1 without branching */
+  return r;
+}
+
+/*************************************************
+* Name: cmov
+*
+* Description: Copy len bytes from x to r if b is 1;
+* don't modify r if b is 0. Requires b to be in {0,1};
+* assumes two's complement representation of negative integers.
+* Runs in constant time.
+*
+* Arguments: uint8_t *r: pointer to output byte array
+* const uint8_t *x: pointer to input byte array
+* size_t len: Amount of bytes to be copied
+* uint8_t b: Condition bit; has to be in {0,1}
+**************************************************/
+void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b)
+{
+ size_t i;
+ __m256i xvec, rvec, bvec;
+
+ bvec = _mm256_set1_epi64x(-(uint64_t)b);
+ for(i=0;i
+#include
+#include "params.h"
+
+#define verify KYBER_NAMESPACE(verify)
+int verify(const uint8_t *a, const uint8_t *b, size_t len);
+
+#define cmov KYBER_NAMESPACE(cmov)
+void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/LICENSE
new file mode 100644
index 0000000000..7922ab8007
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/LICENSE
@@ -0,0 +1,6 @@
+Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/);
+or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html).
+
+For Keccak and AES we are using public-domain
+code from sources and by authors listed in
+comments on top of the respective files.
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/api.h
new file mode 100644
index 0000000000..70d40f3f3e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/api.h
@@ -0,0 +1,66 @@
+#ifndef API_H
+#define API_H
+
+#include <stdint.h>
+
+#define pqcrystals_kyber512_SECRETKEYBYTES 1632
+#define pqcrystals_kyber512_PUBLICKEYBYTES 800
+#define pqcrystals_kyber512_CIPHERTEXTBYTES 768
+#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber512_ENCCOINBYTES 32
+#define pqcrystals_kyber512_BYTES 32
+
+#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES
+#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES
+#define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES
+#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES
+#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES
+#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES
+
+int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber768_SECRETKEYBYTES 2400
+#define pqcrystals_kyber768_PUBLICKEYBYTES 1184
+#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088
+#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber768_ENCCOINBYTES 32
+#define pqcrystals_kyber768_BYTES 32
+
+#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES
+#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES
+#define pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES
+#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES
+#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES
+#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES
+
+int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber1024_SECRETKEYBYTES 3168
+#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568
+#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568
+#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber1024_ENCCOINBYTES 32
+#define pqcrystals_kyber1024_BYTES 32
+
+#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES
+#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES
+#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES
+#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES
+#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES
+#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES
+
+int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.c
new file mode 100644
index 0000000000..1500ffea56
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.c
@@ -0,0 +1,128 @@
+#include
+#include "params.h"
+#include "cbd.h"
+
+/*************************************************
+* Name:        load32_littleendian
+*
+* Description: Assemble a 32-bit word from four consecutive bytes,
+*              treating x[0] as the least significant byte.
+*
+* Arguments:   - const uint8_t *x: pointer to the four input bytes
+*
+* Returns the 32-bit unsigned value
+*         x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24
+**************************************************/
+static uint32_t load32_littleendian(const uint8_t x[4])
+{
+  const uint32_t b0 = x[0];
+  const uint32_t b1 = x[1];
+  const uint32_t b2 = x[2];
+  const uint32_t b3 = x[3];
+
+  return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+}
+
+/*************************************************
+* Name:        load24_littleendian
+*
+* Description: Assemble a 32-bit word from three consecutive bytes,
+*              treating x[0] as the least significant byte.
+*              Only compiled when KYBER_ETA1 == 3.
+*
+* Arguments:   - const uint8_t *x: pointer to the three input bytes
+*
+* Returns x[0] | x[1] << 8 | x[2] << 16 (the top byte is always zero)
+**************************************************/
+#if KYBER_ETA1 == 3
+static uint32_t load24_littleendian(const uint8_t x[3])
+{
+  const uint32_t b0 = x[0];
+  const uint32_t b1 = x[1];
+  const uint32_t b2 = x[2];
+
+  return b0 | (b1 << 8) | (b2 << 16);
+}
+#endif
+
+
+/*************************************************
+* Name:        cbd2
+*
+* Description: Given an array of uniformly random bytes, compute
+*              polynomial with coefficients distributed according to
+*              a centered binomial distribution with parameter eta=2.
+*
+*              Each 32-bit input word produces 8 coefficients: the even
+*              and odd bits of the word are added in parallel so that
+*              every 2-bit field holds a sum in 0..2, and a coefficient
+*              is the difference of two neighbouring fields.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *buf: pointer to input byte array
+*                                    (2*KYBER_N/4 bytes)
+**************************************************/
+static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4])
+{
+  unsigned int i,j;
+  uint32_t t,d;
+  int16_t a,b;
+
+  /* KYBER_N/8 words of 4 bytes consume exactly 2*KYBER_N/4 input bytes */
+  for(i=0;i<KYBER_N/8;i++) {
+    t  = load32_littleendian(buf+4*i);
+    d  = t & 0x55555555;
+    d += (t>>1) & 0x55555555;
+
+    for(j=0;j<8;j++) {
+      a = (d >> (4*j+0)) & 0x3;
+      b = (d >> (4*j+2)) & 0x3;
+      r->coeffs[8*i+j] = a - b;
+    }
+  }
+}
+
+/*************************************************
+* Name:        cbd3
+*
+* Description: Given an array of uniformly random bytes, compute
+*              polynomial with coefficients distributed according to
+*              a centered binomial distribution with parameter eta=3.
+*              Only compiled when KYBER_ETA1 == 3.
+*
+*              Each 24-bit input word produces 4 coefficients: every
+*              3-bit field of d accumulates the sum of three input bits
+*              (0..3), and a coefficient is the difference of two
+*              neighbouring fields.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *buf: pointer to input byte array
+*                                    (3*KYBER_N/4 bytes)
+**************************************************/
+#if KYBER_ETA1 == 3
+static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4])
+{
+  unsigned int i,j;
+  uint32_t t,d;
+  int16_t a,b;
+
+  /* KYBER_N/4 words of 3 bytes consume exactly 3*KYBER_N/4 input bytes */
+  for(i=0;i<KYBER_N/4;i++) {
+    t  = load24_littleendian(buf+3*i);
+    d  = t & 0x00249249;
+    d += (t>>1) & 0x00249249;
+    d += (t>>2) & 0x00249249;
+
+    for(j=0;j<4;j++) {
+      a = (d >> (6*j+0)) & 0x7;
+      b = (d >> (6*j+3)) & 0x7;
+      r->coeffs[4*i+j] = a - b;
+    }
+  }
+}
+#endif
+
+/*************************************************
+* Name:        poly_cbd_eta1
+*
+* Description: Sample a polynomial from KYBER_ETA1*KYBER_N/4 input bytes
+*              by dispatching at compile time to cbd2 or cbd3 according
+*              to KYBER_ETA1. Any other value of KYBER_ETA1 is rejected
+*              at compile time.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *buf: pointer to input byte array
+**************************************************/
+void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4])
+{
+#if (KYBER_ETA1 != 2) && (KYBER_ETA1 != 3)
+#error "This implementation requires eta1 in {2,3}"
+#elif KYBER_ETA1 == 3
+  cbd3(r, buf);
+#else
+  cbd2(r, buf);
+#endif
+}
+
+/*************************************************
+* Name:        poly_cbd_eta2
+*
+* Description: Sample a polynomial from KYBER_ETA2*KYBER_N/4 input bytes
+*              using cbd2. Only KYBER_ETA2 == 2 is supported; any other
+*              value is rejected at compile time.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *buf: pointer to input byte array
+**************************************************/
+void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4])
+{
+#if KYBER_ETA2 != 2
+#error "This implementation requires eta2 = 2"
+#endif
+  cbd2(r, buf);
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.h
new file mode 100644
index 0000000000..7b677d745d
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/cbd.h
@@ -0,0 +1,14 @@
+#ifndef CBD_H
+#define CBD_H
+
+#include
+#include "params.h"
+#include "poly.h"
+
+#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1)
+void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]);
+
+#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2)
+void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/indcpa.c
new file mode 100644
index 0000000000..4a8b4c894f
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/indcpa.c
@@ -0,0 +1,331 @@
+#include
+#include
+#include
+#include "params.h"
+#include "indcpa.h"
+#include "polyvec.h"
+#include "poly.h"
+#include "ntt.h"
+#include "symmetric.h"
+#include "randombytes.h"
+
+/*************************************************
+* Name:        pack_pk
+*
+* Description: Build the serialized public key: the byte encoding of
+*              the polynomial vector pk, followed immediately by the
+*              KYBER_SYMBYTES-byte public seed for the matrix A.
+*
+* Arguments:   - uint8_t *r: pointer to the output serialized public key
+*              - polyvec *pk: pointer to the input public-key polyvec
+*              - const uint8_t *seed: pointer to the input public seed
+**************************************************/
+static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES],
+                    polyvec *pk,
+                    const uint8_t seed[KYBER_SYMBYTES])
+{
+  /* the seed lives right behind the KYBER_POLYVECBYTES of vector data */
+  uint8_t *seed_out = r + KYBER_POLYVECBYTES;
+
+  polyvec_tobytes(r, pk);
+  memcpy(seed_out, seed, KYBER_SYMBYTES);
+}
+
+/*************************************************
+* Name:        unpack_pk
+*
+* Description: Split a serialized public key into its polynomial vector
+*              and the trailing KYBER_SYMBYTES-byte seed;
+*              approximate inverse of pack_pk
+*
+* Arguments:   - polyvec *pk: pointer to output public-key polynomial vector
+*              - uint8_t *seed: pointer to output seed to generate matrix A
+*              - const uint8_t *packedpk: pointer to input serialized public key
+**************************************************/
+static void unpack_pk(polyvec *pk,
+                      uint8_t seed[KYBER_SYMBYTES],
+                      const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES])
+{
+  /* the seed is stored right behind the KYBER_POLYVECBYTES of vector data */
+  const uint8_t *seed_in = packedpk + KYBER_POLYVECBYTES;
+
+  polyvec_frombytes(pk, packedpk);
+  memcpy(seed, seed_in, KYBER_SYMBYTES);
+}
+
+/*************************************************
+* Name: pack_sk
+*
+* Description: Serialize the secret key. This is a thin wrapper that
+* forwards both arguments unchanged to polyvec_tobytes.
+*
+* Arguments: - uint8_t *r: pointer to output serialized secret key
+* (declared with KYBER_INDCPA_SECRETKEYBYTES bytes)
+* - polyvec *sk: pointer to input vector of polynomials (secret key)
+**************************************************/
+static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk)
+{
+ polyvec_tobytes(r, sk);
+}
+
+/*************************************************
+* Name: unpack_sk
+*
+* Description: De-serialize the secret key; inverse of pack_sk.
+* This is a thin wrapper that forwards both arguments
+* unchanged to polyvec_frombytes.
+*
+* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key)
+* - const uint8_t *packedsk: pointer to input serialized secret key
+* (declared with KYBER_INDCPA_SECRETKEYBYTES bytes)
+**************************************************/
+static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES])
+{
+ polyvec_frombytes(sk, packedsk);
+}
+
+/*************************************************
+* Name: pack_ciphertext
+*
+* Description: Serialize the ciphertext as concatenation of the
+* compressed and serialized vector of polynomials b
+* and the compressed and serialized polynomial v.
+* The compressed v starts at offset
+* KYBER_POLYVECCOMPRESSEDBYTES in the output.
+*
+* Arguments: uint8_t *r: pointer to the output serialized ciphertext
+* polyvec *b: pointer to the input vector of polynomials b
+* poly *v: pointer to the input polynomial v
+**************************************************/
+static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v)
+{
+ polyvec_compress(r, b);
+ poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v);
+}
+
+/*************************************************
+* Name: unpack_ciphertext
+*
+* Description: De-serialize and decompress ciphertext from a byte array;
+* approximate inverse of pack_ciphertext.
+* The compressed vector b is read from the start of c and
+* the compressed polynomial v from offset
+* KYBER_POLYVECCOMPRESSEDBYTES.
+*
+* Arguments: - polyvec *b: pointer to the output vector of polynomials b
+* - poly *v: pointer to the output polynomial v
+* - const uint8_t *c: pointer to the input serialized ciphertext
+**************************************************/
+static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES])
+{
+ polyvec_decompress(b, c);
+ poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES);
+}
+
+/*************************************************
+* Name:        rej_uniform
+*
+* Description: Rejection-sample integers below KYBER_Q from a byte
+*              buffer. Every group of three bytes is split into two
+*              12-bit candidates; a candidate is stored only if it is
+*              smaller than KYBER_Q and there is still room in r.
+*
+* Arguments:   - int16_t *r: pointer to output buffer
+*              - unsigned int len: requested number of 16-bit integers (uniform mod q)
+*              - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes)
+*              - unsigned int buflen: length of input buffer in bytes
+*
+* Returns number of sampled 16-bit integers (at most len)
+**************************************************/
+static unsigned int rej_uniform(int16_t *r,
+                                unsigned int len,
+                                const uint8_t *buf,
+                                unsigned int buflen)
+{
+  unsigned int accepted = 0;
+  unsigned int offset;
+  uint16_t lo, hi;
+
+  /* stop as soon as r is full or fewer than three input bytes remain */
+  for(offset = 0; accepted < len && offset + 3 <= buflen; offset += 3) {
+    lo = ((buf[offset+0] >> 0) | ((uint16_t)buf[offset+1] << 8)) & 0xFFF;
+    hi = ((buf[offset+1] >> 4) | ((uint16_t)buf[offset+2] << 4)) & 0xFFF;
+
+    if(lo < KYBER_Q)
+      r[accepted++] = lo;
+    if(accepted < len && hi < KYBER_Q)
+      r[accepted++] = hi;
+  }
+
+  return accepted;
+}
+
+#define gen_a(A,B) gen_matrix(A,B,0)
+#define gen_at(A,B) gen_matrix(A,B,1)
+
+/*************************************************
+* Name: gen_matrix
+*
+* Description: Deterministically generate matrix A (or the transpose of A)
+* from a seed. Entries of the matrix are polynomials that look
+* uniformly random. Performs rejection sampling on output of
+* a XOF
+*
+* Arguments: - polyvec *a: pointer to output matrix A
+* - const uint8_t *seed: pointer to input seed
+* - int transposed: boolean deciding whether A or A^T is generated
+**************************************************/
+#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES)
+// Not static for benchmarking
+void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
+{
+ unsigned int ctr, i, j, k;
+ unsigned int buflen, off;
+ uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2];
+ xof_state state;
+ xof_init(&state, seed);
+
+ for(i=0;i
+#include "params.h"
+#include "polyvec.h"
+
+#define gen_matrix KYBER_NAMESPACE(gen_matrix)
+void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed);
+
+#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand)
+void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]);
+
+#define indcpa_enc KYBER_NAMESPACE(indcpa_enc)
+void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]);
+
+#define indcpa_dec KYBER_NAMESPACE(indcpa_dec)
+void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.c
new file mode 100644
index 0000000000..63abc1029c
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.c
@@ -0,0 +1,169 @@
+#include
+#include
+#include
+#include "params.h"
+#include "kem.h"
+#include "indcpa.h"
+#include "verify.h"
+#include "symmetric.h"
+#include "randombytes.h"
+/*************************************************
+* Name:        crypto_kem_keypair_derand
+*
+* Description: Generates public and private key
+*              for CCA-secure Kyber key encapsulation mechanism
+*              from caller-supplied randomness.
+*
+*              Secret-key layout written here:
+*                IND-CPA secret key || public key || H(pk) || z
+*              where z is the second KYBER_SYMBYTES bytes of coins.
+*
+* Arguments:   - uint8_t *pk: pointer to output public key
+*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*              - uint8_t *sk: pointer to output private key
+*                (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*              - const uint8_t *coins: pointer to input randomness
+*                (an already allocated array filled with 2*KYBER_SYMBYTES random bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_keypair_derand(uint8_t *pk,
+                              uint8_t *sk,
+                              const uint8_t *coins)
+{
+  uint8_t *sk_pk  = sk + KYBER_INDCPA_SECRETKEYBYTES;
+  uint8_t *sk_hpk = sk + KYBER_SECRETKEYBYTES - 2*KYBER_SYMBYTES;
+  uint8_t *sk_z   = sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES;
+  const uint8_t *z = coins + KYBER_SYMBYTES;
+
+  indcpa_keypair_derand(pk, sk, coins);
+  memcpy(sk_pk, pk, KYBER_PUBLICKEYBYTES);
+  hash_h(sk_hpk, pk, KYBER_PUBLICKEYBYTES);
+  /* Value z for pseudo-random output on reject */
+  memcpy(sk_z, z, KYBER_SYMBYTES);
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_keypair
+*
+* Description: Generates public and private key
+*              for CCA-secure Kyber key encapsulation mechanism.
+*              Draws 2*KYBER_SYMBYTES bytes from randombytes and hands
+*              them to crypto_kem_keypair_derand.
+*
+* Arguments:   - uint8_t *pk: pointer to output public key
+*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*              - uint8_t *sk: pointer to output private key
+*                (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_keypair(uint8_t *pk,
+                       uint8_t *sk)
+{
+  uint8_t seed[2*KYBER_SYMBYTES];
+
+  randombytes(seed, sizeof(seed));
+  crypto_kem_keypair_derand(pk, sk, seed);
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_enc_derand
+*
+* Description: Generates cipher text and shared
+*              secret for given public key from caller-supplied
+*              randomness.
+*
+* Arguments:   - uint8_t *ct: pointer to output cipher text
+*                (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+*              - uint8_t *ss: pointer to output shared secret
+*                (an already allocated array of KYBER_SSBYTES bytes)
+*              - const uint8_t *pk: pointer to input public key
+*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*              - const uint8_t *coins: pointer to input randomness
+*                (an already allocated array filled with KYBER_SYMBYTES random bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_enc_derand(uint8_t *ct,
+                          uint8_t *ss,
+                          const uint8_t *pk,
+                          const uint8_t *coins)
+{
+  /* first half: input coins (used as message); second half: H(pk) */
+  uint8_t m_hpk[2*KYBER_SYMBYTES];
+  /* first half: shared key; second half: encryption coins */
+  uint8_t key_coins[2*KYBER_SYMBYTES];
+  const uint8_t *enc_coins = key_coins + KYBER_SYMBYTES;
+
+  memcpy(m_hpk, coins, KYBER_SYMBYTES);
+
+  /* Multitarget countermeasure for coins + contributory KEM */
+  hash_h(m_hpk + KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES);
+  hash_g(key_coins, m_hpk, 2*KYBER_SYMBYTES);
+
+  indcpa_enc(ct, m_hpk, pk, enc_coins);
+
+  memcpy(ss, key_coins, KYBER_SYMBYTES);
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_enc
+*
+* Description: Generates cipher text and shared
+*              secret for given public key.
+*              Draws KYBER_SYMBYTES bytes from randombytes and hands
+*              them to crypto_kem_enc_derand.
+*
+* Arguments:   - uint8_t *ct: pointer to output cipher text
+*                (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+*              - uint8_t *ss: pointer to output shared secret
+*                (an already allocated array of KYBER_SSBYTES bytes)
+*              - const uint8_t *pk: pointer to input public key
+*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_kem_enc(uint8_t *ct,
+                   uint8_t *ss,
+                   const uint8_t *pk)
+{
+  uint8_t seed[KYBER_SYMBYTES];
+
+  randombytes(seed, sizeof(seed));
+  crypto_kem_enc_derand(ct, ss, pk, seed);
+  return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_dec
+*
+* Description: Generates shared secret for given
+* cipher text and private key.
+*
+* Steps: decrypt ct to a candidate message, re-derive
+* (key, coins) from that message and the H(pk) stored in sk,
+* re-encrypt with those coins, and compare the result with ct.
+* ss is first filled by rkprf from z and ct, then overwritten
+* with the derived key via cmov when the comparison succeeds.
+*
+* Arguments: - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *ct: pointer to input cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - const uint8_t *sk: pointer to input private key
+* (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0.
+*
+* On failure, ss will contain a pseudo-random value.
+**************************************************/
+int crypto_kem_dec(uint8_t *ss,
+ const uint8_t *ct,
+ const uint8_t *sk)
+{
+ int fail;
+ uint8_t buf[2*KYBER_SYMBYTES];
+ /* Will contain key, coins */
+ uint8_t kr[2*KYBER_SYMBYTES];
+ uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; /* NOTE(review): only KYBER_CIPHERTEXTBYTES bytes are compared below; the extra KYBER_SYMBYTES look unused here */
+ const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; /* crypto_kem_keypair_derand copies pk to this offset of sk */
+
+ indcpa_dec(buf, ct, sk);
+
+ /* Multitarget countermeasure for coins + contributory KEM */
+ memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); /* H(pk) as stored by crypto_kem_keypair_derand */
+ hash_g(kr, buf, 2*KYBER_SYMBYTES);
+
+ /* coins are in kr+KYBER_SYMBYTES */
+ indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES);
+
+ fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); /* presumably non-zero when the buffers differ; confirm in verify.c */
+
+ /* Compute rejection key from z (last KYBER_SYMBYTES bytes of sk) and ct */
+ rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct);
+
+ /* Copy true key to return buffer if fail is false */
+ cmov(ss,kr,KYBER_SYMBYTES,!fail);
+
+ return 0;
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.h
new file mode 100644
index 0000000000..234f11966b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/kem.h
@@ -0,0 +1,35 @@
+#ifndef KEM_H
+#define KEM_H
+
+#include
+#include "params.h"
+
+#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES
+#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES
+#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES
+#define CRYPTO_BYTES KYBER_SSBYTES
+
+#if (KYBER_K == 2)
+#define CRYPTO_ALGNAME "Kyber512"
+#elif (KYBER_K == 3)
+#define CRYPTO_ALGNAME "Kyber768"
+#elif (KYBER_K == 4)
+#define CRYPTO_ALGNAME "Kyber1024"
+#endif
+
+#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand)
+int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+
+#define crypto_kem_keypair KYBER_NAMESPACE(keypair)
+int crypto_kem_keypair(uint8_t *pk, uint8_t *sk);
+
+#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand)
+int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+
+#define crypto_kem_enc KYBER_NAMESPACE(enc)
+int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+
+#define crypto_kem_dec KYBER_NAMESPACE(dec)
+int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.c
new file mode 100644
index 0000000000..2f2eb10b2f
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.c
@@ -0,0 +1,146 @@
+#include
+#include "params.h"
+#include "ntt.h"
+#include "reduce.h"
+
+/* Code to generate zetas and zetas_inv used in the number-theoretic transform:
+
+#define KYBER_ROOT_OF_UNITY 17
+
+static const uint8_t tree[128] = {
+ 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120,
+ 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124,
+ 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122,
+ 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126,
+ 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121,
+ 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125,
+ 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123,
+ 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127
+};
+
+void init_ntt() {
+ unsigned int i;
+ int16_t tmp[128];
+
+ tmp[0] = MONT;
+ for(i=1;i<128;i++)
+ tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q);
+
+ for(i=0;i<128;i++) {
+ zetas[i] = tmp[tree[i]];
+ if(zetas[i] > KYBER_Q/2)
+ zetas[i] -= KYBER_Q;
+ if(zetas[i] < -KYBER_Q/2)
+ zetas[i] += KYBER_Q;
+ }
+}
+*/
+
+const int16_t zetas[128] = {
+ -1044, -758, -359, -1517, 1493, 1422, 287, 202,
+ -171, 622, 1577, 182, 962, -1202, -1474, 1468,
+ 573, -1325, 264, 383, -829, 1458, -1602, -130,
+ -681, 1017, 732, 608, -1542, 411, -205, -1571,
+ 1223, 652, -552, 1015, -1293, 1491, -282, -1544,
+ 516, -8, -320, -666, -1618, -1162, 126, 1469,
+ -853, -90, -271, 830, 107, -1421, -247, -951,
+ -398, 961, -1508, -725, 448, -1065, 677, -1275,
+ -1103, 430, 555, 843, -1251, 871, 1550, 105,
+ 422, 587, 177, -235, -291, -460, 1574, 1653,
+ -246, 778, 1159, -147, -777, 1483, -602, 1119,
+ -1590, 644, -872, 349, 418, 329, -156, -75,
+ 817, 1097, 603, 610, 1322, -1285, -1465, 384,
+ -1215, -136, 1218, -1335, -874, 220, -1187, -1659,
+ -1185, -1530, -1278, 794, -1510, -854, -870, 478,
+ -108, -308, 996, 991, 958, -1460, 1522, 1628
+};
+
+/*************************************************
+* Name:        fqmul
+*
+* Description: Multiply two 16-bit values as a 32-bit product and pass
+*              the product through montgomery_reduce.
+*
+* Arguments:   - int16_t a: first factor
+*              - int16_t b: second factor
+*
+* Returns 16-bit integer congruent to a*b*R^{-1} mod q
+**************************************************/
+static int16_t fqmul(int16_t a, int16_t b) {
+  const int32_t product = (int32_t)a*b;
+
+  return montgomery_reduce(product);
+}
+
+/*************************************************
+* Name:        ntt
+*
+* Description: Inplace number-theoretic transform (NTT) in Rq.
+*              input is in standard order, output is in bitreversed order.
+*              Runs seven butterfly layers (len = 128 down to 2) and
+*              consumes zetas[1..127] in increasing order.
+*
+* Arguments:   - int16_t r[256]: pointer to input/output vector of elements of Zq
+**************************************************/
+void ntt(int16_t r[256]) {
+  unsigned int len, start, j;
+  unsigned int k = 1;
+  int16_t t, zeta;
+
+  for(len = 128; len >= 2; len >>= 1) {
+    /* each block spans 2*len entries: lower half r[j], upper half r[j+len] */
+    for(start = 0; start < 256; start += 2*len) {
+      zeta = zetas[k];
+      k++;
+      for(j = start; j < start + len; j++) {
+        t = fqmul(zeta, r[j + len]);
+        r[j + len] = r[j] - t;
+        r[j] = r[j] + t;
+      }
+    }
+  }
+}
+
+/*************************************************
+* Name:        invntt
+*
+* Description: Inplace inverse number-theoretic transform in Rq and
+*              multiplication by Montgomery factor 2^16.
+*              Input is in bitreversed order, output is in standard order.
+*              Runs seven butterfly layers (len = 2 up to 128), consumes
+*              zetas[127..1] in decreasing order, then scales every
+*              entry by the constant f.
+*
+* Arguments:   - int16_t r[256]: pointer to input/output vector of elements of Zq
+**************************************************/
+void invntt(int16_t r[256]) {
+  unsigned int start, len, j;
+  unsigned int k = 127;
+  int16_t t, zeta, diff;
+  const int16_t f = 1441; // mont^2/128
+
+  for(len = 2; len <= 128; len <<= 1) {
+    /* each block spans 2*len entries: lower half r[j], upper half r[j+len] */
+    for(start = 0; start < 256; start += 2*len) {
+      zeta = zetas[k];
+      k--;
+      for(j = start; j < start + len; j++) {
+        t = r[j];
+        r[j] = barrett_reduce(t + r[j + len]);
+        diff = r[j + len] - t;
+        r[j + len] = fqmul(zeta, diff);
+      }
+    }
+  }
+
+  for(j = 0; j < 256; j++)
+    r[j] = fqmul(r[j], f);
+}
+
+/*************************************************
+* Name: basemul
+*
+* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta)
+* used for multiplication of elements in Rq in NTT domain.
+* Computes r[0] = a[1]*b[1]*zeta + a[0]*b[0] and
+* r[1] = a[0]*b[1] + a[1]*b[0], with every product taken
+* through fqmul; the sums are not reduced afterwards.
+*
+* NOTE(review): r[0] is written before a[0], a[1], b[0] and
+* b[1] are read for the later terms, so r presumably must not
+* overlap a or b - confirm against callers.
+*
+* Arguments: - int16_t r[2]: pointer to the output polynomial
+* - const int16_t a[2]: pointer to the first factor
+* - const int16_t b[2]: pointer to the second factor
+* - int16_t zeta: integer defining the reduction polynomial
+**************************************************/
+void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta)
+{
+ r[0] = fqmul(a[1], b[1]);
+ r[0] = fqmul(r[0], zeta);
+ r[0] += fqmul(a[0], b[0]);
+ r[1] = fqmul(a[0], b[1]);
+ r[1] += fqmul(a[1], b[0]);
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.h
new file mode 100644
index 0000000000..227ea74f08
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/ntt.h
@@ -0,0 +1,19 @@
+#ifndef NTT_H
+#define NTT_H
+
+#include
+#include "params.h"
+
+#define zetas KYBER_NAMESPACE(zetas)
+extern const int16_t zetas[128];
+
+#define ntt KYBER_NAMESPACE(ntt)
+void ntt(int16_t poly[256]);
+
+#define invntt KYBER_NAMESPACE(invntt)
+void invntt(int16_t poly[256]);
+
+#define basemul KYBER_NAMESPACE(basemul)
+void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta);
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/params.h
new file mode 100644
index 0000000000..36b2b987f3
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/params.h
@@ -0,0 +1,55 @@
+#ifndef PARAMS_H
+#define PARAMS_H
+
+#ifndef KYBER_K
+#define KYBER_K 3 /* Change this for different security strengths */
+#endif
+
+
+/* Don't change parameters below this line */
+#if (KYBER_K == 2) /* KYBER_NAMESPACE prefixes every exported symbol with the parameter-set name */
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ipd_ref_##s
+#elif (KYBER_K == 3)
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ipd_ref_##s
+#elif (KYBER_K == 4)
+#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ipd_ref_##s
+#else
+#error "KYBER_K must be in {2,3,4}"
+#endif
+
+#define KYBER_N 256 /* number of coefficients in a poly */
+#define KYBER_Q 3329 /* modulus q for polynomial coefficients */
+
+#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */
+#define KYBER_SSBYTES 32 /* size in bytes of shared key */
+
+#define KYBER_POLYBYTES 384 /* serialized poly: KYBER_N coefficients at 12 bits each */
+#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES)
+
+#if KYBER_K == 2 /* per-parameter-set noise width (ETA1) and compressed sizes */
+#define KYBER_ETA1 3
+#define KYBER_POLYCOMPRESSEDBYTES 128 /* KYBER_N coefficients at 4 bits each */
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) /* presumably 10 bits per coefficient; confirm in polyvec.c */
+#elif KYBER_K == 3
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 128 /* KYBER_N coefficients at 4 bits each */
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) /* presumably 10 bits per coefficient; confirm in polyvec.c */
+#elif KYBER_K == 4
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 160 /* KYBER_N coefficients at 5 bits each */
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) /* presumably 11 bits per coefficient; confirm in polyvec.c */
+#endif
+
+#define KYBER_ETA2 2
+
+#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES)
+#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES)
+#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES)
+#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES)
+
+#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES)
+/* 2*KYBER_SYMBYTES of additional space: 32 bytes to save H(pk) and 32 bytes for the rejection value z */
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES)
+#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES)
+
+#endif
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.c
new file mode 100644
index 0000000000..0fe5a20f63
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.c
@@ -0,0 +1,360 @@
+#include
+#include "params.h"
+#include "poly.h"
+#include "ntt.h"
+#include "reduce.h"
+#include "cbd.h"
+#include "symmetric.h"
+
+/*************************************************
+* Name:        poly_compress
+*
+* Description: Compression and subsequent serialization of a polynomial.
+*              Coefficients are processed in groups of eight; each is
+*              mapped to its non-negative representative, compressed to
+*              4 bits (128-byte output) or 5 bits (160-byte output) and
+*              the eight results are packed into 4 or 5 output bytes.
+*
+*              The rounding division by KYBER_Q is carried out as a
+*              multiplication followed by a shift (80635 ~ 2^28/KYBER_Q,
+*              40318 ~ 2^27/KYBER_Q) instead of the '/' operator shown
+*              in the inline reference formulas.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (of length KYBER_POLYCOMPRESSEDBYTES)
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
+{
+  unsigned int i,j;
+  int32_t u;
+  uint32_t d0;
+  uint8_t t[8];
+
+#if (KYBER_POLYCOMPRESSEDBYTES == 128)
+
+  for(i=0;i<KYBER_N/8;i++) {
+    for(j=0;j<8;j++) {
+      // map to positive standard representatives
+      u  = a->coeffs[8*i+j];
+      u += (u >> 15) & KYBER_Q;
+/*    t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
+      d0 = u << 4;
+      d0 += 1665;
+      d0 *= 80635;
+      d0 >>= 28;
+      t[j] = d0 & 0xf;
+    }
+
+    r[0] = t[0] | (t[1] << 4);
+    r[1] = t[2] | (t[3] << 4);
+    r[2] = t[4] | (t[5] << 4);
+    r[3] = t[6] | (t[7] << 4);
+    r += 4;
+  }
+#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
+  for(i=0;i<KYBER_N/8;i++) {
+    for(j=0;j<8;j++) {
+      // map to positive standard representatives
+      u  = a->coeffs[8*i+j];
+      u += (u >> 15) & KYBER_Q;
+/*    t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
+      d0 = u << 5;
+      d0 += 1664;
+      d0 *= 40318;
+      d0 >>= 27;
+      t[j] = d0 & 0x1f;
+    }
+
+    r[0] = (t[0] >> 0) | (t[1] << 5);
+    r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7);
+    r[2] = (t[3] >> 1) | (t[4] << 4);
+    r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6);
+    r[4] = (t[6] >> 2) | (t[7] << 3);
+    r += 5;
+  }
+#else
+#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
+#endif
+}
+
+/*************************************************
+* Name:        poly_decompress
+*
+* Description: De-serialization and subsequent decompression of a polynomial;
+*              approximate inverse of poly_compress.
+*              128-byte input: every byte holds two 4-bit values.
+*              160-byte input: every 5 bytes hold eight 5-bit values.
+*              Each d-bit value x is expanded to (x*KYBER_Q + 2^(d-1)) >> d.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of length KYBER_POLYCOMPRESSEDBYTES bytes)
+**************************************************/
+void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES])
+{
+  unsigned int i;
+
+#if (KYBER_POLYCOMPRESSEDBYTES == 128)
+  for(i=0;i<KYBER_N/2;i++) {
+    r->coeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4;
+    r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4;
+    a += 1;
+  }
+#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
+  unsigned int j;
+  uint8_t t[8];
+  for(i=0;i<KYBER_N/8;i++) {
+    /* unpack eight 5-bit fields; bits above bit 4 are masked off below */
+    t[0] = (a[0] >> 0);
+    t[1] = (a[0] >> 5) | (a[1] << 3);
+    t[2] = (a[1] >> 2);
+    t[3] = (a[1] >> 7) | (a[2] << 1);
+    t[4] = (a[2] >> 4) | (a[3] << 4);
+    t[5] = (a[3] >> 1);
+    t[6] = (a[3] >> 6) | (a[4] << 2);
+    t[7] = (a[4] >> 3);
+    a += 5;
+
+    for(j=0;j<8;j++)
+      r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5;
+  }
+#else
+#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
+#endif
+}
+
+/*************************************************
+* Name:        poly_tobytes
+*
+* Description: Serialization of a polynomial.
+*              Two coefficients are mapped to their non-negative
+*              representatives and packed as two 12-bit values into
+*              three output bytes.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (needs space for KYBER_POLYBYTES bytes)
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a)
+{
+  unsigned int i;
+  uint16_t t0, t1;
+
+  for(i=0;i<KYBER_N/2;i++) {
+    // map to positive standard representatives
+    t0  = a->coeffs[2*i];
+    t0 += ((int16_t)t0 >> 15) & KYBER_Q;
+    t1 = a->coeffs[2*i+1];
+    t1 += ((int16_t)t1 >> 15) & KYBER_Q;
+    r[3*i+0] = (t0 >> 0);
+    r[3*i+1] = (t0 >> 8) | (t1 << 4);
+    r[3*i+2] = (t1 >> 4);
+  }
+}
+
+/*************************************************
+* Name:        poly_frombytes
+*
+* Description: De-serialization of a polynomial;
+*              inverse of poly_tobytes.
+*              Every three input bytes yield two 12-bit coefficients.
+*              The values are stored as read; no reduction modulo
+*              KYBER_Q is applied here.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of KYBER_POLYBYTES bytes)
+**************************************************/
+void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES])
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N/2;i++) {
+    r->coeffs[2*i]   = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF;
+    r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF;
+  }
+}
+
+/*************************************************
+* Name:        poly_frommsg
+*
+* Description: Convert 32-byte message to polynomial.
+*              Bit j of msg[i] selects coefficient 8*i+j: a set bit
+*              gives (KYBER_Q+1)/2, a clear bit gives 0. The selection
+*              uses an all-ones/all-zeros mask rather than a branch.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *msg: pointer to input message
+**************************************************/
+void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES])
+{
+  unsigned int i,j;
+  int16_t mask;
+
+#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8)
+#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!"
+#endif
+
+  for(i=0;i<KYBER_N/8;i++) {
+    for(j=0;j<8;j++) {
+      /* mask is 0 for a clear bit and -1 (all ones) for a set bit */
+      mask = -(int16_t)((msg[i] >> j)&1);
+      r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2);
+    }
+  }
+}
+
+/*************************************************
+* Name:        poly_tomsg
+*
+* Description: Convert polynomial to 32-byte message.
+*              Coefficient 8*i+j is rounded to a single bit which
+*              becomes bit j of msg[i]. The rounding division by
+*              KYBER_Q is carried out as a multiplication by 80635
+*              followed by a shift by 28 instead of the '/' operator
+*              shown in the commented-out reference lines.
+*
+* Arguments:   - uint8_t *msg: pointer to output message
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a)
+{
+  unsigned int i,j;
+  uint32_t t;
+
+  for(i=0;i<KYBER_N/8;i++) {
+    /* bits are OR-ed in below, so the byte has to start out cleared */
+    msg[i] = 0;
+    for(j=0;j<8;j++) {
+      t  = a->coeffs[8*i+j];
+      // t += ((int16_t)t >> 15) & KYBER_Q;
+      // t  = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1;
+      t <<= 1;
+      t += 1665;
+      t *= 80635;
+      t >>= 28;
+      t &= 1;
+      msg[i] |= t << j;
+    }
+  }
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta1
+*
+* Description: Sample a polynomial deterministically from a seed and a nonce,
+*              with output polynomial close to centered binomial distribution
+*              with parameter KYBER_ETA1.
+*              Expands (seed, nonce) with prf into KYBER_ETA1*KYBER_N/4
+*              bytes and feeds them to poly_cbd_eta1.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+  uint8_t prf_out[KYBER_ETA1*KYBER_N/4];
+
+  prf(prf_out, sizeof(prf_out), seed, nonce);
+  poly_cbd_eta1(r, prf_out);
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta2
+*
+* Description: Sample a polynomial deterministically from a seed and a nonce,
+*              with output polynomial close to centered binomial distribution
+*              with parameter KYBER_ETA2.
+*              Expands (seed, nonce) with prf into KYBER_ETA2*KYBER_N/4
+*              bytes and feeds them to poly_cbd_eta2.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce: one-byte input nonce
+**************************************************/
+void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+  uint8_t prf_out[KYBER_ETA2*KYBER_N/4];
+
+  prf(prf_out, sizeof(prf_out), seed, nonce);
+  poly_cbd_eta2(r, prf_out);
+}
+
+
+/*************************************************
+* Name: poly_ntt
+*
+* Description: Computes negacyclic number-theoretic transform (NTT) of
+* a polynomial in place;
+* inputs assumed to be in normal order, output in bitreversed order.
+* After the transform, poly_reduce is applied to the result.
+*
+* Arguments: - poly *r: pointer to in/output polynomial
+**************************************************/
+void poly_ntt(poly *r)
+{
+ ntt(r->coeffs);
+ poly_reduce(r);
+}
+
+/*************************************************
+* Name: poly_invntt_tomont
+*
+* Description: Computes inverse of negacyclic number-theoretic transform (NTT)
+* of a polynomial in place;
+* inputs assumed to be in bitreversed order, output in normal order.
+* Thin wrapper that calls invntt on the coefficient array.
+*
+* Arguments: - poly *r: pointer to in/output polynomial
+**************************************************/
+void poly_invntt_tomont(poly *r)
+{
+ invntt(r->coeffs);
+}
+
+/*************************************************
+* Name:        poly_basemul_montgomery
+*
+* Description: Multiplication of two polynomials in NTT domain.
+*              The coefficients are handled in groups of four: the
+*              first pair is multiplied with basemul using zetas[64+i],
+*              the second pair using -zetas[64+i].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_basemul_montgomery(poly *r, const poly *a, const poly *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N/4;i++) {
+    basemul(&r->coeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]);
+    basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]);
+  }
+}
+
+/*************************************************
+* Name:        poly_tomont
+*
+* Description: Inplace conversion of all coefficients of a polynomial
+*              from normal domain to Montgomery domain.
+*              Each coefficient is multiplied by f = 2^32 mod KYBER_Q
+*              and passed through montgomery_reduce.
+*
+* Arguments:   - poly *r: pointer to input/output polynomial
+**************************************************/
+void poly_tomont(poly *r)
+{
+  unsigned int i;
+  const int16_t f = (1ULL << 32) % KYBER_Q;
+  for(i=0;i<KYBER_N;i++)
+    r->coeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f);
+}
+
+/*************************************************
+* Name:        poly_reduce
+*
+* Description: Applies Barrett reduction to all coefficients of a polynomial
+*              for details of the Barrett reduction see comments in reduce.c
+*
+* Arguments:   - poly *r: pointer to input/output polynomial
+**************************************************/
+void poly_reduce(poly *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N;i++)
+    r->coeffs[i] = barrett_reduce(r->coeffs[i]);
+}
+
+/*************************************************
+* Name:        poly_add
+*
+* Description: Add two polynomials coefficient by coefficient;
+*              no modular reduction is performed
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_add(poly *r, const poly *a, const poly *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N;i++)
+    r->coeffs[i] = a->coeffs[i] + b->coeffs[i];
+}
+
+/*************************************************
+* Name:        poly_sub
+*
+* Description: Subtract two polynomials coefficient by coefficient
+*              (r = a - b); no modular reduction is performed
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+void poly_sub(poly *r, const poly *a, const poly *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_N;i++)
+    r->coeffs[i] = a->coeffs[i] - b->coeffs[i];
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.h
new file mode 100644
index 0000000000..9a99c7cdad
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/poly.h
@@ -0,0 +1,53 @@
+#ifndef POLY_H
+#define POLY_H
+
+#include <stdint.h>
+#include "params.h"
+
+/*
+ * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
+ * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
+ */
+typedef struct{
+  int16_t coeffs[KYBER_N];
+} poly;
+
+#define poly_compress KYBER_NAMESPACE(poly_compress)
+void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a);
+#define poly_decompress KYBER_NAMESPACE(poly_decompress)
+void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]);
+
+#define poly_tobytes KYBER_NAMESPACE(poly_tobytes)
+void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a);
+#define poly_frombytes KYBER_NAMESPACE(poly_frombytes)
+void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]);
+
+#define poly_frommsg KYBER_NAMESPACE(poly_frommsg)
+void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]);
+#define poly_tomsg KYBER_NAMESPACE(poly_tomsg)
+void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r);
+
+#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1)
+void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2)
+void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#define poly_ntt KYBER_NAMESPACE(poly_ntt)
+void poly_ntt(poly *r);
+#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont)
+void poly_invntt_tomont(poly *r);
+#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery)
+void poly_basemul_montgomery(poly *r, const poly *a, const poly *b);
+#define poly_tomont KYBER_NAMESPACE(poly_tomont)
+void poly_tomont(poly *r);
+
+#define poly_reduce KYBER_NAMESPACE(poly_reduce)
+void poly_reduce(poly *r);
+
+#define poly_add KYBER_NAMESPACE(poly_add)
+void poly_add(poly *r, const poly *a, const poly *b);
+#define poly_sub KYBER_NAMESPACE(poly_sub)
+void poly_sub(poly *r, const poly *a, const poly *b);
+
+#endif /* POLY_H */
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.c
new file mode 100644
index 0000000000..661c71ec32
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.c
@@ -0,0 +1,247 @@
+#include <stdint.h>
+#include "params.h"
+#include "poly.h"
+#include "polyvec.h"
+
+/*************************************************
+* Name:        polyvec_compress
+*
+* Description: Compress and serialize vector of polynomials
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (needs space for KYBER_POLYVECCOMPRESSEDBYTES)
+*              - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
+{
+  unsigned int i,j,k;
+  uint64_t d0;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+  uint16_t t[8];
+  for(i=0;i<KYBER_K;i++) {
+    for(j=0;j<KYBER_N/8;j++) {
+      for(k=0;k<8;k++) {
+        t[k]  = a->vec[i].coeffs[8*j+k];
+        t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; /* add q when the coefficient is negative */
+/*      multiply-and-shift form of: t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
+        d0 = t[k];
+        d0 <<= 11;
+        d0 += 1664;
+        d0 *= 645084;
+        d0 >>= 31;
+        t[k] = d0 & 0x7ff;
+
+      }
+
+      r[ 0] = (t[0] >>  0);
+      r[ 1] = (t[0] >>  8) | (t[1] << 3);
+      r[ 2] = (t[1] >>  5) | (t[2] << 6);
+      r[ 3] = (t[2] >>  2);
+      r[ 4] = (t[2] >> 10) | (t[3] << 1);
+      r[ 5] = (t[3] >>  7) | (t[4] << 4);
+      r[ 6] = (t[4] >>  4) | (t[5] << 7);
+      r[ 7] = (t[5] >>  1);
+      r[ 8] = (t[5] >>  9) | (t[6] << 2);
+      r[ 9] = (t[6] >>  6) | (t[7] << 5);
+      r[10] = (t[7] >>  3);
+      r += 11; /* eight 11-bit values were packed into 11 bytes */
+    }
+  }
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+  uint16_t t[4];
+  for(i=0;i<KYBER_K;i++) {
+    for(j=0;j<KYBER_N/4;j++) {
+      for(k=0;k<4;k++) {
+        t[k]  = a->vec[i].coeffs[4*j+k];
+        t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; /* add q when the coefficient is negative */
+/*      multiply-and-shift form of: t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
+        d0 = t[k];
+        d0 <<= 10;
+        d0 += 1665;
+        d0 *= 1290167;
+        d0 >>= 32;
+        t[k] = d0 & 0x3ff;
+      }
+
+      r[0] = (t[0] >> 0);
+      r[1] = (t[0] >> 8) | (t[1] << 2);
+      r[2] = (t[1] >> 6) | (t[2] << 4);
+      r[3] = (t[2] >> 4) | (t[3] << 6);
+      r[4] = (t[3] >> 2);
+      r += 5; /* four 10-bit values were packed into 5 bytes */
+    }
+  }
+#else
+#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}"
+#endif
+}
+
+/*************************************************
+* Name:        polyvec_decompress
+*
+* Description: De-serialize and decompress vector of polynomials;
+*              approximate inverse of polyvec_compress
+*
+* Arguments:   - polyvec *r:       pointer to output vector of polynomials
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of length KYBER_POLYVECCOMPRESSEDBYTES)
+**************************************************/
+void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES])
+{
+  unsigned int i,j,k;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+  uint16_t t[8];
+  for(i=0;i<KYBER_K;i++) {
+    for(j=0;j<KYBER_N/8;j++) {
+      t[0] = (a[0] >> 0) | ((uint16_t)a[ 1] << 8);
+      t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5);
+      t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10);
+      t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7);
+      t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4);
+      t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9);
+      t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6);
+      t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3);
+      a += 11; /* 11 input bytes carry eight 11-bit values */
+
+      for(k=0;k<8;k++)
+        r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) >> 11;
+    }
+  }
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+  uint16_t t[4];
+  for(i=0;i<KYBER_K;i++) {
+    for(j=0;j<KYBER_N/4;j++) {
+      t[0] = (a[0] >> 0) | ((uint16_t)a[1] << 8);
+      t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6);
+      t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4);
+      t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2);
+      a += 5; /* 5 input bytes carry four 10-bit values */
+
+      for(k=0;k<4;k++)
+        r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10;
+    }
+  }
+#else
+#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}"
+#endif
+}
+
+/*************************************************
+* Name:        polyvec_tobytes
+*
+* Description: Serialize vector of polynomials
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (needs space for KYBER_POLYVECBYTES)
+*              - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_tobytes(r+i*KYBER_POLYBYTES, &a->vec[i]); /* element i goes to byte offset i*KYBER_POLYBYTES */
+}
+
+/*************************************************
+* Name:        polyvec_frombytes
+*
+* Description: De-serialize vector of polynomials;
+*              inverse of polyvec_tobytes
+*
+* Arguments:   - polyvec *r:       pointer to output vector of polynomials
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of length KYBER_POLYVECBYTES)
+**************************************************/
+void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES])
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_frombytes(&r->vec[i], a+i*KYBER_POLYBYTES); /* element i is read from byte offset i*KYBER_POLYBYTES */
+}
+
+/*************************************************
+* Name:        polyvec_ntt
+*
+* Description: Apply forward NTT to all elements of a vector of polynomials
+*
+* Arguments:   - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+void polyvec_ntt(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_ntt(&r->vec[i]); /* each element is transformed in place */
+}
+
+/*************************************************
+* Name:        polyvec_invntt_tomont
+*
+* Description: Apply inverse NTT to all elements of a vector of polynomials
+*              and multiply by Montgomery factor 2^16
+*
+* Arguments:   - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+void polyvec_invntt_tomont(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_invntt_tomont(&r->vec[i]); /* each element is transformed in place */
+}
+
+/*************************************************
+* Name:        polyvec_basemul_acc_montgomery
+*
+* Description: Multiply elements of a and b in NTT domain, accumulate into r,
+*              and multiply by 2^-16.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const polyvec *a: pointer to first input vector of polynomials
+*              - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
+{
+  unsigned int i;
+  poly t; /* scratch polynomial holding the product of elements i >= 1 */
+
+  poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); /* first product initializes r */
+  for(i=1;i<KYBER_K;i++) {
+    poly_basemul_montgomery(&t, &a->vec[i], &b->vec[i]);
+    poly_add(r, r, &t);
+  }
+
+  poly_reduce(r); /* single Barrett pass after all unreduced additions */
+}
+
+/*************************************************
+* Name:        polyvec_reduce
+*
+* Description: Applies Barrett reduction to each coefficient
+*              of each element of a vector of polynomials;
+*              for details of the Barrett reduction see comments in reduce.c
+*
+* Arguments:   - polyvec *r: pointer to input/output vector of polynomials
+**************************************************/
+void polyvec_reduce(polyvec *r)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_reduce(&r->vec[i]); /* each element is reduced in place */
+}
+
+/*************************************************
+* Name:        polyvec_add
+*
+* Description: Add vectors of polynomials
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+*            - const polyvec *a: pointer to first input vector of polynomials
+*            - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b)
+{
+  unsigned int i;
+  for(i=0;i<KYBER_K;i++)
+    poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); /* element-wise; poly_add performs no reduction */
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.h
new file mode 100644
index 0000000000..57b605494e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/polyvec.h
@@ -0,0 +1,36 @@
+#ifndef POLYVEC_H
+#define POLYVEC_H
+
+#include <stdint.h>
+#include "params.h"
+#include "poly.h"
+
+typedef struct{
+  poly vec[KYBER_K]; /* KYBER_K polynomials, indexed 0..KYBER_K-1 */
+} polyvec;
+
+#define polyvec_compress KYBER_NAMESPACE(polyvec_compress)
+void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a);
+#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress)
+void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]);
+
+#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes)
+void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a);
+#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes)
+void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]);
+
+#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt)
+void polyvec_ntt(polyvec *r);
+#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont)
+void polyvec_invntt_tomont(polyvec *r);
+
+#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery)
+void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b);
+
+#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce)
+void polyvec_reduce(polyvec *r);
+
+#define polyvec_add KYBER_NAMESPACE(polyvec_add)
+void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b);
+
+#endif /* POLYVEC_H */
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.c
new file mode 100644
index 0000000000..9d8e7edf83
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include "params.h"
+#include "reduce.h"
+
+/*************************************************
+* Name:        montgomery_reduce
+*
+* Description: Montgomery reduction with R = 2^16: maps a 32-bit integer a
+*              to a 16-bit integer congruent to a * R^-1 modulo q
+*
+* Arguments:   - int32_t a: value to be reduced;
+*                           must lie in {-q2^15,...,q2^15-1}
+*
+* Returns:     value in {-q+1,...,q-1} congruent to a * R^-1 modulo q.
+**************************************************/
+int16_t montgomery_reduce(int32_t a)
+{
+  /* m = (a truncated to 16 bits) * QINV, itself truncated to 16 bits */
+  const int16_t m = (int16_t)a*QINV;
+  /* subtract m*q in 32-bit arithmetic, then keep the upper 16 bits */
+  const int32_t diff = a - (int32_t)m*KYBER_Q;
+  return diff >> 16;
+}
+
+/*************************************************
+* Name:        barrett_reduce
+*
+* Description: Barrett reduction: maps a 16-bit integer a to the centered
+*              representative of a mod q, i.e. a value in {-(q-1)/2,...,(q-1)/2}
+*
+* Arguments:   - int16_t a: value to be reduced
+*
+* Returns:     value in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
+**************************************************/
+int16_t barrett_reduce(int16_t a) {
+  /* v = 2^26 / q rounded to nearest, so (v*a) >> 26 estimates a / q */
+  const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q;
+  /* quotient estimate; adding 2^25 before the shift rounds to nearest */
+  int16_t quot = ((int32_t)v*a + (1<<25)) >> 26;
+  quot *= KYBER_Q;
+  return a - quot;
+}
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.h
new file mode 100644
index 0000000000..c1bc1e4c7b
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/reduce.h
@@ -0,0 +1,16 @@
+#ifndef REDUCE_H
+#define REDUCE_H
+
+#include <stdint.h>
+#include "params.h"
+
+#define MONT (-1044) // 2^16 mod q
+#define QINV (-3327) // q^-1 mod 2^16
+
+#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce)
+int16_t montgomery_reduce(int32_t a);
+
+#define barrett_reduce KYBER_NAMESPACE(barrett_reduce)
+int16_t barrett_reduce(int16_t a);
+
+#endif /* REDUCE_H */
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric-shake.c
new file mode 100644
index 0000000000..20f451882e
--- /dev/null
+++ b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512-ipd_ref/symmetric-shake.c
@@ -0,0 +1,74 @@
+#include
+#include
+#include